From 4e2c077879dce298aa0f2648cac1dba1fa5a58af Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 22 Jan 2011 16:53:16 -0700 Subject: softpipe: use proper type for format field --- src/gallium/drivers/softpipe/sp_tex_tile_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index 2220955b715..9bced37990a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -92,7 +92,7 @@ struct softpipe_tex_tile_cache unsigned swizzle_g; unsigned swizzle_b; unsigned swizzle_a; - unsigned format; + enum pipe_format format; struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */ }; -- cgit v1.2.3 From 90671fcdda52a83e2bbba581e985d25c6bff961e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 22 Jan 2011 16:59:22 -0700 Subject: gallium/softpipe: replace pipe_get_tile_swizzle() The new function, pipe_get_tile_rgba_format(), no longer takes a swizzle (we weren't actually using it anywhere). Rename it to indicate that the format is passed explicitly. 
--- src/gallium/auxiliary/util/u_tile.c | 48 +++--------------------- src/gallium/auxiliary/util/u_tile.h | 17 +++------ src/gallium/drivers/softpipe/sp_tex_tile_cache.c | 25 +++++------- 3 files changed, 20 insertions(+), 70 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 44cadbfcdd0..24b02be7cfb 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -391,23 +391,14 @@ pipe_get_tile_rgba(struct pipe_context *pipe, void -pipe_get_tile_swizzle(struct pipe_context *pipe, - struct pipe_transfer *pt, - uint x, - uint y, - uint w, - uint h, - uint swizzle_r, - uint swizzle_g, - uint swizzle_b, - uint swizzle_a, - enum pipe_format format, - float *p) +pipe_get_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + float *p) { unsigned dst_stride = w * 4; void *packed; - uint iy; - float rgba01[6]; if (u_clip_tile(x, y, &w, &h, &pt->box)) { return; @@ -427,35 +418,6 @@ pipe_get_tile_swizzle(struct pipe_context *pipe, pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); FREE(packed); - - if (swizzle_r == PIPE_SWIZZLE_RED && - swizzle_g == PIPE_SWIZZLE_GREEN && - swizzle_b == PIPE_SWIZZLE_BLUE && - swizzle_a == PIPE_SWIZZLE_ALPHA) { - /* no-op, skip */ - return; - } - - rgba01[PIPE_SWIZZLE_ZERO] = 0.0f; - rgba01[PIPE_SWIZZLE_ONE] = 1.0f; - - for (iy = 0; iy < h; iy++) { - float *row = p; - uint ix; - - for (ix = 0; ix < w; ix++) { - rgba01[PIPE_SWIZZLE_RED] = row[0]; - rgba01[PIPE_SWIZZLE_GREEN] = row[1]; - rgba01[PIPE_SWIZZLE_BLUE] = row[2]; - rgba01[PIPE_SWIZZLE_ALPHA] = row[3]; - - *row++ = rgba01[swizzle_r]; - *row++ = rgba01[swizzle_g]; - *row++ = rgba01[swizzle_b]; - *row++ = rgba01[swizzle_a]; - } - p += dst_stride; - } } diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h index 558351d0ce5..b198837e5ce 100644 --- 
a/src/gallium/auxiliary/util/u_tile.h +++ b/src/gallium/auxiliary/util/u_tile.h @@ -80,18 +80,11 @@ pipe_get_tile_rgba(struct pipe_context *pipe, float *p); void -pipe_get_tile_swizzle(struct pipe_context *pipe, - struct pipe_transfer *pt, - uint x, - uint y, - uint w, - uint h, - uint swizzle_r, - uint swizzle_g, - uint swizzle_b, - uint swizzle_a, - enum pipe_format format, - float *p); +pipe_get_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + float *p); void pipe_put_tile_rgba(struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index e42015ad498..5105e77d436 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -278,22 +278,17 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_z = addr.bits.z; } - /* get tile from the transfer (view into texture) - * Note we're using the swizzle version of this fuction only because - * we need to pass the texture cache's format explicitly. + /* Get tile from the transfer (view into texture), explicitly passing + * the image format. 
*/ - pipe_get_tile_swizzle(tc->pipe, - tc->tex_trans, - addr.bits.x * TILE_SIZE, - addr.bits.y * TILE_SIZE, - TILE_SIZE, - TILE_SIZE, - PIPE_SWIZZLE_RED, - PIPE_SWIZZLE_GREEN, - PIPE_SWIZZLE_BLUE, - PIPE_SWIZZLE_ALPHA, - tc->format, - (float *) tile->data.color); + pipe_get_tile_rgba_format(tc->pipe, + tc->tex_trans, + addr.bits.x * TILE_SIZE, + addr.bits.y * TILE_SIZE, + TILE_SIZE, + TILE_SIZE, + tc->format, + (float *) tile->data.color); tile->addr = addr; } -- cgit v1.2.3 From 4c9ad084c1f54d83b4f27ce2b4cec23b6c7371c8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 22 Jan 2011 17:18:53 -0700 Subject: softpipe: pass surface format to get/put_tile functions When we read/write image tiles we need to use the format specified in the pipe_surface, not the pipe_transfer format (which comes from the underlying texture/resource format). This comes up when rendering to sRGB surfaces (via OpenGL render to texture). Ignoring the new GL_ARB/EXT_framebuffer_sRGB extension for now, when we render to a sRGB surface we need to treat it like a regular, linear colorspace RGB surface. Before, when we read/wrote tiles to sRGB surfaces we were inadvertently doing the color space conversion.
--- src/gallium/drivers/softpipe/sp_tile_cache.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 480860af63b..60870b8bee5 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -357,11 +357,12 @@ sp_flush_tile(struct softpipe_tile_cache* tc, unsigned pos) tc->entries[pos]->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(tc->pipe, tc->transfer, - tc->tile_addrs[pos].bits.x * TILE_SIZE, - tc->tile_addrs[pos].bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tc->entries[pos]->data.color); + pipe_put_tile_rgba_format(tc->pipe, tc->transfer, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + tc->surface->format, + (float *) tc->entries[pos]->data.color); } tc->tile_addrs[pos].bits.invalid = 1; /* mark as empty */ } @@ -468,11 +469,12 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(tc->pipe, pt, - tc->tile_addrs[pos].bits.x * TILE_SIZE, - tc->tile_addrs[pos].bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); + pipe_put_tile_rgba_format(tc->pipe, pt, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + tc->surface->format, + (float *) tile->data.color); } } -- cgit v1.2.3 From 8e572998fc28c1daacca9d9835a71e94a62c6c6c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 16:23:44 +0100 Subject: nvc0: accept neg abs modifiers on lg2 --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index db8055d91cd..e3097c95694 100644 --- 
a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -446,7 +446,7 @@ emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) pc->emit[0] |= op << 26; - if (op >= 4) { + if (op >= 3) { if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; } else { -- cgit v1.2.3 From c18aa3c73f11c80a1f92f99d4a697900945903b8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 16:27:31 +0100 Subject: nvc0: commute sources of SET too if beneficial --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 2 +- src/gallium/drivers/nvc0/nvc0_pc_print.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index acc72bff14c..404b4dccbb6 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -240,7 +240,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) struct nv_ref *src0 = nvi->src[0]; struct nv_ref *src1 = nvi->src[1]; - if (!nv_op_commutative(nvi->opcode)) + if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET) return; assert(src0 && src1 && src0->value && src1->value); diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index b03826484e4..6c71abee69f 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -369,7 +369,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, -- cgit v1.2.3 From 49f16c96f150b192bfd6828ae4ba03afe3a7b8f3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 
Jan 2011 16:46:36 +0100 Subject: nvc0: don't apply base vertex to per-instance arrays --- src/gallium/drivers/nvc0/nvc0_push.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 74c3451c19a..fcbb7da41a3 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -217,6 +217,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) struct push_context ctx; unsigned i, index_size; unsigned inst = info->instance_count; + boolean apply_bias = info->indexed && info->index_bias; ctx.chan = nvc0->screen->base.channel; ctx.translate = nvc0->vertex->translate; @@ -230,7 +231,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset, NOUVEAU_BO_RD); - if (info->indexed) + + if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i)))) data += info->index_bias * vb->stride; ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); -- cgit v1.2.3 From bf1df06773d6eca8b71a687f838edccd1a6c9cb8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 16:52:17 +0100 Subject: nvc0: add MARK_RING where missing to avoid too many relocs errors --- src/gallium/drivers/nvc0/nvc0_context.c | 6 ++++-- src/gallium/drivers/nvc0/nvc0_fence.c | 1 + src/gallium/drivers/nvc0/nvc0_query.c | 2 ++ src/gallium/drivers/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/nvc0/nvc0_state_validate.c | 11 +++++++++-- src/gallium/drivers/nvc0/nvc0_vbo.c | 2 ++ 6 files changed, 19 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 2118abb5d5d..1ebf9e2bafb 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -148,12 +148,14 @@ 
nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) { struct resident *rsd; struct util_dynarray *array; - unsigned ctx, i; + unsigned ctx, i, n; for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { array = &nvc0->residents[ctx]; - for (i = 0; i < array->size / sizeof(struct resident); ++i) { + n = array->size / sizeof(struct resident); + MARK_RING(nvc0->screen->base.channel, n, n); + for (i = 0; i < n; ++i) { rsd = util_dynarray_element(array, struct resident, i); nvc0_resource_validate(rsd->res, rsd->flags); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 9d2c48cf14d..3a3dd75c152 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -55,6 +55,7 @@ nvc0_fence_emit(struct nvc0_fence *fence) assert(fence->state == NVC0_FENCE_STATE_AVAILABLE); + MARK_RING (chan, 5, 2); BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c index cc83fbe771c..e5e43c0e7a5 100644 --- a/src/gallium/drivers/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nvc0/nvc0_query.c @@ -312,6 +312,7 @@ nvc0_render_condition(struct pipe_context *pipe, if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + MARK_RING (chan, 5, 2); BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); @@ -319,6 +320,7 @@ nvc0_render_condition(struct pipe_context *pipe, OUT_RING (chan, 0x00001001); } + MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nvc0/nvc0_screen.c index f608b32e1cb..68f3867fd0e 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -628,6 +628,7 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + MARK_RING(chan, 5, 5); nouveau_bo_validate(chan, screen->text, flags); nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 25aec0244db..b41ca056b6a 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -25,6 +25,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0) else width = fb->width; + MARK_RING (chan, 23, 4); BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */ OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */ @@ -66,12 +67,14 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, fb->width << 16); OUT_RING (chan, fb->height << 16); + MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs); + for (i = 0; i < fb->nr_cbufs; ++i) { struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); struct nouveau_bo *bo = mt->base.bo; uint32_t offset = sf->offset; - + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -93,7 +96,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_bo *bo = mt->base.bo; int unk = mt->base.base.target == PIPE_TEXTURE_2D; uint32_t offset = sf->offset; - + + MARK_RING (chan, 12, 2); BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ 
-238,6 +242,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0) if (nvc0->clip.nr) { struct nouveau_bo *bo = nvc0->screen->uniforms; + MARK_RING (chan, 6 + nvc0->clip.nr * 4, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, 256); OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -340,6 +345,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); if (rebind) { + MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, align(res->base.width0, 0x100)); OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -357,6 +363,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1); + MARK_RING (chan, nr + 5, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, align(res->base.width0, 0x100)); OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index a51a887ed89..486909c1eb0 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -227,6 +227,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) } offset = vb->buffer_offset + ve->src_offset; + MARK_RING (chan, 6, 4); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); OUT_RING (chan, i); OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD); @@ -292,6 +293,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) size = vb->buffer->width0; offset = ve->pipe.src_offset + vb->buffer_offset; + MARK_RING (chan, 8, 4); BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); OUT_RING (chan, (1 << 12) | vb->stride); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); -- cgit v1.2.3 From 419ff10b0ebdeec06bd3466beda2a9e1a9d054d6 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 17:04:25 +0100 Subject: nvc0: recognize r63 as zero in constant folding --- src/gallium/drivers/nvc0/nvc0_pc.c | 10 ++++++++-- 
src/gallium/drivers/nvc0/nvc0_pc.h | 2 +- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 10 ---------- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index 304a1919768..1d1b9e19b78 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -103,6 +103,12 @@ nvc0_pc_replace_value(struct nv_pc *pc, return n; } +static INLINE boolean +is_gpr63(struct nv_value *val) +{ + return (val->reg.file == NV_FILE_GPR && val->reg.id == 63); +} + struct nv_value * nvc0_pc_find_constant(struct nv_ref *ref) { @@ -116,7 +122,7 @@ nvc0_pc_find_constant(struct nv_ref *ref) assert(!src->insn->src[0]->mod); src = src->insn->src[0]->value; } - if ((src->reg.file == NV_FILE_IMM) || + if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) || (src->insn && src->insn->opcode == NV_OP_LD && src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && @@ -130,7 +136,7 @@ nvc0_pc_find_immediate(struct nv_ref *ref) { struct nv_value *src = nvc0_pc_find_constant(ref); - return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; + return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? 
src : NULL; } static void diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 969cc68c596..01ca95b0741 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -310,7 +310,7 @@ struct nv_reg { int32_t s32; int64_t s64; uint64_t u64; - uint32_t u32; + uint32_t u32; /* expected to be 0 for $r63 */ float f32; double f64; } imm; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 950bee2eda4..3709369ca2f 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1388,16 +1388,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, return nvi; } -/* -static boolean -bld_is_constant(struct nv_value *val) -{ - if (val->reg.file == NV_FILE_IMM) - return TRUE; - return val->insn && nvCG_find_constant(val->insn->src[0]); -} -*/ - static void bld_tex(struct bld_context *bld, struct nv_value *dst0[4], const struct tgsi_full_instruction *insn) -- cgit v1.2.3 From 005d186d6634abaeef348ca89c527bd5c34d0e87 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 21 Jan 2011 18:40:41 +0100 Subject: nvc0: don't omit highest bit of branch target Fixes negative relative branch offsets. 
--- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index e3097c95694..e4b243bda87 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -236,7 +236,7 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) */ pc->emit[0] |= (pcrel & 0x3f) << 26; - pc->emit[1] |= (pcrel >> 6) & 0x1ffff; + pc->emit[1] |= (pcrel >> 6) & 0x3ffff; } } -- cgit v1.2.3 From f9bb1c8b3332d26596dd37063d0b9866bc40e63d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 23 Jan 2011 13:05:44 +0100 Subject: nvc0: fix address and value slot assignment in load combining --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 404b4dccbb6..e0d4e2daf9b 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -700,6 +700,9 @@ struct pass_reld_elim { int alloc; }; +/* Extend the load operation in @rec to also cover the data loaded by @ld. + * The two loads may not overlap but reference adjacent memory locations. 
+ */ static void combine_load(struct mem_record *rec, struct nv_instruction *ld) { @@ -716,7 +719,7 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) return; rec->ofst = mem->reg.address; for (j = 0; j < d; ++j) - fv->def[d + j] = fv->def[j]; + fv->def[mem->reg.size / 4 + j] = fv->def[j]; d = 0; } else if ((size == 8 && rec->ofst & 3) || @@ -729,6 +732,7 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) fv->def[d++]->insn = fv; } + fv->src[0]->value->reg.address = rec->ofst; fv->src[0]->value->reg.size = rec->size = size; nvc0_insn_delete(ld); @@ -793,6 +797,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) ((it->ofst >> 4) == (ofst >> 4)) && ((it->ofst + it->size == ofst) || (it->ofst - mem->reg.size == ofst))) { + /* only NV_OP_VFETCH can load exactly 12 bytes */ if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) continue; if (it->ofst < ofst) { -- cgit v1.2.3 From 95eef7a7059c5323230badbf024f3af74a62a6cb Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 22 Jan 2011 13:59:47 +0100 Subject: nvc0: remove bad assert and emit TEMP movs instead --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 3709369ca2f..be1bb44931d 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -878,11 +878,10 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, break; case TGSI_FILE_TEMPORARY: assert(idx < BLD_MAX_TEMPS); - if (!res->insn) + if (!res->insn || res->insn->bb != bld->pc->current_block) res = bld_insn_1(bld, NV_OP_MOV, res); assert(res->reg.file == NV_FILE_GPR); - assert(res->insn->bb = bld->pc->current_block); if (bld->ti->require_stores) bld_lmem_store(bld, ptr, idx * 4 + chan, res); -- cgit v1.2.3 From 
835c4ea1053730c8eea98337c9da1b14fcff6b5e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 23 Jan 2011 13:09:10 +0100 Subject: nvc0: fix emit_cvt for ceil, floor and trunc --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 33 ++++++++++++++++++++------------ src/gallium/drivers/nvc0/nvc0_pc_print.c | 2 +- 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index e4b243bda87..88a59cfb518 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -629,25 +629,28 @@ emit_slct(struct nv_pc *pc, struct nv_instruction *i) static void emit_cvt(struct nv_pc *pc, struct nv_instruction *i) { + uint32_t rint; + pc->emit[0] = 0x00000004; pc->emit[1] = 0x10000000; - if (i->opcode != NV_OP_CVT) + /* if no type conversion specified, get type from opcode */ + if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s) i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); switch (i->ext.cvt.d) { case NV_TYPE_F32: switch (i->ext.cvt.s) { case NV_TYPE_F32: pc->emit[1] = 0x10000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; } break; - case NV_TYPE_S32: pc->emit[0] |= 0x80; + case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */ case NV_TYPE_U32: switch (i->ext.cvt.s) { case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; } break; @@ -656,14 +659,20 @@ emit_cvt(struct nv_pc *pc, struct nv_instruction *i) break; } - if (i->opcode == NV_OP_FLOOR) - pc->emit[1] |= 0x00020000; - else - if (i->opcode == NV_OP_CEIL) - pc->emit[1] |= 0x00040000; - else - if (i->opcode == NV_OP_TRUNC) - pc->emit[1] |= 0x00060000; + rint = (i->ext.cvt.d == NV_TYPE_F32) 
? 1 << 7 : 0; + + if (i->opcode == NV_OP_FLOOR) { + pc->emit[0] |= rint; + pc->emit[1] |= 2 << 16; + } else + if (i->opcode == NV_OP_CEIL) { + pc->emit[0] |= rint; + pc->emit[1] |= 4 << 16; + } else + if (i->opcode == NV_OP_TRUNC) { + pc->emit[0] |= rint; + pc->emit[1] |= 6 << 16; + } if (i->saturate || i->opcode == NV_OP_SAT) pc->emit[0] |= 0x20; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 6c71abee69f..76dd0f57500 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -302,7 +302,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_TRUNC, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, -- cgit v1.2.3 From ffcdd49c69811b9f768c0b32acef6527d5626a6e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 23 Jan 2011 12:03:59 +0100 Subject: r300/compiler: remove any code related to relative addressing of temporaries The hw can't do it and the code was useless anyway (it's lowered in the GLSL compiler). 
--- src/gallium/drivers/r300/r300_fs.c | 7 +++ src/gallium/drivers/r300/r300_tgsi_to_rc.c | 9 ++- src/gallium/drivers/r300/r300_tgsi_to_rc.h | 3 + src/gallium/drivers/r300/r300_vs.c | 7 +++ src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 69 ++++------------------ .../dri/r300/compiler/radeon_dataflow_deadcode.c | 45 +------------- .../drivers/dri/r300/compiler/radeon_optimize.c | 1 - .../dri/r300/compiler/radeon_pair_translate.c | 6 -- .../drivers/dri/r300/compiler/radeon_program.h | 6 +- .../drivers/dri/r300/compiler/radeon_program_alu.c | 1 - .../dri/r300/compiler/radeon_program_print.c | 2 +- src/mesa/drivers/dri/r300/r300_blit.c | 2 - src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c | 1 - 13 files changed, 38 insertions(+), 121 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 6d4091dc87d..84773ab386c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -414,6 +414,13 @@ static void r300_translate_fragment_shader( r300_tgsi_to_rc(&ttr, tokens); + if (ttr.error) { + fprintf(stderr, "r300 FP: Cannot translate a shader. 
" + "Using a dummy shader instead.\n"); + r300_dummy_fragment_shader(r300, shader); + return; + } + if (!r300->screen->caps.is_r500 || compiler.Base.Program.Constants.Count > 200) { compiler.Base.remove_unused_constants = TRUE; diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 15a323989b2..97ec0a1a1f2 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -191,7 +191,12 @@ static void transform_dstreg( dst->File = translate_register_file(src->Register.File); dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); dst->WriteMask = src->Register.WriteMask; - dst->RelAddr = src->Register.Indirect; + + if (src->Register.Indirect) { + ttr->error = TRUE; + fprintf(stderr, "r300: Relative addressing of destination operands " + "is unsupported.\n"); + } } static void transform_srcreg( @@ -332,6 +337,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, unsigned imm_index = 0; int i; + ttr->error = FALSE; + /* Allocate constants placeholders. * * Note: What if declared constants are not contiguous? */ diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index 97641a954b9..adb044cfe56 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -47,6 +47,9 @@ struct tgsi_to_rc { /* Vertex shaders have no half swizzles, and no way to handle them, so * until rc grows proper support, indicate if they're safe to use. */ boolean use_half_swizzles; + + /* If an error occured. 
*/ + boolean error; }; void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 78021e2c5d4..b319890157f 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -226,6 +226,13 @@ void r300_translate_vertex_shader(struct r300_context *r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); + if (ttr.error) { + fprintf(stderr, "r300 VP: Cannot translate a shader. " + "Using a dummy shader instead.\n"); + r300_dummy_vertex_shader(r300, vs); + return; + } + if (compiler.Base.Program.Constants.Count > 200) { compiler.Base.remove_unused_constants = TRUE; } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 472029f63d0..8ad2175eadf 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -490,13 +490,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user) continue; if (info->HasDstReg) { - /* Relative addressing of destination operands is not supported yet. */ - if (vpi->DstReg.RelAddr) { - rc_error(&compiler->Base, "Vertex program does not support relative " - "addressing of destination operands (yet).\n"); - return; - } - /* Neither is Saturate. */ if (vpi->SaturateMode != RC_SATURATE_NONE) { rc_error(&compiler->Base, "Vertex program does not support the Saturate " @@ -668,7 +661,6 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) char hwtemps[RC_REGISTER_MAX_INDEX]; struct temporary_allocation * ta; unsigned int i, j; - struct rc_instruction *last_inst_src_reladdr = NULL; memset(hwtemps, 0, sizeof(hwtemps)); @@ -693,28 +685,11 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) } } - /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up. 
- * For src temporaries, save the last instruction which uses relative addressing. */ - for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (opcode->HasDstReg) - if (inst->U.I.DstReg.RelAddr) - return; - - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[i].RelAddr) { - last_inst_src_reladdr = inst; - } - } - } - ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, sizeof(struct temporary_allocation) * num_orig_temps); memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); - /* Pass 3: Determine original temporary lifetimes */ + /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); /* Instructions inside of loops need to use the ENDLOOP @@ -744,41 +719,22 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - struct rc_instruction *last_read; - - /* From "last_inst_src_reladdr", "end_loop", and "inst", - * select the instruction with the highest instruction index (IP). - * Note that "end_loop", if available, has always a higher index than "inst". */ - if (last_inst_src_reladdr) { - if (end_loop) { - last_read = last_inst_src_reladdr->IP > end_loop->IP ? - last_inst_src_reladdr : end_loop; - } else { - last_read = last_inst_src_reladdr->IP > inst->IP ? - last_inst_src_reladdr : inst; - } - } else { - last_read = end_loop ? end_loop : inst; - } - - ta[inst->U.I.SrcReg[i].Index].LastRead = last_read; + ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? 
end_loop : inst; } } } - /* Pass 4: Register allocation */ + /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) { - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - unsigned int orig = inst->U.I.SrcReg[i].Index; - inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; - if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = 0; - } + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; } } @@ -792,12 +748,7 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) break; } ta[orig].Allocated = 1; - if (last_inst_src_reladdr && - last_inst_src_reladdr->IP > inst->IP) { - ta[orig].HwTemp = orig; - } else { - ta[orig].HwTemp = j; - } + ta[orig].HwTemp = j; hwtemps[ta[orig].HwTemp] = 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 87906f37b12..678e1475883 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -160,12 +160,8 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); if (pused) { usedmask = *pused & inst->U.I.DstReg.WriteMask; - if (!inst->U.I.DstReg.RelAddr) - *pused &= ~usedmask; + *pused &= ~usedmask; } - - if (inst->U.I.DstReg.RelAddr) - mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); } 
insts->WriteMask |= usedmask; @@ -219,22 +215,9 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) { struct deadcode_state s; unsigned int nr_instructions; - unsigned has_temp_reladdr_src = 0; rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; unsigned int ip; - /* Give up if there is relative addressing of destination operands. */ - for(struct rc_instruction * inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->HasDstReg && - inst->U.I.DstReg.WriteMask && - inst->U.I.DstReg.RelAddr) { - return; - } - } - memset(&s, 0, sizeof(s)); s.C = c; @@ -321,32 +304,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); } } - - if (!has_temp_reladdr_src) { - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[i].RelAddr) { - /* If there is a register read from a temporary file with relative addressing, - * mark all preceding written registers as used. 
*/ - for (struct rc_instruction *ptr = inst->Prev; - ptr != &c->Program.Instructions; - ptr = ptr->Prev) { - opcode = rc_get_opcode_info(ptr->U.I.Opcode); - if (opcode->HasDstReg && - ptr->U.I.DstReg.File == RC_FILE_TEMPORARY && - ptr->U.I.DstReg.WriteMask) { - mark_used(&s, - ptr->U.I.DstReg.File, - ptr->U.I.DstReg.Index, - ptr->U.I.DstReg.WriteMask); - } - } - - has_temp_reladdr_src = 1; - break; - } - } - } } update_instruction(&s, inst); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 44f4c0fbdc7..5caff91b00e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -139,7 +139,6 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i unsigned int i; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || - inst_mov->U.I.DstReg.RelAddr || inst_mov->U.I.WriteALUResult || inst_mov->U.I.SaturateMode) return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index fc05366f50e..ddc676c9ac6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -302,12 +302,6 @@ static void check_opcode_support(struct r300_fragment_program_compiler *c, const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); if (opcode->HasDstReg) { - if (inst->DstReg.RelAddr) { - rc_error(&c->Base, "Fragment program does not support relative addressing " - "of destination operands.\n"); - return; - } - if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index df6c94b35f9..a07f6b63c6e 100644 --- 
a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -56,11 +56,7 @@ struct rc_src_register { struct rc_dst_register { unsigned int File:3; - - /** Negative values may be used for relative addressing. */ - signed int Index:(RC_REGISTER_INDEX_BITS+1); - unsigned int RelAddr:1; - + unsigned int Index:RC_REGISTER_INDEX_BITS; unsigned int WriteMask:4; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index c8063171b81..9fc991166a3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -91,7 +91,6 @@ static struct rc_dst_register dstregtmpmask(int index, int mask) dst.File = RC_FILE_TEMPORARY; dst.Index = index; dst.WriteMask = mask; - dst.RelAddr = 0; return dst; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index ae13f6742f8..193844e303b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -110,7 +110,7 @@ static void rc_print_mask(FILE * f, unsigned int mask) static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) { - rc_print_register(f, dst.File, dst.Index, dst.RelAddr); + rc_print_register(f, dst.File, dst.Index, 0); if (dst.WriteMask != RC_MASK_XYZW) { fprintf(f, "."); rc_print_mask(f, dst.WriteMask); diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index 9fd8e8fde5f..de4e5f08670 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -63,7 +63,6 @@ static void create_vertex_program(struct r300_context *r300) inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = VERT_RESULT_HPOS; - inst->U.I.DstReg.RelAddr = 0; 
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; inst->U.I.SrcReg[0].Abs = 0; inst->U.I.SrcReg[0].File = RC_FILE_INPUT; @@ -76,7 +75,6 @@ static void create_vertex_program(struct r300_context *r300) inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = VERT_RESULT_TEX0; - inst->U.I.DstReg.RelAddr = 0; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; inst->U.I.SrcReg[0].Abs = 0; inst->U.I.SrcReg[0].File = RC_FILE_INPUT; diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c index 471a3723cb9..232603ece59 100644 --- a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c +++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c @@ -128,7 +128,6 @@ static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_regi { dest->File = translate_register_file(src->File); dest->Index = src->Index; - dest->RelAddr = src->RelAddr; dest->WriteMask = src->WriteMask; } -- cgit v1.2.3 From 91eba2567eab9409d94efc3c1f07a4a3731d0047 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 23 Jan 2011 12:53:17 +0100 Subject: r300g: support sRGB colorbuffers We are not required to do the linear->sRGB conversion if ARB_framebuffer_sRGB is unsupported. However I think the conversion should work in hw except for blending, which matches the D3D9 behavior. 
--- src/gallium/drivers/r300/r300_blit.c | 14 ++++++----- src/gallium/drivers/r300/r300_texture.c | 44 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index a43e83c0d36..e195128d263 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -411,14 +411,16 @@ static void r300_resource_copy_region(struct pipe_context *pipe, { enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; + const struct util_format_description *desc = util_format_description(old_format); boolean is_depth; - if (!pipe->screen->is_format_supported(pipe->screen, - old_format, src->target, - src->nr_samples, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW, 0) && - util_format_is_plain(old_format)) { + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || + (!pipe->screen->is_format_supported(pipe->screen, + old_format, src->target, + src->nr_samples, + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_SAMPLER_VIEW, 0) && + desc->layout == UTIL_FORMAT_LAYOUT_PLAIN)) { switch (util_format_get_blocksize(old_format)) { case 1: new_format = PIPE_FORMAT_I8_UNORM; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 6fdc504ed54..a5fbe855e7c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -370,14 +370,20 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) switch (format) { /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: + /*case PIPE_FORMAT_A8_SNORM:*/ case PIPE_FORMAT_I8_UNORM: + /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: + /*case PIPE_FORMAT_L8_SNORM:*/ + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return R300_COLOR_FORMAT_I8; /* 16-bit buffers. 
*/ case PIPE_FORMAT_L8A8_UNORM: + /*case PIPE_FORMAT_L8A8_SNORM:*/ + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: return R300_COLOR_FORMAT_UV88; @@ -395,13 +401,29 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 32-bit buffers. */ case PIPE_FORMAT_B8G8R8A8_UNORM: + /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ + case PIPE_FORMAT_B8G8R8A8_SRGB: case PIPE_FORMAT_B8G8R8X8_UNORM: + /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ + case PIPE_FORMAT_B8G8R8X8_SRGB: case PIPE_FORMAT_A8R8G8B8_UNORM: + /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ + case PIPE_FORMAT_A8R8G8B8_SRGB: case PIPE_FORMAT_X8R8G8B8_UNORM: + /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ + case PIPE_FORMAT_X8R8G8B8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: + /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_SRGB: case PIPE_FORMAT_X8B8G8R8_UNORM: + /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ + case PIPE_FORMAT_X8B8G8R8_SRGB: case PIPE_FORMAT_R8G8B8X8_UNORM: + /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ + /*case PIPE_FORMAT_R8G8B8X8_SRGB:*/ case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return R300_COLOR_FORMAT_ARGB8888; @@ -506,9 +528,13 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* 8-bit outputs, one channel. * COLORFORMAT_I8 stores the C2 component. */ case PIPE_FORMAT_A8_UNORM: + /*case PIPE_FORMAT_A8_SNORM:*/ return modifier | R300_C2_SEL_A; case PIPE_FORMAT_I8_UNORM: + /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: + /*case PIPE_FORMAT_L8_SNORM:*/ + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; @@ -516,6 +542,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* 16-bit outputs, two channels. * COLORFORMAT_UV88 stores C2 and C0. 
*/ case PIPE_FORMAT_L8A8_UNORM: + /*case PIPE_FORMAT_L8A8_SNORM:*/ + case PIPE_FORMAT_L8A8_SRGB: return modifier | R300_C0_SEL_A | R300_C2_SEL_R; case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: @@ -528,7 +556,11 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ + case PIPE_FORMAT_B8G8R8A8_SRGB: case PIPE_FORMAT_B8G8R8X8_UNORM: + /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ + case PIPE_FORMAT_B8G8R8X8_SRGB: case PIPE_FORMAT_B10G10R10A2_UNORM: return modifier | R300_C0_SEL_B | R300_C1_SEL_G | @@ -536,21 +568,33 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* ARGB outputs. */ case PIPE_FORMAT_A8R8G8B8_UNORM: + /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ + case PIPE_FORMAT_A8R8G8B8_SRGB: case PIPE_FORMAT_X8R8G8B8_UNORM: + /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ + case PIPE_FORMAT_X8R8G8B8_SRGB: return modifier | R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; /* ABGR outputs. */ case PIPE_FORMAT_A8B8G8R8_UNORM: + /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ + case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_X8B8G8R8_UNORM: + /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ + case PIPE_FORMAT_X8B8G8R8_SRGB: return modifier | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; /* RGBA outputs. */ case PIPE_FORMAT_R8G8B8X8_UNORM: + /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ + /*case PIPE_FORMAT_R8G8B8X8_SRGB:*/ + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_SRGB: case PIPE_FORMAT_R8SG8SB8UX8U_NORM: case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: -- cgit v1.2.3 From a287a758c6567405a7ea10df21e586d1e2ff08ec Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 23 Jan 2011 21:29:30 +0100 Subject: nvc0: implement point coord replacement But we have to cheat and peek at the GENERIC semantic indices the state tracker uses for TEXn. 
Only outputs from 0x300 to 0x37c can be replaced, and so we have to know on shader compilation which ones to put there in order to keep doing separate shader objects properly. At some point I'll probably create a patch that makes gallium not force us to discard the information about what is a TexCoord. --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 10 +++++---- src/gallium/drivers/nvc0/nvc0_program.c | 27 ++++++++++++++++++------ src/gallium/drivers/nvc0/nvc0_program.h | 4 +++- src/gallium/drivers/nvc0/nvc0_state_validate.c | 29 ++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 61932ff2b6a..af6526c8759 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -814,8 +814,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 #define NVC0_3D_POINT_COORD_REPLACE 0x00001604 -#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff -#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3 #define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 @@ -864,8 +868,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 #define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 -#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660 - #define NVC0_3D_TEX_MISC 0x00001664 #define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index aefaf7b98ad..613dc431bfd 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -185,8 +185,17 @@ nvc0_varying_location(unsigned sn, unsigned si) return 0x2e0; */ case TGSI_SEMANTIC_GENERIC: + /* We'd really like to distinguish between TEXCOORD and GENERIC here, + * since only 0x300 to 0x37c can be replaced by sprite coordinates. + * Also, gl_PointCoord should be a system value and must be assigned to + * address 0x2e0. For now, let's cheat: + */ assert(si < 31); - return 0x80 + (si * 16); + if (si <= 7) + return 0x300 + si * 16; + if (si == 9) + return 0x2e0; + return 0x80 + ((si - 8) * 16); case TGSI_SEMANTIC_NORMAL: return 0x360; case TGSI_SEMANTIC_PRIMID: @@ -256,12 +265,14 @@ prog_decl(struct nvc0_translation_info *ti, case TGSI_FILE_INPUT: for (i = first; i <= last; ++i) { if (ti->prog->type == PIPE_SHADER_VERTEX) { - sn = TGSI_SEMANTIC_GENERIC; - si = i; + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = 0x80 + i * 16 + c * 4; + } else { + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for sprite coordinates: */ + ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4; } - for (c = 0; c < 4; ++c) - ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; - if (ti->prog->type == PIPE_SHADER_FRAGMENT) ti->interp_mode[i] = nvc0_interp_mode(decl); } @@ -281,6 +292,8 @@ prog_decl(struct nvc0_translation_info *ti, } else { for (c = 0; c < 4; ++c) ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for TFB_VARYING_LOCS: */ + ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4; } } break; @@ -518,6 +531,8 @@ 
nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) if (!ti->input_access[i][c]) continue; a = ti->input_loc[i][c] / 2; + if (ti->input_loc[i][c] >= 0x2c0) + a -= 32; if ((a & ~7) == 0x70/2) fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ else diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index e6b210d1355..3450cec175d 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -21,16 +21,18 @@ struct nvc0_program { unsigned code_size; unsigned parm_size; - uint32_t hdr[20]; + uint32_t hdr[20]; /* TODO: move this into code to save space */ uint32_t flags[2]; struct { uint8_t edgeflag; uint8_t num_ucps; + uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS]; } vp; struct { uint8_t early_z; + uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; } fp; void *relocs; diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index b41ca056b6a..6419011132a 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -285,6 +285,34 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0) OUT_RINGp(chan, nvc0->rast->state, nvc0->rast->size); } +static void +nvc0_validate_sprite_coords(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + uint32_t reg; + + if (nvc0->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) + reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT; + else + reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; + + if (nvc0->rast->pipe.point_quad_rasterization) { + uint32_t en = nvc0->rast->pipe.sprite_coord_enable; + int i; + struct nvc0_program *prog = nvc0->fragprog; + + while (en) { + i = ffs(en) - 1; + en &= ~(1 << i); + if (prog->fp.in_pos[i] >= 0xc0 && prog->fp.in_pos[i] < 0xe0) + reg |= 8 << ((prog->fp.in_pos[i] - 0xc0) / 4); + } + } + + BEGIN_RING(chan, 
RING_3D(POINT_COORD_REPLACE), 1); + OUT_RING (chan, reg); +} + static void nvc0_constbufs_validate(struct nvc0_context *nvc0) { @@ -404,6 +432,7 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_validate_sprite_coords, NVC0_NEW_RASTERIZER | NVC0_NEW_FRAGPROG }, { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, -- cgit v1.2.3 From c40ec20c273104198f7b3c52af2cd2328833b72b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 13 Dec 2010 09:11:25 -0800 Subject: r300g: Increase fragment shader limits for r400 cards r400 fragment shaders now support up to 64 temporary registers, 512 ALU instructions, and 512 TEX instructions. --- src/gallium/drivers/r300/r300_fs.c | 111 +++++++++++++---- src/gallium/drivers/r300/r300_reg.h | 9 +- src/gallium/drivers/r300/r300_screen.c | 7 -- src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 43 +++++-- .../drivers/dri/r300/compiler/r300_fragprog_emit.c | 133 ++++++++++++++++++--- src/mesa/drivers/dri/r300/compiler/radeon_code.h | 16 ++- .../drivers/dri/r300/compiler/radeon_compiler.h | 2 + src/mesa/drivers/dri/r300/r300_reg.h | 44 +++++++ 8 files changed, 293 insertions(+), 72 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 84773ab386c..b2c02bec86c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -298,44 +298,98 @@ static void r300_emit_fs_code_to_buffer( } } else { /* r300 */ struct r300_fragment_program_code *code = &generic_code->code.r300; - - shader->cb_code_size = 19 + - (r300->screen->caps.is_r400 ? 2 : 0) + - code->alu.length * 4 + - (code->tex.length ? 
(1 + code->tex.length) : 0) + - imm_count * 5; + unsigned int alu_length = code->alu.length; + unsigned int alu_iterations = ((alu_length - 1) / 64) + 1; + unsigned int tex_length = code->tex.length; + unsigned int tex_iterations = + tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0; + unsigned int iterations = + alu_iterations > tex_iterations ? alu_iterations : tex_iterations; + unsigned int bank = 0; + + shader->cb_code_size = 15 + + /* R400_US_CODE_BANK */ + (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) + + /* R400_US_CODE_EXT */ + (r300->screen->caps.is_r400 ? 2 : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */ + (code->r390_mode ? (5 * alu_iterations) : 4) + + /* R400_US_ALU_EXT_ADDR_[0-63] */ + (code->r390_mode ? (code->alu.length) : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */ + code->alu.length * 4 + + /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */ + (code->tex.length > 0 ? code->tex.length + tex_iterations : 0) + + imm_count * 5; NEW_CB(shader->cb_code, shader->cb_code_size); - if (r300->screen->caps.is_r400) - OUT_CB_REG(R400_US_CODE_BANK, 0); - OUT_CB_REG(R300_US_CONFIG, code->config); OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); + if (code->r390_mode) { + OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext); + } else if (r300->screen->caps.is_r400) { + /* This register appears to affect shaders even if r390_mode is + * disabled, so it needs to be set to 0 for shaders that + * don't use r390_mode. */ + OUT_CB_REG(R400_US_CODE_EXT, 0); + } + OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4); OUT_CB_TABLE(code->code_addr, 4); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_inst); + do { + unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64); + unsigned int bank_alu_offset = bank * 64; + unsigned int bank_tex_length = (tex_length < 32 ? 
tex_length : 32); + unsigned int bank_tex_offset = bank * 32; + + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ? + (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2 + } + + if (bank_alu_length > 0) { + OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_addr); + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst); - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_inst); + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr); + + if (code->r390_mode) { + OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr); + } + } + + if (bank_tex_length > 0) { + OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length); + OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length); + } - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_addr); + alu_length -= bank_alu_length; + tex_length -= bank_tex_length; + bank++; + } while(code->r390_mode && (alu_length > 0 || tex_length > 0)); - if (code->tex.length) { - OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - OUT_CB_TABLE(code->tex.inst, code->tex.length); + /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders + 
* will be rendered incorrectly. */ + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, + code->r390_mode ? R400_R390_MODE_ENABLE : 0); } /* Emit immediates. */ @@ -384,12 +438,17 @@ static void r300_translate_fragment_shader( compiler.code = &shader->code; compiler.state = shader->compare_state; compiler.Base.is_r500 = r300->screen->caps.is_r500; + compiler.Base.is_r400 = r300->screen->caps.is_r400; compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT); compiler.Base.has_half_swizzles = TRUE; compiler.Base.has_presub = TRUE; - compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32; + compiler.Base.max_temp_regs = + compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32); compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; - compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_alu_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64; + compiler.Base.max_tex_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32; compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = &shader->inputs; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index d1154dee40a..1d93dab2ca2 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2162,14 +2162,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* R4xx extended fragment shader registers. 
*/ #define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ -# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01 -# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02 -# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04 +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) # define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 -# define R400_ADDR0_EXT_A_MSB_BIT 0x10 -# define R400_ADDR1_EXT_A_MSB_BIT 0x20 -# define R400_ADDR2_EXT_A_MSB_BIT 0x40 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) # define R400_ADDRD_EXT_A_MSB_BIT 0x80 + #define R400_US_CODE_BANK 0x46b8 # define R400_BANK_SHIFT 0 # define R400_BANK_MASK 0xf diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index c75aeaa10a7..178ad63954e 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -87,12 +87,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen) static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { struct r300_screen* r300screen = r300_screen(pscreen); - boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (param) { /* Supported features (boolean caps). 
*/ case PIPE_CAP_NPOT_TEXTURES: @@ -175,9 +171,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (shader) { case PIPE_SHADER_FRAGMENT: diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 782671bac01..deba9ca834d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst) } } +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ + return (r400_ext_addr & bit) ? 1 << 5 : 0; +} + /* just some random things... */ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) { @@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); + if (c->is_r400) { + fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); + } for (n = 0; n <= (code->config & 3); n++) { uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; - int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; - int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + + (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); + unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + + (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d 
(code_addr: %08x)\n", n, + fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " + "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, alu_offset, tex_offset, alu_end, tex_end, code_addr); if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { @@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) for (j = 0; j < 3; ++j) { int regc = code->alu.inst[i].rgb_addr >> (j * 6); int rega = code->alu.inst[i].alpha_addr >> (j * 6); + int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); + (regc & 32) ? 'c' : 't', (regc & 31) | msbc); sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); + (rega & 32) ? 'c' : 't', (rega & 31) | msba); } dstc[0] = 0; @@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) (code->alu.inst[i]. rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_RGB_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + ((code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) + & 31) | msb, flags); } sprintf(flags, "%s%s%s", @@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) dsta[0] = 0; if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_A_MSB_BIT, + code->alu.inst[i].r400_ext_addr); sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + ((code->alu.inst[i]. 
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) + | msb); } if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 1db8678e890..28d132a5fe3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -64,6 +64,20 @@ struct r300_emit_state { __FILE__, __FUNCTION__, ##args); \ } while(0) +static unsigned int get_msbs_alu(unsigned int bits) +{ + return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ + return (bits >> lsbs) & 0x15; +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) /** * Mark a temporary register as used. @@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r return src.Index | (1 << 5); } else if (src.File == RC_FILE_TEMPORARY) { use_temporary(code, src.Index); - return src.Index; + return src.Index & 0x1f; } return 0; @@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { + /* Set the RGB address */ unsigned int src = use_source(code, inst->RGB.Src[j]); unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + code->alu.inst[ip].rgb_addr |= src << (6*j); + /* Set the Alpha address */ src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + code->alu.inst[ip].alpha_addr |= src << (6*j); arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); @@ -223,8 +245,10 @@ static int 
emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i if (inst->RGB.WriteMask) { use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; code->alu.inst[ip].rgb_addr |= - (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { @@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i if (inst->Alpha.WriteMask) { use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; code->alu.inst[ip].alpha_addr |= - (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { @@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit) unsigned tex_offset; unsigned tex_end; + unsigned int alu_offset_msbs, alu_end_msbs; + if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct rc_pair_instruction inst; @@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit) * * Also note that the register specification from AMD is slightly * incorrect in its description of this register. 
*/ - code->code_addr[emit->current_node] = - (alu_offset << R300_ALU_START_SHIFT) | - (alu_end << R300_ALU_SIZE_SHIFT) | - (tex_offset << R300_TEX_START_SHIFT) | - (tex_end << R300_TEX_SIZE_SHIFT) | - emit->node_flags; - + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) + & R300_ALU_START_MASK) + | ((alu_end << R300_ALU_SIZE_SHIFT) + & R300_ALU_SIZE_MASK) + | ((tex_offset << R300_TEX_START_SHIFT) + & R300_TEX_START_MASK) + | ((tex_end << R300_TEX_SIZE_SHIFT) + & R300_TEX_SIZE_MASK) + | emit->node_flags + | (get_msbs_tex(tex_offset, 5) + << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 5) + << R400_TEX_SIZE_MSB_SHIFT) + ; + + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. */ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch(emit->current_node) { + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } return 1; } @@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) unsigned int opcode; PROG_CODE; - if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { error("Too many TEX instructions"); return 0; } @@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) use_temporary(code, inst->U.I.SrcReg[0].Index); code->tex.inst[code->tex.length++] = - (inst->U.I.SrcReg[0].Index << 
R300_SRC_ADDR_SHIFT) | - (dest << R300_DST_ADDR_SHIFT) | - (unit << R300_TEX_ID_SHIFT) | - (opcode << R300_TEX_INST_SHIFT); + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) + & R300_SRC_ADDR_MASK) + | ((dest << R300_DST_ADDR_SHIFT) + & R300_DST_ADDR_MASK) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT) + | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? + R400_SRC_ADDR_EXT_BIT : 0) + | (dest >= R300_PFS_NUM_TEMP_REGS ? + R400_DST_ADDR_EXT_BIT : 0) + ; return 1; } @@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; struct r300_emit_state emit; struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; memset(&emit, 0, sizeof(emit)); emit.compiler = compiler; @@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) finish_node(&emit); code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= + (get_msbs_alu(0) + << R400_ALU_OFFSET_MSB_SHIFT) + | (get_msbs_alu(code->alu.length - 1) + << R400_ALU_SIZE_MSB_SHIFT); + + tex_end = code->tex.length ? code->tex.length - 1 : 0; code->code_offset = - (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | - (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - ((code->tex.length ? 
code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + & R300_PFS_CNTL_ALU_OFFSET_MASK) + | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) + & R300_PFS_CNTL_ALU_END_MASK) + | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + & R300_PFS_CNTL_TEX_OFFSET_MASK) + | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) + & R300_PFS_CNTL_TEX_END_MASK) + | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) + ; if (emit.current_node < 3) { int shift = 3 - emit.current_node; @@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) for(i = 0; i < shift; ++i) code->code_addr[i] = 0; } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS + || code->alu.length > R300_PFS_MAX_ALU_INST + || code->tex.length > R300_PFS_MAX_TEX_INST) { + + code->r390_mode = 1; + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index b69e81698ae..d1451668947 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -31,6 +31,9 @@ #define R300_PFS_NUM_TEMP_REGS 32 #define R300_PFS_NUM_CONST_REGS 32 +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 + #define R500_PFS_MAX_INST 512 #define R500_PFS_NUM_TEMP_REGS 128 #define R500_PFS_NUM_CONST_REGS 256 @@ -187,24 +190,29 @@ struct r300_fragment_program_node { */ struct r300_fragment_program_code { struct { - int length; /**< total # of texture instructions used */ - uint32_t inst[R300_PFS_MAX_TEX_INST]; + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; } tex; struct { - int length; /**< total # of ALU instructions used */ + unsigned int length; /**< total # of ALU instructions used */ struct { uint32_t rgb_inst; uint32_t rgb_addr; uint32_t alpha_inst; uint32_t alpha_addr; - } inst[R300_PFS_MAX_ALU_INST]; + uint32_t 
r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; } alu; uint32_t config; /* US_CONFIG */ uint32_t pixsize; /* US_PIXSIZE */ uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode:1; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index e6633395895..1e64af06bc3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -50,6 +50,7 @@ struct radeon_compiler { char * ErrorMsg; /* Hardware specification. */ + unsigned is_r400:1; unsigned is_r500:1; unsigned has_half_swizzles:1; unsigned has_presub:1; @@ -57,6 +58,7 @@ struct radeon_compiler { unsigned max_temp_regs; unsigned max_constants; int max_alu_insts; + unsigned max_tex_insts; /* Whether to remove unused constants and empty holes in constant space. */ unsigned remove_unused_constants:1; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f7705b0f6fe..2b9d85fae8b 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1658,6 +1658,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) # define R300_PFS_CNTL_TEX_END_SHIFT 18 # define R300_PFS_CNTL_TEX_END_MASK (31 << 18) +# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24 +# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24) +# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28 +# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xf << 28) /* gap */ @@ -1682,6 +1686,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_TEX_SIZE_MASK (31 << 17) # define R300_RGBA_OUT (1 << 22) # define R300_W_OUT (1 << 23) +# define R400_TEX_START_MSB_SHIFT 24 +# define R400_TEX_START_MSG_MASK (0xf << 24) +# define R400_TEX_SIZE_MSB_SHIFT 28 +# define R400_TEX_SIZE_MSG_MASK (0xf << 28) /* TEX * As far as I can tell, texture instructions cannot write into output @@ -1702,6 +1710,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TEX_OP_TXP 3 # define R300_TEX_OP_TXB 4 # define R300_TEX_INST_MASK (7 << 15) +# define R400_SRC_ADDR_EXT_BIT (1 << 19) +# define R400_DST_ADDR_EXT_BIT (1 << 20) /* Output format from the unfied shader */ #define R300_US_OUT_FMT 0x46A4 @@ -1979,6 +1989,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_OUTA_CLAMP (1 << 30) /* END: Fragment program instruction set */ +/* R4xx extended fragment shader registers. */ +#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) +# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) +# define R400_ADDRD_EXT_A_MSB_BIT 0x80 + +#define R400_US_CODE_BANK 0x46b8 +# define R400_BANK_SHIFT 0 +# define R400_BANK_MASK 0xf +# define R400_R390_MODE_ENABLE (1 << 4) +#define R400_US_CODE_EXT 0x46bc +# define R400_ALU_OFFSET_MSB_SHIFT 0 +# define R400_ALU_OFFSET_MSB_MASK (0x7 << 0) +# define R400_ALU_SIZE_MSB_SHIFT 3 +# define R400_ALU_SIZE_MSB_MASK (0x7 << 3) +# define R400_ALU_START0_MSB_SHIFT 6 +# define R400_ALU_START0_MSB_MASK (0x7 << 6) +# define R400_ALU_SIZE0_MSB_SHIFT 9 +# define R400_ALU_SIZE0_MSB_MASK (0x7 << 9) +# define R400_ALU_START1_MSB_SHIFT 12 +# define R400_ALU_START1_MSB_MASK (0x7 << 12) +# define R400_ALU_SIZE1_MSB_SHIFT 15 +# define R400_ALU_SIZE1_MSB_MASK (0x7 << 15) +# define R400_ALU_START2_MSB_SHIFT 18 +# define R400_ALU_START2_MSB_MASK (0x7 << 18) +# define R400_ALU_SIZE2_MSB_SHIFT 21 +# define R400_ALU_SIZE2_MSB_MASK (0x7 << 21) +# define R400_ALU_START3_MSB_SHIFT 24 +# define 
R400_ALU_START3_MSB_MASK (0x7 << 24) +# define R400_ALU_SIZE3_MSB_SHIFT 27 +# define R400_ALU_SIZE3_MSB_MASK (0x7 << 27) +/* END: R4xx extended fragment shader registers. */ + /* Fog: Fog Blending Enable */ #define R300_FG_FOG_BLEND 0x4bc0 # define R300_FG_FOG_BLEND_DISABLE (0 << 0) -- cgit v1.2.3 From 9a9630dcf0666af5a29d529db2ccb832b592e191 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 24 Jan 2011 00:35:53 +0100 Subject: i915g: Improve constant handling --- src/gallium/drivers/i915/i915_state.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 4a1a4a04f6d..e386a3f4630 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -535,21 +535,31 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, /* if we have a new buffer compare it with the old one */ if (buf) { - struct i915_buffer *ir = i915_buffer(buf); + struct i915_buffer *ibuf = i915_buffer(buf); struct pipe_resource *old_buf = i915->constants[shader]; struct i915_buffer *old = old_buf ? i915_buffer(old_buf) : NULL; - - new_num = ir->b.b.width0 / 4 * sizeof(float); - - if (old && new_num != i915->current.num_user_constants[shader]) - diff = memcmp(old->data, ir->data, ir->b.b.width0); + unsigned old_num = i915->current.num_user_constants[shader]; + + new_num = ibuf->b.b.width0 / 4 * sizeof(float); + + if (old_num == new_num) { + if (old_num == 0) + diff = FALSE; +#if 0 + /* XXX no point in running this code since st/mesa only uses user buffers */ + /* Can't compare the buffer data since they are userbuffers */ + else if (old && old->free_on_destroy) + diff = memcmp(old->data, ibuf->data, ibuf->b.b.width0); +#else + (void)old; +#endif + } } else { diff = i915->current.num_user_constants[shader] != 0; } /* * flush before updateing the state. 
- * XXX: looks like its okay to skip the flush for vertex cbufs */ if (diff && shader == PIPE_SHADER_FRAGMENT) draw_flush(i915->draw); -- cgit v1.2.3 From 832029e1c1c027e8f697cc8fdc75902e3c24f38a Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 24 Jan 2011 02:03:59 +0100 Subject: i915g: Remove draw_flushes and state that we don't need to track --- src/gallium/drivers/i915/i915_context.c | 20 -------------------- src/gallium/drivers/i915/i915_context.h | 2 -- src/gallium/drivers/i915/i915_state.c | 29 ++++++++++++++++------------- 3 files changed, 16 insertions(+), 35 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index ea3c10b5e78..648d0090c9a 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -50,7 +50,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; void *mapped_indices = NULL; - unsigned i; unsigned cbuf_dirty; @@ -63,14 +62,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (i915->dirty) i915_update_derived(i915); - /* - * Map vertex buffers - */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - void *buf = i915_buffer(i915->vertex_buffer[i].buffer)->data; - draw_set_mapped_vertex_buffer(draw, i, buf); - } - /* * Map index buffer, if present */ @@ -90,13 +81,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) */ draw_vbo(i915->draw, info); - /* - * unmap vertex/index buffers - */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - if (mapped_indices) draw_set_mapped_index_buffer(draw, NULL); } @@ -117,10 +101,6 @@ static void i915_destroy(struct pipe_context *pipe) if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); - for (i = 0; i < i915->num_vertex_buffers; i++) 
{ - pipe_resource_reference(&i915->vertex_buffer[i].buffer, NULL); - } - /* unbind framebuffer */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index b7f1fb22221..7f49dc96d5d 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -219,14 +219,12 @@ struct i915_context { struct pipe_scissor_state scissor; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer index_buffer; unsigned dirty; unsigned num_samplers; unsigned num_fragment_sampler_views; - unsigned num_vertex_buffers; struct i915_winsys_batchbuffer *batch; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index e386a3f4630..b31cc306a44 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -776,17 +776,25 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct i915_context *i915 = i915_context(pipe); - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. 
- */ - draw_flush(i915->draw); + struct draw_context *draw = i915->draw; + int i; - util_copy_vertex_buffers(i915->vertex_buffer, - &i915->num_vertex_buffers, - buffers, count); +#if 0 + /* XXX doesn't look like this is needed */ + /* unmap old */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + } +#endif /* pass-through to draw module */ - draw_set_vertex_buffers(i915->draw, count, buffers); + draw_set_vertex_buffers(draw, count, buffers); + + /* map new */ + for (i = 0; i < count; i++) { + void *buf = i915_buffer(buffers[i].buffer)->data; + draw_set_mapped_vertex_buffer(draw, i, buf); + } } static void * @@ -811,11 +819,6 @@ i915_bind_vertex_elements_state(struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems; - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. - */ - draw_flush(i915->draw); - /* pass-through to draw module */ if (i915_velems) { draw_set_vertex_elements(i915->draw, -- cgit v1.2.3 From a82408c3537afe09e40b3ee6b3a6cb1acc62f715 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 24 Jan 2011 03:25:17 +0100 Subject: Revert "r300g/swtcl: re-enable LLVM" This reverts commit 88550083b3857184445075e70fed8b2eed4952a1. 
--- src/gallium/drivers/r300/r300_context.c | 16 +--------------- src/gallium/drivers/r300/r300_context.h | 1 - src/gallium/drivers/r300/r300_screen.c | 8 -------- 3 files changed, 1 insertion(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 91263ad7bcd..e265bdbd3b0 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -35,10 +35,6 @@ #include "r300_screen_buffer.h" #include "r300_winsys.h" -#ifdef HAVE_LLVM -#include "gallivm/lp_bld_init.h" -#endif - static void r300_update_num_contexts(struct r300_screen *r300screen, int diff) { @@ -106,14 +102,9 @@ static void r300_destroy_context(struct pipe_context* context) if (r300->blitter) util_blitter_destroy(r300->blitter); - if (r300->draw) { + if (r300->draw) draw_destroy(r300->draw); -#ifdef HAVE_LLVM - gallivm_destroy(r300->gallivm); -#endif - } - if (r300->upload_vb) u_upload_destroy(r300->upload_vb); if (r300->upload_ib) @@ -432,12 +423,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300screen->caps.has_tcl) { /* Create a Draw. This is used for SW TCL. */ -#ifdef HAVE_LLVM - r300->gallivm = gallivm_create(); - r300->draw = draw_create_gallivm(&r300->context, r300->gallivm); -#else r300->draw = draw_create(&r300->context); -#endif if (r300->draw == NULL) goto fail; /* Enable our renderer. */ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e09cf87f733..9030f1bb982 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -456,7 +456,6 @@ struct r300_context { struct r300_screen *screen; /* Draw module. Used mostly for SW TCL. */ - struct gallivm_state *gallivm; struct draw_context* draw; /* Vertex buffer for SW TCL. 
*/ struct pipe_resource* vbo; diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 178ad63954e..37b8bc6440f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -34,10 +34,6 @@ #include "draw/draw_context.h" -#ifdef HAVE_LLVM -#include "gallivm/lp_bld_init.h" -#endif - /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. * @@ -483,9 +479,5 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) util_format_s3tc_init(); -#ifdef HAVE_LLVM - lp_build_init(); -#endif - return &r300screen->screen; } -- cgit v1.2.3 From d78a984baa602a9bbb07622b669deea3069d2ab3 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 24 Jan 2011 11:39:28 +0100 Subject: r300g: handle PIPE_CAP_INSTANCED_DRAWING in get_param --- src/gallium/drivers/r300/r300_screen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 37b8bc6440f..a8ba9c1e5fa 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -126,6 +126,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_INSTANCED_DRAWING: return 0; /* Texturing. */ -- cgit v1.2.3 From 09109c11d9efd78c0f87fc55911e03eda5fd980b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 24 Jan 2011 13:29:19 +0100 Subject: r300g: remove any traces of depth_clamp I couldn't make it work. GB_TILE_CONFIG.Z_EXTENDED, which enables per-pixel Z clamping, and VAP_CLIP_CNTL.CLIP_DISABLE, which disables clipping, do help, but they also add regressions like random graphics corruptions in some games. 
--- src/gallium/drivers/r300/r300_screen.c | 2 +- src/gallium/drivers/r300/r300_state.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index a8ba9c1e5fa..880372ec83f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -121,7 +121,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DUAL_SOURCE_BLEND: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ + case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_STREAM_OUTPUT: diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3a97b76a4c8..2664c1dc834 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -442,8 +442,7 @@ static void r300_set_clip_state(struct pipe_context* pipe, OUT_CB_TABLE(state->ucp, state->nr * 4); } OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN | - (state->depth_clamp ? 
R300_CLIP_DISABLE : 0)); + R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CB; r300_mark_atom_dirty(r300, &r300->clip_state); -- cgit v1.2.3 From db234176b12293dafbda0c6d4299c7e89430a8f8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 24 Jan 2011 13:30:36 +0100 Subject: r300g: remove unused function --- src/gallium/drivers/r300/r300_state_inlines.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 7e501221b1f..06da04c7ad7 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -25,13 +25,9 @@ #define R300_STATE_INLINES_H #include "draw/draw_vertex.h" - #include "pipe/p_format.h" - #include "util/u_format.h" - #include "r300_reg.h" - #include /* Some maths. These should probably find their way to u_math, if needed. */ @@ -341,24 +337,6 @@ static INLINE uint32_t r500_anisotropy(unsigned max_aniso) R500_TX_ANISO_HIGH_QUALITY; } -/* Non-CSO state. (For now.) */ - -static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) -{ - switch (pipe_count) { - case 1: - return R300_GB_TILE_PIPE_COUNT_RV300; - case 2: - return R300_GB_TILE_PIPE_COUNT_R300; - case 3: - return R300_GB_TILE_PIPE_COUNT_R420_3P; - case 4: - return R300_GB_TILE_PIPE_COUNT_R420; - } - return 0; -} - - /* Translate pipe_formats into PSC vertex types. */ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { -- cgit v1.2.3 From 1af59b28b57f460b2944f803251a297fd4f7a769 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 25 Jan 2011 00:27:50 +0100 Subject: r600g: FLT_TO_INT* are vector instructions on Evergreen. FLT_TO_INT is a vector instruction, despite what the (current) documentation says. FLT_TO_INT_FLOOR and FLT_TO_INT_RPI aren't explicitly mentioned in the documentation, but those are vector instructions too. 
--- src/gallium/drivers/r600/r600_asm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index f46059b9e90..9cdd10f9876 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -417,9 +417,9 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) case CHIPREV_EVERGREEN: default: if (!alu->is_op3) + /* Note that FLT_TO_INT* instructions are vector instructions + * on Evergreen, despite what the documentation says. */ return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || - alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || - alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT || -- cgit v1.2.3 From becb733dbc87828c9a699364b3a0488e7d2f1276 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 16 Dec 2010 05:33:46 +0100 Subject: rbug: Fix surface reference leak --- src/gallium/drivers/rbug/rbug_objects.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c index 7d7cc482ae6..15f5db40093 100644 --- a/src/gallium/drivers/rbug/rbug_objects.c +++ b/src/gallium/drivers/rbug/rbug_objects.c @@ -98,8 +98,9 @@ rbug_surface_create(struct rbug_context *rb_context, pipe_reference_init(&rb_surface->base.reference, 1); rb_surface->base.texture = NULL; + rb_surface->base.context = &rb_context->base; + rb_surface->surface = surface; /* we own the surface already */ pipe_resource_reference(&rb_surface->base.texture, &rb_resource->base); - rb_surface->surface = surface; return &rb_surface->base; @@ -113,8 +114,7 @@ rbug_surface_destroy(struct 
rbug_context *rb_context, struct rbug_surface *rb_surface) { pipe_resource_reference(&rb_surface->base.texture, NULL); - rb_context->pipe->surface_destroy(rb_context->pipe, - rb_surface->surface); + pipe_surface_reference(&rb_surface->surface, NULL); FREE(rb_surface); } -- cgit v1.2.3 From daaf542220e5008b54648c6ed853f3f4f87fc7b5 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 29 Dec 2010 09:59:28 +0100 Subject: svga: Use get once helpers for context debug envs --- src/gallium/drivers/svga/svga_context.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 1e513f1039f..5ba4ddf0a32 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -43,6 +43,10 @@ #include "svga_debug.h" #include "svga_state.h" +DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(use_min_mipmap, "SVGA_USE_MIN_MIPMAP", FALSE); +DEBUG_GET_ONCE_NUM_OPTION(disable_shader, "SVGA_DISABLE_SHADER", ~0); static void svga_destroy( struct pipe_context *pipe ) { @@ -113,10 +117,10 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, /* debug */ - svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE); - svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE); - svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE); - svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); + svga->debug.no_swtnl = debug_get_option_no_swtnl(); + svga->debug.force_swtnl = debug_get_option_force_swtnl(); + svga->debug.use_min_mipmap = debug_get_option_use_min_mipmap(); + svga->debug.disable_shader = debug_get_option_disable_shader(); if (!svga_init_swtnl(svga)) goto no_swtnl; -- cgit v1.2.3 From 
c523f31f4a35f8396ab35859c70fb041c210cedb Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 29 Dec 2010 12:50:59 +0100 Subject: svga: Add more swrast debuging --- src/gallium/drivers/svga/svga_context.c | 4 ++++ src/gallium/drivers/svga/svga_context.h | 7 +++++++ src/gallium/drivers/svga/svga_pipe_rasterizer.c | 22 ++++++++++++++++++---- src/gallium/drivers/svga/svga_state_need_swtnl.c | 5 +++++ 4 files changed, 34 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 5ba4ddf0a32..61f99d1eb18 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -47,6 +47,8 @@ DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE) DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE); DEBUG_GET_ONCE_BOOL_OPTION(use_min_mipmap, "SVGA_USE_MIN_MIPMAP", FALSE); DEBUG_GET_ONCE_NUM_OPTION(disable_shader, "SVGA_DISABLE_SHADER", ~0); +DEBUG_GET_ONCE_BOOL_OPTION(no_line_width, "SVGA_NO_LINE_WIDTH", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE", FALSE); static void svga_destroy( struct pipe_context *pipe ) { @@ -121,6 +123,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga->debug.force_swtnl = debug_get_option_force_swtnl(); svga->debug.use_min_mipmap = debug_get_option_use_min_mipmap(); svga->debug.disable_shader = debug_get_option_disable_shader(); + svga->debug.no_line_width = debug_get_option_no_line_width(); + svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple(); if (!svga_init_swtnl(svga)) goto no_swtnl; diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index d4970908b1e..0550ddd79b9 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -148,6 +148,10 @@ struct svga_rasterizer_state { unsigned hw_unfilled:16; /* 
PIPE_POLYGON_MODE_x */ unsigned need_pipeline:16; /* which prims do we need help for? */ + + const char* need_pipeline_tris_str; + const char* need_pipeline_lines_str; + const char* need_pipeline_points_str; }; struct svga_sampler_state { @@ -317,6 +321,9 @@ struct svga_context unsigned shader_id; unsigned disable_shader; + + boolean no_line_width; + boolean force_hw_line_stipple; } debug; struct { diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index e97b4e57415..c6657e79ef3 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -64,7 +64,9 @@ static void * svga_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *templ) { + struct svga_context *svga = svga_context(pipe); struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state ); + /* need this for draw module. */ rast->templ = *templ; @@ -93,17 +95,22 @@ svga_create_rasterizer_state(struct pipe_context *pipe, /* Use swtnl + decomposition implement these: */ - if (templ->poly_stipple_enable) + if (templ->poly_stipple_enable) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "poly stipple"; + } if (templ->line_width != 1.0 && - templ->line_width != 0.0) + templ->line_width != 0.0 && + !svga->debug.no_line_width) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "line width"; + } if (templ->line_stipple_enable) { /* LinePattern not implemented on all backends. 
*/ - if (0) { + if (!svga->debug.force_hw_line_stipple) { SVGA3dLinePattern lp; lp.repeat = templ->line_stipple_factor + 1; lp.pattern = templ->line_stipple_pattern; @@ -111,11 +118,14 @@ svga_create_rasterizer_state(struct pipe_context *pipe, } else { rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "line stipple"; } } - if (templ->point_smooth) + if (templ->point_smooth) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; + rast->need_pipeline_points_str = "smooth points"; + } { int fill_front = templ->fill_front; @@ -148,6 +158,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, * front/back fill modes: */ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "different front/back fillmodes"; } else { offset = offset_front; @@ -172,6 +183,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "unfilled primitives with no index manipulation"; } /* If we are decomposing to lines, and lines need the pipeline, @@ -182,6 +194,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "decomposing lines"; } /* Similarly for points: @@ -191,6 +204,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "decomposing points"; } if (offset) { diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index 8ba5ac8cdb4..e06e1f8e5f9 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -118,6 +118,11 @@ static int update_need_pipeline( struct svga_context *svga, __FUNCTION__, svga->curr.rast->need_pipeline, (1 << svga->curr.reduced_prim) 
); + SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline tris (%s), lines (%s), points (%s)\n", + __FUNCTION__, + svga->curr.rast->need_pipeline_tris_str, + svga->curr.rast->need_pipeline_lines_str, + svga->curr.rast->need_pipeline_points_str); need_pipeline = TRUE; } -- cgit v1.2.3 From db4f6c7eeb7134c837b6832f60e973e818ec3977 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 24 Jan 2011 20:04:31 -0800 Subject: nvc0: Move declaration before code. Fixes nvc0 SCons build. --- src/gallium/drivers/nvc0/nvc0_mm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c index 0629dad19c9..516d2e31b55 100644 --- a/src/gallium/drivers/nvc0/nvc0_mm.c +++ b/src/gallium/drivers/nvc0/nvc0_mm.c @@ -96,13 +96,13 @@ mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) static INLINE uint32_t mm_default_slab_size(unsigned chunk_order) { - assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); - static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = { 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 }; + assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); + return 1 << slab_order[chunk_order - MM_MIN_ORDER]; } -- cgit v1.2.3 From 40ac24e631e694e00652a2df6c0ac5d0f07f620f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 25 Jan 2011 11:57:35 -0700 Subject: softpipe: fix off-by-one error in setup_fragcoord_coeff() If we invert Y, need to subtract one from the surface height. Fixes https://bugs.freedesktop.org/show_bug.cgi?id=26795 for softpipe. NOTE: This is a candidate for the 7.9 and 7.10 branches. 
--- src/gallium/drivers/softpipe/sp_setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 5d727dc00df..0ce28f4c6ee 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -575,7 +575,7 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot) setup->coef[slot].dady[0] = 0.0; /*Y*/ setup->coef[slot].a0[1] = - (spfs->origin_lower_left ? setup->softpipe->framebuffer.height : 0) + (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) + (spfs->pixel_center_integer ? 0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0; -- cgit v1.2.3 From 90c2fd86407999475ff6accecf36e5a2c75feb9b Mon Sep 17 00:00:00 2001 From: Mathias Fröhlich Date: Sun, 23 Jan 2011 22:35:13 +0100 Subject: r600g: Implement timer queries. --- src/gallium/drivers/r600/r600.h | 1 + src/gallium/drivers/r600/r600_pipe.c | 5 ++- src/gallium/winsys/r600/drm/r600_drm.c | 30 +++++++++++++++ src/gallium/winsys/r600/drm/r600_hw_context.c | 55 ++++++++++++++++++++------- src/gallium/winsys/r600/drm/r600_priv.h | 1 + 5 files changed, 78 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index a852bef6156..b8888bede20 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -113,6 +113,7 @@ struct r600_tiling_info { enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); +unsigned r600_get_clock_crystal_freq(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 53d2c10c560..1ee327f16af 100644 --- 
a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -283,7 +283,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 1; /* Unsupported features (boolean caps). */ - case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ @@ -318,6 +317,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + /* Timer queries, present when the clock frequency is non zero. */ + case PIPE_CAP_TIMER_QUERY: + return r600_get_clock_crystal_freq(rscreen->radeon) != 0; + default: R600_ERR("r600: unknown param %d\n", param); return 0; diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 6fc147fb6f4..3c7e9aa4490 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -41,6 +41,10 @@ #define RADEON_INFO_TILING_CONFIG 0x6 #endif +#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ +#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9 +#endif + enum radeon_family r600_get_family(struct radeon *r600) { return r600->family; @@ -56,6 +60,11 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) return &radeon->tiling_info; } +unsigned r600_get_clock_crystal_freq(struct radeon *radeon) +{ + return radeon->clock_crystal_freq; +} + static int radeon_get_device(struct radeon *radeon) { struct drm_radeon_info info; @@ -124,6 +133,24 @@ static int radeon_drm_get_tiling(struct radeon *radeon) return 0; } +static int radeon_get_clock_crystal_freq(struct radeon *radeon) +{ + struct drm_radeon_info info; + uint32_t clock_crystal_freq; + int r; + + radeon->device = 0; + info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ; + info.value = (uintptr_t)&clock_crystal_freq; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + if (r) + return r; + + 
radeon->clock_crystal_freq = clock_crystal_freq; + return 0; +} + static int radeon_init_fence(struct radeon *radeon) { radeon->fence = 1; @@ -205,6 +232,9 @@ static struct radeon *radeon_new(int fd, unsigned device) if (radeon_drm_get_tiling(radeon)) return NULL; } + /* get the GPU counter frequency, failure is non fatal */ + radeon_get_clock_crystal_freq(radeon); + radeon->bomgr = r600_bomgr_create(radeon, 1000000); if (radeon->bomgr == NULL) { return NULL; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 96e409015c2..6b7e4d886f2 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -1263,7 +1263,8 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu for (i = 0; i < query->num_results; i += 4) { start = (u64)results[i] | (u64)results[i + 1] << 32; end = (u64)results[i + 2] | (u64)results[i + 3] << 32; - if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) { + if (((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) + || query->type == PIPE_QUERY_TIME_ELAPSED) { query->result += end - start; } } @@ -1275,8 +1276,15 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { - /* query request needs 6 dwords for begin + 6 dwords for end */ - if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) { + unsigned required_space; + + /* query request needs 6/8 dwords for begin + 6/8 dwords for end */ + if (query->type == PIPE_QUERY_TIME_ELAPSED) + required_space = 16; + else + required_space = 12; + + if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ r600_context_flush(ctx); } @@ -1288,10 +1296,19 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) } /* emit begin query */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); - 
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); - ctx->pm4[ctx->pm4_cdwords++] = 0; + if (query->type == PIPE_QUERY_TIME_ELAPSED) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); + ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = 0; + } else { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = 0; + } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); @@ -1304,10 +1321,19 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) void r600_query_end(struct r600_context *ctx, struct r600_query *query) { /* emit begin query */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); - ctx->pm4[ctx->pm4_cdwords++] = 0; + if (query->type == PIPE_QUERY_TIME_ELAPSED) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); + ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = 0; + } else { + ctx->pm4[ctx->pm4_cdwords++] = 
PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = 0; + } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); @@ -1322,7 +1348,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned { struct r600_query *query; - if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) + if (query_type != PIPE_QUERY_OCCLUSION_COUNTER && query_type != PIPE_QUERY_TIME_ELAPSED) return NULL; query = calloc(1, sizeof(struct r600_query)); @@ -1366,7 +1392,10 @@ boolean r600_context_query_result(struct r600_context *ctx, } if (!r600_query_result(ctx, query, wait)) return FALSE; - *result = query->result; + if (query->type == PIPE_QUERY_TIME_ELAPSED) + *result = (1000000*query->result)/r600_get_clock_crystal_freq(ctx->radeon); + else + *result = query->result; query->result = 0; return TRUE; } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index a38a6481b4f..2d91cd97d68 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -49,6 +49,7 @@ struct radeon { unsigned fence; unsigned *cfence; struct r600_bo *fence_bo; + unsigned clock_crystal_freq; }; struct r600_reg { -- cgit v1.2.3 From 779e9cb658dba4ef44fae7e8aa62409f7227f46c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 25 Jan 2011 20:27:10 -0700 Subject: softpipe: support for 1D/2D texture arrays --- src/gallium/drivers/softpipe/sp_screen.c | 4 + src/gallium/drivers/softpipe/sp_tex_sample.c | 201 +++++++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_texture.c | 6 +- 3 files changed, 210 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c index e19f2e6fc7d..6d47fb96280 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -125,6 +125,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_INSTANCED_DRAWING: return 1; + case PIPE_CAP_ARRAY_TEXTURES: + return 1; default: return 0; } @@ -185,7 +187,9 @@ softpipe_is_format_supported( struct pipe_screen *screen, assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 242c27c7ebd..15f7eb2b94e 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -539,6 +539,19 @@ wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size, } +/** + * Do coordinate to array index conversion. For array textures. 
+ */ +static INLINE void +wrap_array_layer(const float coord[4], unsigned size, int layer[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + int c = util_ifloor(coord[ch] + 0.5F); + layer[ch] = CLAMP(c, 0, size - 1); + } +} + /** * Examine the quad's texture coordinates to compute the partial @@ -989,6 +1002,47 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, } +static void +img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width; + int x[4], layer[4]; + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + wrap_array_layer(t, texture->height0, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_2d(samp, addr, x[j], layer[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } +} + + static void img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], @@ -1033,6 +1087,50 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, } +static void +img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width, height; + int x[4], y[4], layer[4]; + 
union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + samp->nearest_texcoord_t(t, height, y); + wrap_array_layer(p, texture->depth0, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_3d(samp, addr, x[j], y[j], layer[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } +} + + static INLINE union tex_tile_address face(union tex_tile_address addr, unsigned face ) { @@ -1167,6 +1265,47 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, } +static void +img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width; + int x0[4], x1[4], layer[4]; + float xw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + wrap_array_layer(t, texture->height0, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_2d(samp, addr, x0[j], layer[j]); + const float *tx1 = get_texel_2d(samp, addr, x1[j], layer[j]); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]); + } + } +} + + static void img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], @@ -1214,6 +1353,54 @@ 
img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, } +static void +img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width, height; + int x0[4], y0[4], x1[4], y1[4], layer[4]; + float xw[4], yw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_t(t, height, y0, y1, yw); + wrap_array_layer(p, texture->depth0, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_3d(samp, addr, x0[j], y0[j], layer[j]); + const float *tx1 = get_texel_3d(samp, addr, x1[j], y0[j], layer[j]); + const float *tx2 = get_texel_3d(samp, addr, x0[j], y1[j], layer[j]); + const float *tx3 = get_texel_3d(samp, addr, x1[j], y1[j], layer[j]); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx0[c], tx1[c], + tx2[c], tx3[c]); + } + } +} + + static void img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], @@ -1906,8 +2093,10 @@ get_lambda_func(const union sp_sampler_key key) switch (key.bits.target) { case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: return compute_lambda_1d; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: return compute_lambda_2d; @@ -1932,6 +2121,12 @@ get_img_filter(const union sp_sampler_key key, else return img_filter_1d_linear; break; + case 
PIPE_TEXTURE_1D_ARRAY: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_1d_array_nearest; + else + return img_filter_1d_array_linear; + break; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: /* Try for fast path: @@ -1967,6 +2162,12 @@ get_img_filter(const union sp_sampler_key key, else return img_filter_2d_linear; break; + case PIPE_TEXTURE_2D_ARRAY: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_2d_array_nearest; + else + return img_filter_2d_array_linear; + break; case PIPE_TEXTURE_CUBE: if (filter == PIPE_TEX_FILTER_NEAREST) return img_filter_cube_nearest; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 509d9982b17..2daed2022e9 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -227,9 +227,13 @@ sp_get_tex_image_offset(const struct softpipe_resource *spr, unsigned offset = spr->level_offset[level]; if (spr->base.target == PIPE_TEXTURE_CUBE || - spr->base.target == PIPE_TEXTURE_3D) { + spr->base.target == PIPE_TEXTURE_3D || + spr->base.target == PIPE_TEXTURE_2D_ARRAY) { offset += layer * nblocksy * spr->stride[level]; } + else if (spr->base.target == PIPE_TEXTURE_1D_ARRAY) { + offset += layer * spr->stride[level]; + } else { assert(layer == 0); } -- cgit v1.2.3 From db299a9f8244d53d9041fcdbd396a77ebe1f9e3e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 25 Jan 2011 05:37:52 +0100 Subject: r300g: fix some bugs with zbuffer compression (v4) This drops the memblock manager for ZMASK. Instead, only one zbuffer can be compressed at a time. Note that this does not necessarily have to be slower. When there is a large number of zbuffers, compression might be used more often than it was before. It's also easier to debug. How it works: 1) 'clear' turns the compression on. 2) If some other zbuffer is set or the currently-bound zbuffer is used for texturing, the driver decompresses it and then turns the compression off. 
Notes: - The ZMASK clear has been refactored, so that only one packet3 is used to clear ZMASK. - The 8x8 compression mode is disabled. I couldn't make it work without issues. - Also removed driver-specific stuff from u_blitter. Driver status: - RV530 and R580 appear to just work (finally). - RV570 should work, but there may be an issue that we don't correctly calculate the number of dwords to clear, resulting in a partially uninitialized zbuffer. - RS690 misrenders as if no ZMASK clear happened. No idea what's going on. - RV350 may even hardlock. This issue was already present and this patch doesn't fix it. I think we are still missing some hardware info we need to make the zbuffer compression work fully. Note that there is also an issue with HiZ, resulting in a sort of blocky zigzagged corruption around some objects. --- src/gallium/auxiliary/util/u_blitter.c | 77 +++++---- src/gallium/auxiliary/util/u_blitter.h | 13 +- src/gallium/auxiliary/util/u_inlines.h | 15 ++ src/gallium/drivers/r300/r300_blit.c | 227 ++++++++++++++------------ src/gallium/drivers/r300/r300_chipset.c | 2 + src/gallium/drivers/r300/r300_chipset.h | 13 +- src/gallium/drivers/r300/r300_context.c | 17 +- src/gallium/drivers/r300/r300_context.h | 25 +-- src/gallium/drivers/r300/r300_emit.c | 88 +++++----- src/gallium/drivers/r300/r300_hyperz.c | 120 +++----------- src/gallium/drivers/r300/r300_hyperz.h | 2 +- src/gallium/drivers/r300/r300_state.c | 66 +++++--- src/gallium/drivers/r300/r300_state_derived.c | 28 +++- src/gallium/drivers/r300/r300_texture.c | 2 - 14 files changed, 364 insertions(+), 331 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index e72f267f53d..1f9d50932e3 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -86,7 +86,6 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void 
*dsa_keep_depth_write_stencil; - void *dsa_flush_depth_stencil; void *velem_state; @@ -158,10 +157,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); - dsa.depth.writemask = 1; - ctx->dsa_flush_depth_stencil = - pipe->create_depth_stencil_alpha_state(pipe, &dsa); - dsa.depth.enabled = 1; dsa.depth.writemask = 1; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -247,7 +242,6 @@ void util_blitter_destroy(struct blitter_context *blitter) ctx->dsa_write_depth_keep_stencil); pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil); - pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); pipe->delete_rasterizer_state(pipe, ctx->rs_state); pipe->delete_vs_state(pipe, ctx->vs); @@ -665,12 +659,13 @@ static void blitter_draw_rectangle(struct blitter_context *blitter, blitter_draw_quad(ctx); } -void util_blitter_clear(struct blitter_context *blitter, - unsigned width, unsigned height, - unsigned num_cbufs, - unsigned clear_buffers, - const float *rgba, - double depth, unsigned stencil) +static void util_blitter_clear_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + unsigned num_cbufs, + unsigned clear_buffers, + const float *rgba, + double depth, unsigned stencil, + void *custom_blend, void *custom_dsa) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; @@ -681,26 +676,28 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_check_saved_CSOs(ctx); /* bind CSOs */ - if (clear_buffers & PIPE_CLEAR_COLOR) + if (custom_blend) { + pipe->bind_blend_state(pipe, custom_blend); + } else if (clear_buffers & PIPE_CLEAR_COLOR) { pipe->bind_blend_state(pipe, ctx->blend_write_color); - else + } else { pipe->bind_blend_state(pipe, ctx->blend_keep_color); + } - if 
((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { - sr.ref_value[0] = stencil & 0xff; + if (custom_dsa) { + pipe->bind_depth_stencil_alpha_state(pipe, custom_dsa); + } else if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); - pipe->set_stencil_ref(pipe, &sr); - } - else if (clear_buffers & PIPE_CLEAR_DEPTH) { + } else if (clear_buffers & PIPE_CLEAR_DEPTH) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil); - } - else if (clear_buffers & PIPE_CLEAR_STENCIL) { - sr.ref_value[0] = stencil & 0xff; + } else if (clear_buffers & PIPE_CLEAR_STENCIL) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil); - pipe->set_stencil_ref(pipe, &sr); - } - else + } else { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); + } + + sr.ref_value[0] = stencil & 0xff; + pipe->set_stencil_ref(pipe, &sr); pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); @@ -713,6 +710,27 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_restore_CSOs(ctx); } +void util_blitter_clear(struct blitter_context *blitter, + unsigned width, unsigned height, + unsigned num_cbufs, + unsigned clear_buffers, + const float *rgba, + double depth, unsigned stencil) +{ + util_blitter_clear_custom(blitter, width, height, num_cbufs, + clear_buffers, rgba, depth, stencil, + NULL, NULL); +} + +void util_blitter_clear_depth_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + double depth, void *custom_dsa) +{ + const float rgba[4] = {0, 0, 0, 0}; + util_blitter_clear_custom(blitter, width, height, 0, + 0, rgba, depth, 0, NULL, custom_dsa); +} + static boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2) @@ -1024,12 +1042,3 @@ void 
util_blitter_custom_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } - -/* flush a region of a depth stencil surface for r300g */ -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf) -{ - struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - util_blitter_custom_depth_stencil(blitter, dstsurf, NULL, - ctx->dsa_flush_depth_stencil, 0.0f); -} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 922a8580ac1..47494c94110 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -55,13 +55,13 @@ struct blitter_context * \param y1 A Y coordinate of the top-left corner. * \param x2 An X coordinate of the bottom-right corner. * \param y2 A Y coordinate of the bottom-right corner. - * \param depth A depth which the rectangle is rendered at. + * \param depth A depth which the rectangle is rendered at. * * \param type Semantics of the attributes "attrib". * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them. * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes - * make up a constant RGBA color, and should go to the COLOR0 - * varying slot of a fragment shader. + * make up a constant RGBA color, and should go + * to the GENERIC0 varying slot of a fragment shader. * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and * {a3, a4} specify top-left and bottom-right texture * coordinates of the rectangle, respectively, and should go @@ -141,6 +141,10 @@ void util_blitter_clear(struct blitter_context *blitter, const float *rgba, double depth, unsigned stencil); +void util_blitter_clear_depth_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + double depth, void *custom_dsa); + /** * Copy a block of pixels from one surface to another. 
* @@ -200,9 +204,6 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf); - void util_blitter_custom_depth_stencil(struct blitter_context *blitter, struct pipe_surface *zsurf, struct pipe_surface *cbsurf, diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index b4870bce981..fc4eb8fdd2e 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -160,6 +160,21 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps, pipe_surface_reset(ctx, ps, pt, level, layer, flags); } +/* Return true if the surfaces are equal. */ +static INLINE boolean +pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2) +{ + return s1->texture == s2->texture && + s1->format == s2->format && + (s1->texture->target != PIPE_BUFFER || + (s1->u.buf.first_element == s2->u.buf.first_element && + s1->u.buf.last_element == s2->u.buf.last_element)) && + (s1->texture->target == PIPE_BUFFER || + (s1->u.tex.level == s2->u.tex.level && + s1->u.tex.first_layer == s2->u.tex.first_layer && + s1->u.tex.last_layer == s2->u.tex.last_layer)); +} + /* * Convenience wrappers for screen buffer functions. 
*/ diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index e195128d263..f24d5582e17 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -58,8 +58,9 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count, r300->vertex_buffer); - if (op & (R300_CLEAR_SURFACE | R300_COPY)) + if (op & (R300_CLEAR_SURFACE | R300_COPY)) { util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); + } if (op & R300_COPY) { struct r300_textures_state* state = @@ -108,6 +109,23 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static boolean r300_fast_zclear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + /* Cannot decompress zmask with a 16-bit zbuffer. + * Also compression causes a hung. */ + if (util_format_get_blocksizebits(fb->zsbuf->texture->format) == 16) + return FALSE; + + /* Cannot use compression with a linear zbuffer. */ + if (!r300_texture(fb->zsbuf->texture)->desc.microtile) + return FALSE; + + return TRUE; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -132,37 +150,46 @@ static void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil) { - /* My notes about fastfill: + /* My notes about Zbuffer compression: * - * 1) Only the zbuffer is cleared. + * 1) The zbuffer must be micro-tiled and whole microtiles must be + * written if compression is enabled. If microtiling is disabled, + * it locks up. * - * 2) The zbuffer must be micro-tiled and whole microtiles must be - * written. If microtiling is disabled, it locks up. + * 2) There is ZMASK RAM which contains a compressed zbuffer. 
+ * Each dword of the Z Mask contains compression information + * for 16 4x4 pixel tiles, that is 2 bits for each tile. + * On chips with 2 Z pipes, every other dword maps to a different + * pipe. On newer chipsets, there is a new compression mode + * with 8x8 pixel tiles per 2 bits. * - * 3) There is Z Mask RAM which contains a compressed zbuffer and - * it interacts with fastfill. We should figure out how to use it - * to get more performance. - * This is what we know about the Z Mask: + * 3) The FASTFILL bit has nothing to do with filling. It only tells hw + * it should look in the ZMASK RAM first before fetching from a real + * zbuffer. * - * Each dword of the Z Mask contains compression information - * for 16 4x4 pixel blocks, that is 2 bits for each block. - * On chips with 2 Z pipes, every other dword maps to a different - * pipe. + * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned + * during zbuffer reads instead of the value that is actually stored + * in the zbuffer memory. A pixel is in a cleared state when its ZMASK + * is equal to 0. Therefore, if you clear ZMASK with zeros, you may + * leave the zbuffer memory uninitialized, but then you must enable + * compression, so that the ZMASK RAM is actually used. * - * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must - * be equal to 0. (clear the Z Mask RAM with zeros) + * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed + * during zbuffer updates. A special decompressing operation should be + * used to fully decompress a zbuffer, which basically just stores all + * compressed tiles in ZMASK to the zbuffer memory. * - * 5) For 16-bit zbuffer, compression causes a hung with one or + * 6) For a 16-bit zbuffer, compression causes a hung with one or * two samples and should not be used. 
* - * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears + * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears * to avoid needless decompression. * - * 7) Fastfill must not be used if reading of compressed Z data is disabled + * 8) Fastfill must not be used if reading of compressed Z data is disabled * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), * i.e. it cannot be used to compress the zbuffer. * - * 8) ZB_CB_CLEAR does not interact with fastfill in any way. + * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way. * * - Marek */ @@ -179,25 +206,23 @@ static void r300_clear(struct pipe_context* pipe, boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. */ - r300_flush_depth_textures(r300); - /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { hyperz_dcv = hyperz->zb_depthclearvalue = r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) { + if (r300_fast_zclear_allowed(r300)) { r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } + if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) r300_mark_atom_dirty(r300, &r300->hiz_clear); + + /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state); + * once hiz offset is constant. */ + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Enable CBZB clear. 
*/ @@ -211,7 +236,7 @@ static void r300_clear(struct pipe_context* pipe, height = surf->cbzb_height; r300->cbzb_clear = TRUE; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Clear. */ @@ -225,7 +250,7 @@ static void r300_clear(struct pipe_context* pipe, buffers, rgba, depth, stencil); r300_blitter_end(r300); } else if (r300->zmask_clear.dirty) { - /* Just clear zmask and hiz now, this does not use a standard draw + /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; @@ -257,16 +282,15 @@ static void r300_clear(struct pipe_context* pipe, if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; hyperz->zb_depthclearvalue = hyperz_dcv; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update * looks if zmask/hiz is in use and enables fastfill accordingly. */ - if (zstex && - (zstex->zmask_in_use[fb->zsbuf->u.tex.level] || - zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + if (r300->zmask_in_use || + (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -280,16 +304,16 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. 
*/ - r300_flush_depth_textures(r300); + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_render_target(r300->blitter, dst, rgba, dstx, dsty, width, height); r300_blitter_end(r300); + + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } /* Clear a region of a depth stencil surface. */ @@ -302,83 +326,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, unsigned width, unsigned height) { struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. */ - r300_flush_depth_textures(r300); + if (r300->zmask_in_use && !r300->zmask_locked) { + if (fb->zsbuf->texture == dst->texture) { + r300_decompress_zmask(r300); + } else { + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); r300_blitter_end(r300); + + if (r300->zmask_locked) { + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } } -/* Flush a depth stencil buffer. 
*/ -static void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer) +void r300_decompress_zmask(struct r300_context *r300) { - struct r300_context *r300 = r300_context(pipe); - struct pipe_surface *dstsurf, surf_tmpl; - struct r300_texture *tex = r300_texture(dst); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!tex->zmask_mem[level]) - return; - if (!tex->zmask_in_use[level]) + if (!r300->zmask_in_use || r300->zmask_locked) return; - surf_tmpl.format = dst->format; - surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - surf_tmpl.u.tex.level = level; - surf_tmpl.u.tex.first_layer = layer; - surf_tmpl.u.tex.last_layer = layer; - dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl); - - r300->z_decomp_rd = TRUE; + r300->zmask_decompress = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); - r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_begin(r300, R300_CLEAR); + util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0, + r300->dsa_decompress_zmask); r300_blitter_end(r300); - r300->z_decomp_rd = FALSE; - tex->zmask_in_use[level] = FALSE; - pipe_surface_reference(&dstsurf, NULL); + r300->zmask_decompress = FALSE; + r300->zmask_in_use = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } -/* We can't use compressed zbuffers as samplers. 
*/ -void r300_flush_depth_textures(struct r300_context *r300) +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) { - struct r300_textures_state *state = - (struct r300_textures_state*)r300->textures_state.state; - unsigned i, level; - unsigned count = MIN2(state->sampler_view_count, - state->sampler_state_count); - - if (r300->z_decomp_rd) - return; - - for (i = 0; i < count; i++) - if (state->sampler_views[i] && state->sampler_states[i]) { - struct pipe_resource *tex = state->sampler_views[i]->base.texture; - - if (tex->target == PIPE_TEXTURE_3D || - tex->target == PIPE_TEXTURE_CUBE) - continue; + struct pipe_framebuffer_state fb = {0}; + fb.width = r300->locked_zbuffer->width; + fb.height = r300->locked_zbuffer->height; + fb.nr_cbufs = 0; + fb.zsbuf = r300->locked_zbuffer; + + r300->context.set_framebuffer_state(&r300->context, &fb); + r300_decompress_zmask(r300); +} - /* Ignore non-depth textures. - * Also ignore reinterpreted depth textures, e.g. resource_copy. */ - if (!util_format_is_depth_or_stencil(tex->format)) - continue; +void r300_decompress_zmask_locked(struct r300_context *r300) +{ + struct pipe_framebuffer_state saved_fb = {0}; - for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->zmask_in_use[level]) { - /* We don't handle 3D textures and cubemaps yet. */ - r300_flush_depth_stencil(&r300->context, tex, level, 0); - } - } + util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); + r300_decompress_zmask_locked_unsafe(r300); + r300->context.set_framebuffer_state(&r300->context, &saved_fb); + util_unreference_framebuffer_state(&saved_fb); } /* Copy a block of pixels from one surface to another using HW. 
*/ @@ -393,8 +404,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - - /* Do a copy */ util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, TRUE); r300_blitter_end(r300); @@ -409,10 +418,22 @@ static void r300_resource_copy_region(struct pipe_context *pipe, unsigned src_level, const struct pipe_box *src_box) { + struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; const struct util_format_description *desc = util_format_description(old_format); - boolean is_depth; + + if (r300->zmask_in_use && !r300->zmask_locked) { + if (fb->zsbuf->texture == src || + fb->zsbuf->texture == dst) { + r300_decompress_zmask(r300); + } else { + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || (!pipe->screen->is_format_supported(pipe->screen, @@ -441,11 +462,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) { - r300_flush_depth_stencil(pipe, src, src_level, src_box->z); - } - if (old_format != new_format) { r300_texture_reinterpret_format(pipe->screen, dst, new_format); @@ -462,6 +478,11 @@ static void r300_resource_copy_region(struct pipe_context *pipe, r300_texture_reinterpret_format(pipe->screen, src, old_format); } + + if (r300->zmask_locked) { + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } } void r300_init_blit_functions(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 2b183f62c56..15dc6d09eeb 100644 --- 
a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -424,5 +424,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) } caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; + /* XXX The 8x8 compression mode doesn't always work (piglit/fbo-depth fails). */ + caps->z_compress = /*caps->is_rv350 ? R300_ZCOMP_8X8 :*/ R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index f2035d20092..0be161fa07a 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -33,6 +33,13 @@ #define PIPE_ZMASK_SIZE 4096 #define RV3xx_ZMASK_SIZE 5120 +/* The size of a compressed tile. Each compressed tile takes 2 bits + * in the ZMASK RAM, so there is always 16 tiles per one dword. */ +enum r300_zmask_compression { + R300_ZCOMP_4X4 = 4, + R300_ZCOMP_8X8 = 8 +}; + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -50,10 +57,12 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + /* Some chipsets do not have HiZ RAM - other have varying amounts. */ int hiz_ram; - /* some chipsets have zmask ram per pipe some don't */ + /* Some chipsets have zmask ram per pipe some don't. */ int zmask_ram; + /* Compression mode for ZMASK. */ + enum r300_zmask_compression z_compress; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. 
The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index e265bdbd3b0..552df2b4769 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -94,6 +94,9 @@ static void r300_release_referenced_objects(struct r300_context *r300) remove_from_list(query); FREE(query); } + + r300->context.delete_depth_stencil_alpha_state(&r300->context, + r300->dsa_decompress_zmask); } static void r300_destroy_context(struct pipe_context* context) @@ -116,9 +119,6 @@ static void r300_destroy_context(struct pipe_context* context) /* XXX: This function assumes r300->query_list was initialized */ r300_release_referenced_objects(r300); - if (r300->zmask_mm) - r300_hyperz_destroy_mm(r300); - if (r300->cs) r300->rws->cs_destroy(r300->cs); @@ -238,7 +238,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (has_hiz_ram) R300_INIT_ATOM(hiz_clear, 0); /* zmask clear */ - R300_INIT_ATOM(zmask_clear, 0); + R300_INIT_ATOM(zmask_clear, 4); } /* ZB (unpipelined), SU. 
*/ R300_INIT_ATOM(query_start, 4); @@ -513,6 +513,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->dummy_vb = screen->resource_create(screen, &vb); } + { + struct pipe_depth_stencil_alpha_state dsa = {}; + dsa.depth.writemask = 1; + + r300->dsa_decompress_zmask = + r300->context.create_depth_stencil_alpha_state(&r300->context, + &dsa); + } + return &r300->context; fail: diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 9030f1bb982..6e96ae85ffd 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -406,8 +406,6 @@ struct r300_texture { /* hyper-z memory allocs */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; - boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. @@ -589,15 +587,21 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; -#define R300_Z_COMPRESS_44 1 -#define RV350_Z_COMPRESS_88 2 - int z_compression; + boolean cbzb_clear; - boolean z_decomp_rd; + /* Whether ZMASK is enabled. */ + boolean zmask_in_use; + /* Whether ZMASK is being decompressed. */ + boolean zmask_decompress; + /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */ + boolean zmask_locked; + /* The zbuffer the ZMASK of which is locked. 
*/ + struct pipe_surface *locked_zbuffer; + + void *dsa_decompress_zmask; /* two mem block managers for hiz/zmask ram space */ struct mem_block *hiz_mm; - struct mem_block *zmask_mm; /* upload managers */ struct u_upload_mgr *upload_vb; @@ -687,7 +691,9 @@ void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); /* r300_blit.c */ -void r300_flush_depth_textures(struct r300_context *r300); +void r300_decompress_zmask(struct r300_context *r300); +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); +void r300_decompress_zmask_locked(struct r300_context *r300); /* r300_query.c */ void r300_resume_query(struct r300_context *r300, @@ -713,8 +719,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG, - R300_CHANGED_ZCLEAR_FLAG, + R300_CHANGED_HYPERZ_FLAG, R300_CHANGED_MULTIWRITE }; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index d14cdcbbaf0..54e263436bd 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -433,6 +433,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) tex = r300_texture(surf->base.texture); surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ if (r300->screen->caps.hiz_ram) { if (tex->hiz_mem[level]) { @@ -443,14 +444,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); } } + /* Z Mask RAM. 
(compressed zbuffer) */ - if (tex->zmask_mem[level]) { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); - } + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); } } @@ -462,6 +459,7 @@ void r300_emit_hyperz_state(struct r300_context *r300, { struct r300_hyperz_state *z = state; CS_LOCALS(r300); + if (z->flush) WRITE_CS_TABLE(&z->cb_flush_begin, size); else @@ -1097,17 +1095,6 @@ static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint1 END_CS; } -static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) @@ -1153,42 +1140,49 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0; - struct r300_texture* tex; - uint32_t i, height; - int mult, offset_shift; - - tex = r300_texture(fb->zsbuf->texture); - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - - offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs; + struct r300_texture *tex; + unsigned numdw, pipes; + unsigned compsize = r300->screen->caps.z_compress; + /* The tile size of 1 DWORD is: + * + * GPU Pipes 4x4 mode 8x8 mode + * ------------------------------------------ + * R580 4P/1Z 32x32 64x64 + * RV570 3P/1Z 48x16 96x32 + * RV530 1P/2Z 32x16 64x32 + */ + static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + 
CS_LOCALS(r300); - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; + if (r300->screen->caps.family == CHIP_FAMILY_RV530) { + pipes = r300->screen->caps.num_z_pipes; + } else { + pipes = r300->screen->caps.num_frag_pipes; + } - height = ALIGN_DIVUP(fb->zsbuf->height, mult); + tex = r300_texture(fb->zsbuf->texture); - offset_shift = 4; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* Get the zbuffer size (with the aligned width and height). */ + numdw = align(tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level], + num_blocks_x_per_dw[pipes-1] * compsize) * + align(fb->zsbuf->height, + num_blocks_y_per_dw[pipes-1] * compsize); - /* okay have width in pixels - divide by block width */ - stride = ALIGN_DIVUP(stride, mult); - /* have width in blocks - divide by number of fragment pipes screen width */ - /* 16 blocks per dword */ - stride = ALIGN_DIVUP(stride, 16); + /* Convert pixels -> dwords. */ + numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * + num_blocks_y_per_dw[pipes-1] * compsize); - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); - } + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(0); + OUT_CS(numdw); + OUT_CS(0); + END_CS; /* Mark the current zbuffer's zmask as in use. 
*/ - tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->zmask_in_use = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_ztop_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index c22e307c679..d996d191755 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -127,7 +127,7 @@ static boolean r300_can_hiz(struct r300_context *r300) z->current_func, dsa_state->z_stencil_control); return FALSE; } - } + } return TRUE; } @@ -139,7 +139,6 @@ static void r300_update_hyperz(struct r300_context* r300) (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_texture *zstex = fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; - boolean zmask_in_use = FALSE; boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; @@ -158,42 +157,40 @@ static void r300_update_hyperz(struct r300_context* r300) if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level]; hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - /* Z fastfill. */ - if (zmask_in_use) { - z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ - } - /* Zbuffer compression. */ - if (zmask_in_use && r300->z_compression) { - z->zb_bw_cntl |= R300_RD_COMP_ENABLE; - if (r300->z_decomp_rd == false) + if (r300->zmask_in_use && !r300->zmask_locked) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | + /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ + R300_RD_COMP_ENABLE; + + if (!r300->zmask_decompress) { z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + } + + if (r300->screen->caps.z_compress == R300_ZCOMP_8X8) { + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } } - /* RV350 and up optimizations. */ - /* The section 10.4.9 in the docs is a lie. 
*/ - if (r300->z_compression == RV350_Z_COMPRESS_88) - z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; - - if (hiz_in_use) { - bool can_hiz = r300_can_hiz(r300); - if (can_hiz) { - z->zb_bw_cntl |= R300_HIZ_ENABLE; - z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; - z->sc_hyperz |= r300_get_sc_hz_max(r300); - z->zb_bw_cntl |= r300_get_hiz_min(r300); + + if (hiz_in_use && r300_can_hiz(r300)) { + z->zb_bw_cntl |= R300_HIZ_ENABLE | + r300_get_hiz_min(r300); + + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | + r300_get_sc_hz_max(r300); + + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 | + R500_HIZ_EQUAL_REJECT_ENABLE; } } /* R500-specific features and optimizations. */ if (r300->screen->caps.is_r500) { - z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; - z->zb_bw_cntl |= - R500_HIZ_EQUAL_REJECT_ENABLE | - R500_PEQ_PACKING_ENABLE | - R500_COVERED_PTR_MASKING_ENABLE; + z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; } } @@ -297,26 +294,10 @@ static void r300_update_hiz_clear(struct r300_context *r300) r300->hiz_clear.size = height * 4; } -static void r300_update_zmask_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - int mult; - - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; - - height = ALIGN_DIVUP(fb->zsbuf->height, mult); - - r300->zmask_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } @@ -324,9 +305,6 @@ void r300_update_hyperz_state(struct r300_context* r300) if (r300->hiz_clear.dirty) { r300_update_hiz_clear(r300); } - if (r300->zmask_clear.dirty) { - r300_update_zmask_clear(r300); - } } void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) @@ -345,43 +323,6 @@ void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface 
*surf) ndw = ALIGN_DIVUP(zsize, 64); tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); - return; -} - -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) -{ - int bsize = 256; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - struct r300_texture *tex; - - tex = r300_texture(surf->base.texture); - - /* We currently don't handle decompression for 3D textures and cubemaps - * correctly. */ - if (tex->desc.b.b.target != PIPE_TEXTURE_1D && - tex->desc.b.b.target != PIPE_TEXTURE_2D && - tex->desc.b.b.target != PIPE_TEXTURE_RECT) - return; - - /* Cannot flush zmask of 16-bit zbuffers. */ - if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16) - return; - - if (tex->zmask_mem[level]) - return; - - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); - - /* each zmask dword represents 16 4x4 blocks - which is 256 pixels - or 16 8x8 depending on the gb peq flag = 1024 pixels */ - if (compress == RV350_Z_COMPRESS_88) - bsize = 1024; - - ndw = ALIGN_DIVUP(zsize, bsize); - tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); - return; } boolean r300_hyperz_init_mm(struct r300_context *r300) @@ -389,15 +330,9 @@ boolean r300_hyperz_init_mm(struct r300_context *r300) struct r300_screen* r300screen = r300->screen; int frag_pipes = r300screen->caps.num_frag_pipes; - r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); - if (!r300->zmask_mm) - return FALSE; - if (r300screen->caps.hiz_ram) { r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); if (!r300->hiz_mm) { - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; return FALSE; } } @@ -413,7 +348,4 @@ void r300_hyperz_destroy_mm(struct r300_context *r300) u_mmDestroy(r300->hiz_mm); r300->hiz_mm = NULL; } - - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h 
index 30a23ec6493..d4c8e7c60a9 100644 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -28,8 +28,8 @@ struct r300_context; void r300_update_hyperz_state(struct r300_context* r300); void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); boolean r300_hyperz_init_mm(struct r300_context *r300); void r300_hyperz_destroy_mm(struct r300_context *r300); + #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 2664c1dc834..ba456d413f6 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -694,8 +694,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, } if (change == R300_CHANGED_FB_STATE || - change == R300_CHANGED_CBZB_FLAG || - change == R300_CHANGED_ZCLEAR_FLAG) { + change == R300_CHANGED_HYPERZ_FLAG) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } @@ -719,8 +718,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, } static void - r300_set_framebuffer_state(struct pipe_context* pipe, - const struct pipe_framebuffer_state* state) +r300_set_framebuffer_state(struct pipe_context* pipe, + const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; @@ -728,7 +727,6 @@ static void boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -744,6 +742,32 @@ static void return; } + if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) { + /* There is a zmask in use, what are we gonna do? 
*/ + if (state->zsbuf) { + if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { + /* Decompress the currently bound zbuffer before we bind another one. */ + r300_decompress_zmask(r300); + } + } else { + /* We don't bind another zbuffer, so lock the current one. */ + r300->zmask_locked = TRUE; + pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); + } + } else if (r300->zmask_locked && r300->locked_zbuffer) { + /* We have a locked zbuffer now, what are we gonna do? */ + if (state->zsbuf) { + if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { + /* We are binding some other zbuffer, so decompress the locked one, + * it gets unlocked automatically. */ + r300_decompress_zmask_locked_unsafe(r300); + } else { + /* We are binding the locked zbuffer again, so unlock it. */ + r300->zmask_locked = FALSE; + } + } + } + /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300_mark_atom_dirty(r300, &r300->blend_state); @@ -758,14 +782,15 @@ static void util_copy_framebuffer_state(r300->fb_state.state, state); + if (!r300->zmask_locked) { + pipe_surface_reference(&r300->locked_zbuffer, NULL); + } + r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); r300->validate_buffers = TRUE; - r300->z_compression = false; - if (state->zsbuf) { - blocksize = util_format_get_blocksize(state->zsbuf->texture->format); - switch (blocksize) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { case 2: zbuffer_bpp = 16; break; @@ -773,30 +798,19 @@ static void zbuffer_bpp = 24; break; } + + /* Setup Hyper-Z. */ if (can_hyperz) { struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_texture *tex; - int compress = r300->screen->caps.is_rv350 ? 
RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; + struct r300_texture *tex = r300_texture(zs_surf->base.texture); int level = zs_surf->base.u.tex.level; - tex = r300_texture(zs_surf->base.texture); - /* work out whether we can support hiz on this buffer */ r300_hiz_alloc_block(r300, zs_surf); - - /* work out whether we can support zmask features on this buffer */ - r300_zmask_alloc_block(r300, zs_surf, compress); - - if (tex->zmask_mem[level]) { - /* compression causes hangs on 16-bit */ - if (zbuffer_bpp == 24) - r300->z_compression = compress; - } + DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, - r300->z_compression, tex->zmask_mem[level] ? 1 : 0, - tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); + "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, + tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef); } /* Polygon offset depends on the zbuffer bit depth. 
*/ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 95be7849f8f..de4c2713281 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -862,11 +862,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } -void r300_update_derived_state(struct r300_context* r300) +static void r300_decompress_depth_textures(struct r300_context *r300) { - r300_flush_depth_textures(r300); + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + struct pipe_resource *tex; + unsigned count = MIN2(state->sampler_view_count, + state->sampler_state_count); + unsigned i; + + if (!r300->zmask_locked || !r300->locked_zbuffer) { + return; + } + + for (i = 0; i < count; i++) { + if (state->sampler_views[i] && state->sampler_states[i]) { + tex = state->sampler_views[i]->base.texture; + if (tex == r300->locked_zbuffer->texture) { + r300_decompress_zmask_locked(r300); + return; + } + } + } +} + +void r300_update_derived_state(struct r300_context* r300) +{ if (r300->textures_state.dirty) { + r300_decompress_depth_textures(r300); r300_merge_textures_and_samplers(r300); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index a5fbe855e7c..ca2762809dd 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -743,8 +743,6 @@ static void r300_texture_destroy(struct pipe_screen *screen, for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { if (tex->hiz_mem[i]) u_mmFreeMem(tex->hiz_mem[i]); - if (tex->zmask_mem[i]) - u_mmFreeMem(tex->zmask_mem[i]); } FREE(tex); -- cgit v1.2.3 From 2e3ccada076fe275d04d37dc60142230445d1007 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 27 Jan 2011 23:06:15 +0100 Subject: r300g: rename flag squaretiling -> drm_2_1_0 --- src/gallium/drivers/r300/r300_texture.c | 2 +- 
src/gallium/drivers/r300/r300_texture_desc.c | 2 +- src/gallium/drivers/r300/r300_winsys.h | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_buffer.c | 9 ++++++--- src/gallium/winsys/radeon/drm/radeon_drm_common.c | 8 ++------ src/gallium/winsys/radeon/drm/radeon_r300.c | 4 ++-- src/gallium/winsys/radeon/drm/radeon_winsys.h | 2 +- 7 files changed, 14 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ca2762809dd..ec8608f74bd 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -889,7 +889,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) + if (rws->get_value(rws, R300_VID_DRM_2_1_0)) microtile = R300_BUFFER_SQUARETILED; break; } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 7b1739142d4..bc33871565b 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -361,7 +361,7 @@ static void r300_setup_tiling(struct r300_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { + if (rws->get_value(rws, R300_VID_DRM_2_1_0)) { desc->microtile = R300_BUFFER_SQUARETILED; } break; diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 460da77a4fb..35ed35cca7c 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -50,7 +50,7 @@ enum r300_value_id { R300_VID_PCI_ID, R300_VID_GB_PIPES, R300_VID_Z_PIPES, - R300_VID_SQUARE_TILING_SUPPORT, + R300_VID_DRM_2_1_0, /* Square tiling. 
*/ R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer */ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index 5e14287ec2d..4b0f688ce9a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -448,15 +448,18 @@ static void radeon_drm_buffer_set_tiling(struct r300_winsys_screen *ws, enum r300_buffer_tiling macrotiled, uint32_t pitch) { +#ifndef RADEON_BO_FLAGS_MICRO_TILE_SQUARE +#define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 +#endif + struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); uint32_t flags = 0; + if (microtiled == R300_BUFFER_TILED) flags |= RADEON_BO_FLAGS_MICRO_TILE; -/* XXX Remove this ifdef when libdrm version 2.4.19 becomes mandatory. */ -#ifdef RADEON_BO_FLAGS_MICRO_TILE_SQUARE else if (microtiled == R300_BUFFER_SQUARETILED) flags |= RADEON_BO_FLAGS_MICRO_TILE_SQUARE; -#endif + if (macrotiled == R300_BUFFER_TILED) flags |= RADEON_BO_FLAGS_MACRO_TILE; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c index fe71f080592..3663c1fff6a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c @@ -107,12 +107,8 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) exit(1); } -/* XXX Remove this ifdef when libdrm version 2.4.19 becomes mandatory. */ -#ifdef RADEON_BO_FLAGS_MICRO_TILE_SQUARE - // Supported since 2.1.0. 
- winsys->squaretiling = version->version_major > 2 || - version->version_minor >= 1; -#endif + winsys->drm_2_1_0 = version->version_major > 2 || + version->version_minor >= 1; winsys->drm_2_3_0 = version->version_major > 2 || version->version_minor >= 3; diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index bacf181b47c..307ae01f5bb 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -145,8 +145,8 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->gb_pipes; case R300_VID_Z_PIPES: return ws->z_pipes; - case R300_VID_SQUARE_TILING_SUPPORT: - return ws->squaretiling; + case R300_VID_DRM_2_1_0: + return ws->drm_2_1_0; case R300_VID_DRM_2_3_0: return ws->drm_2_3_0; case R300_VID_DRM_2_6_0: diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 492edfef8c3..76954d5d1a8 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -46,7 +46,7 @@ struct radeon_drm_winsys { uint32_t z_pipes; /* Z pipe count (rv530 only) */ uint32_t gart_size; /* GART size. */ uint32_t vram_size; /* VRAM size. */ - boolean squaretiling; /* Square tiling support. */ + boolean drm_2_1_0; /* Square tiling support. 
*/ /* DRM 2.3.0 (R500 VAP regs, MSPOS regs, fixed tex3D size checking) */ boolean drm_2_3_0; /* DRM 2.6.0 (Hyper-Z, GB_Z_PEQ_CONFIG allowed on rv350->r4xx, FG_ALPHA_VALUE) */ -- cgit v1.2.3 From 39f16e2aa72d96286296cc0242122a1c5e92897b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 27 Jan 2011 23:13:28 +0100 Subject: r300g: add winsys flag CAN_AACOMPRESS --- src/gallium/drivers/r300/r300_winsys.h | 3 ++- src/gallium/winsys/radeon/drm/radeon_drm_common.c | 25 +++++++++++++++-------- src/gallium/winsys/radeon/drm/radeon_r300.c | 2 ++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 +++ 4 files changed, 23 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 35ed35cca7c..05b57282254 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -54,7 +54,8 @@ enum r300_value_id { R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer */ - R300_CAN_HYPERZ, + R300_CAN_HYPERZ, /* ZMask + HiZ */ + R300_CAN_AACOMPRESS, /* CMask */ }; enum r300_reference_domain { /* bitfield */ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c index 3663c1fff6a..ba548c4db5b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c @@ -43,22 +43,21 @@ #include #include - -/* Enable/disable Hyper-Z access. Return TRUE on success. */ -static boolean radeon_set_hyperz_access(int fd, boolean enable) -{ #ifndef RADEON_INFO_WANT_HYPERZ #define RADEON_INFO_WANT_HYPERZ 7 #endif +#ifndef RADEON_INFO_WANT_CMASK +#define RADEON_INFO_WANT_CMASK 8 +#endif +/* Enable/disable feature access. Return TRUE on success. 
*/ +static boolean radeon_set_fd_access(int fd, unsigned request, boolean enable) +{ struct drm_radeon_info info = {0}; unsigned value = enable ? 1 : 0; - if (!debug_get_bool_option("RADEON_HYPERZ", FALSE)) - return FALSE; - info.value = (unsigned long)&value; - info.request = RADEON_INFO_WANT_HYPERZ; + info.request = request; if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) return FALSE; @@ -148,7 +147,15 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) } winsys->z_pipes = target; - winsys->hyperz = radeon_set_hyperz_access(winsys->fd, TRUE); + if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) { + winsys->hyperz = radeon_set_fd_access(winsys->fd, + RADEON_INFO_WANT_HYPERZ, TRUE); + } + + if (debug_get_bool_option("RADEON_CMASK", FALSE)) { + winsys->aacompress = radeon_set_fd_access(winsys->fd, + RADEON_INFO_WANT_CMASK, TRUE); + } retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 307ae01f5bb..91746bae8a9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -155,6 +155,8 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->drm_2_8_0; case R300_CAN_HYPERZ: return ws->hyperz; + case R300_CAN_AACOMPRESS: + return ws->aacompress; } return 0; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 76954d5d1a8..59e12ff6e6d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -53,8 +53,11 @@ struct radeon_drm_winsys { boolean drm_2_6_0; /* DRM 2.8.0 (US_FORMAT regs, ARGB2101010 colorbuffer) */ boolean drm_2_8_0; + /* Hyper-Z user */ boolean hyperz; + /* AA compression (CMask) */ + boolean aacompress; }; static INLINE struct radeon_drm_winsys * -- cgit v1.2.3 From 
82e60236a950100bda7e00308c9b57861274608c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 27 Jan 2011 23:14:17 +0100 Subject: r300g: print driver info if RADEON_DEBUG=info --- src/gallium/drivers/r300/r300_context.c | 27 ++++++++++++++++++++++- src/gallium/drivers/r300/r300_debug.c | 1 + src/gallium/drivers/r300/r300_screen.h | 1 + src/gallium/drivers/r300/r300_winsys.h | 14 +++++++++--- src/gallium/winsys/radeon/drm/radeon_drm_common.c | 27 +++-------------------- src/gallium/winsys/radeon/drm/radeon_r300.c | 18 +++++++++++---- src/gallium/winsys/radeon/drm/radeon_winsys.h | 11 ++++----- 7 files changed, 60 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 552df2b4769..c8966ee59a5 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -522,9 +522,34 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, &dsa); } + /* Print driver info. */ +#ifdef NDEBUG + if (DBG_ON(r300, DBG_INFO)) { +#else + { +#endif + fprintf(stderr, + "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" + "r300: GART size: %d MB, VRAM size: %d MB\n" + "r300: AA compression: %s, Z compression: %s, HiZ: %s\n", + rws->get_value(rws, R300_VID_DRM_MAJOR), + rws->get_value(rws, R300_VID_DRM_MINOR), + rws->get_value(rws, R300_VID_DRM_PATCHLEVEL), + screen->get_name(screen), + rws->get_value(rws, R300_VID_PCI_ID), + rws->get_value(rws, R300_VID_GB_PIPES), + rws->get_value(rws, R300_VID_Z_PIPES), + rws->get_value(rws, R300_VID_GART_SIZE) >> 20, + rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20, + rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.hiz_ram ? 
"YES" : "NO"); + } + return &r300->context; - fail: +fail: r300_destroy_context(&r300->context); return NULL; } diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index d6aa90bd053..c6b4804cd8d 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -27,6 +27,7 @@ #include static const struct debug_named_value debug_options[] = { + { "info", DBG_INFO, "Print hardware info"}, { "fp", DBG_FP, "Log fragment program compilation" }, { "vp", DBG_VP, "Log vertex program compilation" }, { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 752f53b7579..973b7926fd1 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -94,6 +94,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_HYPERZ (1 << 12) #define DBG_SCISSOR (1 << 13) #define DBG_UPLOAD (1 << 14) +#define DBG_INFO (1 << 15) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 05b57282254..2e8ccdde544 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -50,10 +50,18 @@ enum r300_value_id { R300_VID_PCI_ID, R300_VID_GB_PIPES, R300_VID_Z_PIPES, + R300_VID_GART_SIZE, + R300_VID_VRAM_SIZE, + R300_VID_DRM_MAJOR, + R300_VID_DRM_MINOR, + R300_VID_DRM_PATCHLEVEL, + + /* These should probably go away: */ R300_VID_DRM_2_1_0, /* Square tiling. 
*/ - R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ - R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ - R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer */ + R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ + R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ + R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask */ + R300_CAN_HYPERZ, /* ZMask + HiZ */ R300_CAN_AACOMPRESS, /* CMask */ }; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c index ba548c4db5b..2ecf1bb014c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c @@ -106,19 +106,9 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) exit(1); } - winsys->drm_2_1_0 = version->version_major > 2 || - version->version_minor >= 1; - - winsys->drm_2_3_0 = version->version_major > 2 || - version->version_minor >= 3; - - winsys->drm_2_6_0 = version->version_major > 2 || - (version->version_major == 2 && - version->version_minor >= 6); - - winsys->drm_2_8_0 = version->version_major > 2 || - (version->version_major == 2 && - version->version_minor >= 8); + winsys->drm_major = version->version_major; + winsys->drm_minor = version->version_minor; + winsys->drm_patchlevel = version->version_patchlevel; info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); @@ -167,17 +157,6 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) winsys->gart_size = gem_info.gart_size; winsys->vram_size = gem_info.vram_size; - debug_printf("radeon: Successfully grabbed chipset info from kernel!\n" - "radeon: DRM version: %d.%d.%d ID: 0x%04x GB: %d Z: %d\n" - "radeon: GART size: %d MB VRAM size: %d MB\n" - "radeon: HyperZ: %s\n", - version->version_major, 
version->version_minor, - version->version_patchlevel, winsys->pci_id, - winsys->gb_pipes, winsys->z_pipes, - winsys->gart_size / 1024 / 1024, - winsys->vram_size / 1024 / 1024, - winsys->hyperz ? "YES" : "NO"); - drmFreeVersion(version); } diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 91746bae8a9..aebe6a60791 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -145,14 +145,24 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return ws->gb_pipes; case R300_VID_Z_PIPES: return ws->z_pipes; + case R300_VID_GART_SIZE: + return ws->gart_size; + case R300_VID_VRAM_SIZE: + return ws->vram_size; + case R300_VID_DRM_MAJOR: + return ws->drm_major; + case R300_VID_DRM_MINOR: + return ws->drm_minor; + case R300_VID_DRM_PATCHLEVEL: + return ws->drm_patchlevel; case R300_VID_DRM_2_1_0: - return ws->drm_2_1_0; + return ws->drm_major*100 + ws->drm_minor >= 201; case R300_VID_DRM_2_3_0: - return ws->drm_2_3_0; + return ws->drm_major*100 + ws->drm_minor >= 203; case R300_VID_DRM_2_6_0: - return ws->drm_2_6_0; + return ws->drm_major*100 + ws->drm_minor >= 206; case R300_VID_DRM_2_8_0: - return ws->drm_2_8_0; + return ws->drm_major*100 + ws->drm_minor >= 208; case R300_CAN_HYPERZ: return ws->hyperz; case R300_CAN_AACOMPRESS: diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 59e12ff6e6d..743c4fbc4a9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -46,13 +46,10 @@ struct radeon_drm_winsys { uint32_t z_pipes; /* Z pipe count (rv530 only) */ uint32_t gart_size; /* GART size. */ uint32_t vram_size; /* VRAM size. */ - boolean drm_2_1_0; /* Square tiling support. 
*/ - /* DRM 2.3.0 (R500 VAP regs, MSPOS regs, fixed tex3D size checking) */ - boolean drm_2_3_0; - /* DRM 2.6.0 (Hyper-Z, GB_Z_PEQ_CONFIG allowed on rv350->r4xx, FG_ALPHA_VALUE) */ - boolean drm_2_6_0; - /* DRM 2.8.0 (US_FORMAT regs, ARGB2101010 colorbuffer) */ - boolean drm_2_8_0; + + unsigned drm_major; + unsigned drm_minor; + unsigned drm_patchlevel; /* Hyper-Z user */ boolean hyperz; -- cgit v1.2.3 From 2050f2ab96f923112d3475a655b31c8f5145a800 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 01:01:01 +0100 Subject: r300g: fix and re-enable 8x8 zbuffer compression mode Also cleanup the whole thing. --- src/gallium/drivers/r300/r300_blit.c | 11 +----- src/gallium/drivers/r300/r300_chipset.c | 3 +- src/gallium/drivers/r300/r300_context.h | 5 +++ src/gallium/drivers/r300/r300_emit.c | 30 +------------- src/gallium/drivers/r300/r300_hyperz.c | 6 +-- src/gallium/drivers/r300/r300_texture_desc.c | 58 ++++++++++++++++++++++++++++ 6 files changed, 69 insertions(+), 44 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index f24d5582e17..d0eb21c8924 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -114,16 +114,7 @@ static boolean r300_fast_zclear_allowed(struct r300_context *r300) struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - /* Cannot decompress zmask with a 16-bit zbuffer. - * Also compression causes a hung. */ - if (util_format_get_blocksizebits(fb->zsbuf->texture->format) == 16) - return FALSE; - - /* Cannot use compression with a linear zbuffer. 
*/ - if (!r300_texture(fb->zsbuf->texture)->desc.microtile) - return FALSE; - - return TRUE; + return r300_texture(fb->zsbuf->texture)->desc.zmask_dwords[fb->zsbuf->u.tex.level]; } static uint32_t r300_depth_clear_value(enum pipe_format format, diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 15dc6d09eeb..593eadb9c7d 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -424,7 +424,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) } caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; - /* XXX The 8x8 compression mode doesn't always work (piglit/fbo-depth fails). */ - caps->z_compress = /*caps->is_rv350 ? R300_ZCOMP_8X8 :*/ R300_ZCOMP_4X4; + caps->z_compress = caps->is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 6e96ae85ffd..57ecfb168f8 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -387,6 +387,11 @@ struct r300_texture_desc { /* Whether CBZB fast color clear is allowed on the miplevel. */ boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; + + /* Zbuffer compression info for each miplevel. */ + boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; + /* If zero, then disable compression. 
*/ + unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; }; struct r300_texture { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 54e263436bd..2157cb3ede7 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1141,42 +1141,14 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_texture *tex; - unsigned numdw, pipes; - unsigned compsize = r300->screen->caps.z_compress; - /* The tile size of 1 DWORD is: - * - * GPU Pipes 4x4 mode 8x8 mode - * ------------------------------------------ - * R580 4P/1Z 32x32 64x64 - * RV570 3P/1Z 48x16 96x32 - * RV530 1P/2Z 32x16 64x32 - */ - static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; - static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; CS_LOCALS(r300); - if (r300->screen->caps.family == CHIP_FAMILY_RV530) { - pipes = r300->screen->caps.num_z_pipes; - } else { - pipes = r300->screen->caps.num_frag_pipes; - } - tex = r300_texture(fb->zsbuf->texture); - /* Get the zbuffer size (with the aligned width and height). */ - numdw = align(tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level], - num_blocks_x_per_dw[pipes-1] * compsize) * - align(fb->zsbuf->height, - num_blocks_y_per_dw[pipes-1] * compsize); - - /* Convert pixels -> dwords. 
*/ - numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * - num_blocks_y_per_dw[pipes-1] * compsize); - BEGIN_CS(size); OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); OUT_CS(0); - OUT_CS(numdw); + OUT_CS(tex->desc.zmask_dwords[fb->zsbuf->u.tex.level]); OUT_CS(0); END_CS; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index d996d191755..7767275e67e 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -168,10 +168,10 @@ static void r300_update_hyperz(struct r300_context* r300) if (!r300->zmask_decompress) { z->zb_bw_cntl |= R300_WR_COMP_ENABLE; } + } - if (r300->screen->caps.z_compress == R300_ZCOMP_8X8) { - z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; - } + if (zstex->desc.zcomp8x8[fb->zsbuf->u.tex.level]) { + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; } if (hiz_in_use && r300_can_hiz(r300)) { diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index bc33871565b..83469f720bc 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -334,6 +334,63 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen, desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; } +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +static void r300_setup_zmask_flags(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + /* The tile size of 1 DWORD is: + * + * GPU Pipes 4x4 mode 8x8 mode + * ------------------------------------------ + * R580 4P/1Z 32x32 64x64 + * RV570 3P/1Z 48x16 96x32 + * RV530 1P/2Z 32x16 64x32 + * 1P/1Z 16x16 32x32 + */ + static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + + if (util_format_is_depth_or_stencil(desc->b.b.format) && + util_format_get_blocksizebits(desc->b.b.format) == 32 && + desc->microtile) { + unsigned i, pipes; + + 
if (screen->caps.family == CHIP_FAMILY_RV530) { + pipes = screen->caps.num_z_pipes; + } else { + pipes = screen->caps.num_frag_pipes; + } + + for (i = 0; i <= desc->b.b.last_level; i++) { + unsigned numdw, compsize; + + /* The 8x8 compression mode needs macrotiling. */ + compsize = screen->caps.z_compress == R300_ZCOMP_8X8 && + desc->macrotile[i] ? 8 : 4; + + /* Get the zbuffer size (with the aligned width and height). */ + numdw = align(desc->stride_in_pixels[i], + num_blocks_x_per_dw[pipes-1] * compsize) * + align(u_minify(desc->b.b.height0, i), + num_blocks_y_per_dw[pipes-1] * compsize); + + /* Convert pixels -> dwords. */ + numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * + num_blocks_y_per_dw[pipes-1] * compsize); + + /* Check that we have enough ZMASK memory. */ + if (numdw <= screen->caps.zmask_ram * pipes) { + desc->zmask_dwords[i] = numdw; + desc->zcomp8x8[i] = compsize == 8; + } else { + desc->zmask_dwords[i] = 0; + desc->zcomp8x8[i] = FALSE; + } + } + } +} + static void r300_setup_tiling(struct r300_screen *screen, struct r300_texture_desc *desc) { @@ -439,6 +496,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, } r300_texture_3d_fix_mipmapping(rscreen, desc); + r300_setup_zmask_flags(rscreen, desc); if (max_buffer_size) { /* Make sure the buffer we got is large enough. 
*/ -- cgit v1.2.3 From baf2a795eb357aaf33fdee166003fa38768cb193 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 01:15:17 +0100 Subject: r300g: 8x8-compressed zbuffer can only be point-sampled --- src/gallium/drivers/r300/r300_texture_desc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 83469f720bc..4faa88f4c52 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -367,7 +367,8 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, /* The 8x8 compression mode needs macrotiling. */ compsize = screen->caps.z_compress == R300_ZCOMP_8X8 && - desc->macrotile[i] ? 8 : 4; + desc->macrotile[i] && + desc->b.b.nr_samples <= 1 ? 8 : 4; /* Get the zbuffer size (with the aligned width and height). */ numdw = align(desc->stride_in_pixels[i], -- cgit v1.2.3 From 588c925224c15c3ea3e26bdeb0ef92391a0617e4 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 01:56:57 +0100 Subject: r300g: handle PIPE_CAP_ARRAY_TEXTURES --- src/gallium/drivers/r300/r300_screen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 880372ec83f..fc8131f03be 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -127,6 +127,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INSTANCED_DRAWING: + case PIPE_CAP_ARRAY_TEXTURES: return 0; /* Texturing. 
*/ -- cgit v1.2.3 From 6dc0a0e71f3a911437a88798145e18d176703482 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 01:58:30 +0100 Subject: r600g: handle PIPE_CAP_ARRAY_TEXTURES --- src/gallium/drivers/r600/r600_pipe.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 1ee327f16af..9245dba5761 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -287,6 +287,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ case PIPE_CAP_INSTANCED_DRAWING: + case PIPE_CAP_ARRAY_TEXTURES: return 0; /* Texturing. */ -- cgit v1.2.3 From 0029979eee6ef537592c7bb6b6005fa2ef0729da Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 17:15:22 +0100 Subject: r300g: fix resource_copy_region for DXT SRGB formats --- src/gallium/drivers/r300/r300_blit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index d0eb21c8924..69f8115c32c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -426,13 +426,13 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || - (!pipe->screen->is_format_supported(pipe->screen, + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || + !pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW, 0) && - desc->layout == UTIL_FORMAT_LAYOUT_PLAIN)) { + PIPE_BIND_SAMPLER_VIEW, 0))) { switch (util_format_get_blocksize(old_format)) { case 1: new_format = PIPE_FORMAT_I8_UNORM; -- cgit v1.2.3 From 
80777743b7b6238f034b8cb81d8d907d74929334 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 28 Jan 2011 20:25:27 -0700 Subject: softpipe: fix array textures to use resource array_size Don't use height for 1D array textures or depth for 2D array textures. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 61 +++++++++++++++++++----- src/gallium/drivers/softpipe/sp_tex_tile_cache.c | 17 +++++-- src/gallium/drivers/softpipe/sp_texture.c | 31 ++++++++++-- 3 files changed, 88 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 15f7eb2b94e..8a4ef934348 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -774,6 +774,43 @@ get_texel_3d(const struct sp_sampler_variant *samp, } +/* Get texel pointer for 1D array texture */ +static INLINE const float * +get_texel_1d_array(const struct sp_sampler_variant *samp, + union tex_tile_address addr, int x, int y) +{ + const struct pipe_resource *texture = samp->view->texture; + unsigned level = addr.bits.level; + + if (x < 0 || x >= (int) u_minify(texture->width0, level)) { + return samp->sampler->border_color; + } + else { + return get_texel_2d_no_border(samp, addr, x, y); + } +} + + +/* Get texel pointer for 2D array texture */ +static INLINE const float * +get_texel_2d_array(const struct sp_sampler_variant *samp, + union tex_tile_address addr, int x, int y, int layer) +{ + const struct pipe_resource *texture = samp->view->texture; + unsigned level = addr.bits.level; + + assert(layer < texture->array_size); + + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level)) { + return samp->sampler->border_color; + } + else { + return get_texel_3d_no_border(samp, addr, x, y, layer); + } +} + + /** * Given the logbase2 of a mipmap's base level size and a mipmap level, * return the size (in texels) 
of that mipmap level. @@ -1027,10 +1064,10 @@ img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler, addr.bits.level = samp->level; samp->nearest_texcoord_s(s, width, x); - wrap_array_layer(t, texture->height0, layer); + wrap_array_layer(t, texture->array_size, layer); for (j = 0; j < QUAD_SIZE; j++) { - const float *out = get_texel_2d(samp, addr, x[j], layer[j]); + const float *out = get_texel_1d_array(samp, addr, x[j], layer[j]); int c; for (c = 0; c < 4; c++) { rgba[c][j] = out[c]; @@ -1115,10 +1152,10 @@ img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler, samp->nearest_texcoord_s(s, width, x); samp->nearest_texcoord_t(t, height, y); - wrap_array_layer(p, texture->depth0, layer); + wrap_array_layer(p, texture->array_size, layer); for (j = 0; j < QUAD_SIZE; j++) { - const float *out = get_texel_3d(samp, addr, x[j], y[j], layer[j]); + const float *out = get_texel_2d_array(samp, addr, x[j], y[j], layer[j]); int c; for (c = 0; c < 4; c++) { rgba[c][j] = out[c]; @@ -1291,11 +1328,11 @@ img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler, addr.bits.level = samp->level; samp->linear_texcoord_s(s, width, x0, x1, xw); - wrap_array_layer(t, texture->height0, layer); + wrap_array_layer(t, texture->array_size, layer); for (j = 0; j < QUAD_SIZE; j++) { - const float *tx0 = get_texel_2d(samp, addr, x0[j], layer[j]); - const float *tx1 = get_texel_2d(samp, addr, x1[j], layer[j]); + const float *tx0 = get_texel_1d_array(samp, addr, x0[j], layer[j]); + const float *tx1 = get_texel_1d_array(samp, addr, x1[j], layer[j]); int c; /* interpolate R, G, B, A */ @@ -1382,13 +1419,13 @@ img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler, samp->linear_texcoord_s(s, width, x0, x1, xw); samp->linear_texcoord_t(t, height, y0, y1, yw); - wrap_array_layer(p, texture->depth0, layer); + wrap_array_layer(p, texture->array_size, layer); for (j = 0; j < QUAD_SIZE; j++) { - const float *tx0 = get_texel_3d(samp, addr, x0[j], y0[j], layer[j]); - const float *tx1 = 
get_texel_3d(samp, addr, x1[j], y0[j], layer[j]); - const float *tx2 = get_texel_3d(samp, addr, x0[j], y1[j], layer[j]); - const float *tx3 = get_texel_3d(samp, addr, x1[j], y1[j], layer[j]); + const float *tx0 = get_texel_2d_array(samp, addr, x0[j], y0[j], layer[j]); + const float *tx1 = get_texel_2d_array(samp, addr, x1[j], y0[j], layer[j]); + const float *tx2 = get_texel_2d_array(samp, addr, x0[j], y1[j], layer[j]); + const float *tx3 = get_texel_2d_array(samp, addr, x1[j], y1[j], layer[j]); int c; /* interpolate R, G, B, A */ diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index 5105e77d436..e589ee7c841 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -251,6 +251,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_level != addr.bits.level || tc->tex_z != addr.bits.z) { /* get new transfer (view into texture) */ + unsigned width, height, layer; if (tc->tex_trans) { if (tc->tex_trans_map) { @@ -262,14 +263,22 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_trans = NULL; } + width = u_minify(tc->texture->width0, addr.bits.level); + if (tc->texture->target == PIPE_TEXTURE_1D_ARRAY) { + height = tc->texture->array_size; + layer = 0; + } + else { + height = u_minify(tc->texture->height0, addr.bits.level); + layer = addr.bits.face + addr.bits.z; + } + tc->tex_trans = pipe_get_transfer(tc->pipe, tc->texture, addr.bits.level, - addr.bits.face + addr.bits.z, + layer, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, - 0, 0, - u_minify(tc->texture->width0, addr.bits.level), - u_minify(tc->texture->height0, addr.bits.level)); + 0, 0, width, height); tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 2daed2022e9..95374c34ec3 100644 --- 
a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -62,13 +62,21 @@ softpipe_resource_layout(struct pipe_screen *screen, unsigned buffer_size = 0; for (level = 0; level <= pt->last_level; level++) { + unsigned slices; + + if (pt->target == PIPE_TEXTURE_CUBE) + slices = 6; + else if (pt->target == PIPE_TEXTURE_3D) + slices = depth; + else + slices = pt->array_size; + spr->stride[level] = util_format_get_stride(pt->format, width); spr->level_offset[level] = buffer_size; buffer_size += (util_format_get_nblocksy(pt->format, height) * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - spr->stride[level]); + slices * spr->stride[level]); width = u_minify(width, 1); height = u_minify(height, 1); @@ -296,7 +304,7 @@ softpipe_surface_destroy(struct pipe_context *pipe, * a resource object. * \param pipe rendering context * \param resource the resource to transfer in/out of - * \param sr indicates cube face or 3D texture slice + * \param level which mipmap level * \param usage bitmask of PIPE_TRANSFER_x flags * \param box the 1D/2D/3D region of interest */ @@ -315,8 +323,21 @@ softpipe_get_transfer(struct pipe_context *pipe, /* make sure the requested region is in the image bounds */ assert(box->x + box->width <= u_minify(resource->width0, level)); - assert(box->y + box->height <= u_minify(resource->height0, level)); - assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); + if (resource->target == PIPE_TEXTURE_1D_ARRAY) { + assert(box->y + box->height <= resource->array_size); + } + else { + assert(box->y + box->height <= u_minify(resource->height0, level)); + if (resource->target == PIPE_TEXTURE_2D_ARRAY) { + assert(box->z + box->depth <= resource->array_size); + } + else if (resource->target == PIPE_TEXTURE_CUBE) { + assert(box->z < 6); + } + else { + assert(box->z + box->depth <= (u_minify(resource->depth0, level))); + } + } /* * Transfers, like other pipe operations, must happen in 
order, so flush the -- cgit v1.2.3 From 7fd29468ec68b5cd08222428577a7dbe8f123426 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 29 Jan 2011 15:06:22 +0100 Subject: nvc0: enable PIPE_CAP_ARRAY_TEXTURES and fix them --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 2 ++ src/gallium/drivers/nvc0/nvc0_screen.c | 2 ++ src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 11 ++++----- src/gallium/drivers/nvc0/nvc0_transfer.c | 38 ++++++++++++++++++++---------- 4 files changed, 34 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index 88a59cfb518..644b9ef61a1 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -393,6 +393,8 @@ emit_tex(struct nv_pc *pc, struct nv_instruction *i) { int src1 = i->tex_array + i->tex_dim + i->tex_cube; + assert(src1 < 6); + pc->emit[0] = 0x00000086; pc->emit[1] = 0x80000000; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 68f3867fd0e..88daf31d46a 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -75,6 +75,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case PIPE_CAP_ARRAY_TEXTURES: + return 1; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_TEXTURE_SWIZZLE: diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index be1bb44931d..9b5d4290787 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1198,11 +1198,6 @@ describe_texture_target(unsigned target, int *dim, *dim = 2; *cube = 1; break; - /* - case TGSI_TEXTURE_CUBE_ARRAY: - *dim = 2; - *cube = *array = 1; - break; case TGSI_TEXTURE_1D_ARRAY: *dim = *array = 1; break; @@ -1210,6 +1205,7 @@ 
describe_texture_target(unsigned target, int *dim, *dim = 2; *array = 1; break; + /* case TGSI_TEXTURE_SHADOW1D_ARRAY: *dim = *array = *shadow = 1; break; @@ -1219,7 +1215,7 @@ describe_texture_target(unsigned target, int *dim, break; case TGSI_TEXTURE_CUBE_ARRAY: *dim = 2; - *array = *cube = 1; + *cube = *array = 1; break; */ default: @@ -1382,6 +1378,7 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, nvi->tex_dim = dim; nvi->tex_cube = cube; nvi->tex_shadow = shadow; + nvi->tex_array = array; nvi->tex_live = 0; return nvi; @@ -1402,7 +1399,7 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], assert(dim + array + shadow + lodbias <= 5); - if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) + if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP) load_proj_tex_coords(bld, t, dim, shadow, insn); else { for (c = 0; c < dim + cube + array; ++c) diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 286b382f58e..92e006cba49 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -10,7 +10,8 @@ struct nvc0_transfer { struct pipe_transfer base; struct nvc0_m2mf_rect rect[2]; uint32_t nblocksx; - uint32_t nblocksy; + uint16_t nblocksy; + uint16_t nlayers; }; static void @@ -242,23 +243,36 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, struct nvc0_miptree_level *lvl = &mt->level[level]; struct nvc0_transfer *tx; uint32_t size; - uint32_t w, h, d, z, layer; + uint32_t w, h, d, z, layer, box_h, box_y; int ret; + tx = CALLOC_STRUCT(nvc0_transfer); + if (!tx) + return NULL; + + box_y = box->y; + box_h = box->height; + if (mt->layout_3d) { z = box->z; d = u_minify(res->depth0, level); layer = 0; + tx->nlayers = box->depth; } else { z = 0; d = 1; - layer = box->z; + if (res->target == PIPE_TEXTURE_1D || + res->target == PIPE_TEXTURE_1D_ARRAY) { + box_y = 0; + box_h = 1; + layer = box->y; + tx->nlayers = box->height; + } else { 
+ layer = box->z; + tx->nlayers = box->depth; + } } - tx = CALLOC_STRUCT(nvc0_transfer); - if (!tx) - return NULL; - pipe_resource_reference(&tx->base.resource, res); tx->base.level = level; @@ -266,7 +280,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->base.box = *box; tx->nblocksx = util_format_get_nblocksx(res->format, box->width); - tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + tx->nblocksy = util_format_get_nblocksy(res->format, box_h); tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); tx->base.layer_stride = tx->nblocksy * tx->base.stride; @@ -280,7 +294,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].base = lvl->offset + layer * mt->layer_stride; tx->rect[0].tile_mode = lvl->tile_mode; tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); - tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); + tx->rect[0].y = util_format_get_nblocksy(res->format, box_y); tx->rect[0].z = z; tx->rect[0].width = util_format_get_nblocksx(res->format, w); tx->rect[0].height = util_format_get_nblocksy(res->format, h); @@ -291,7 +305,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, size = tx->base.layer_stride; ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - size * tx->base.box.depth, &tx->rect[1].bo); + size * tx->nlayers, &tx->rect[1].bo); if (ret) { FREE(tx); return NULL; @@ -305,7 +319,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, if (usage & PIPE_TRANSFER_READ) { unsigned i; - for (i = 0; i < box->depth; ++i) { + for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], tx->nblocksx, tx->nblocksy); if (mt->layout_3d) @@ -331,7 +345,7 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, unsigned i; if (tx->base.usage & PIPE_TRANSFER_WRITE) { - for (i = 0; i < tx->base.box.depth; ++i) { + for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], tx->nblocksx, 
tx->nblocksy); if (mt->layout_3d) -- cgit v1.2.3 From f8a7a0b6f30ff38b2743860cbc4caeab102c2c29 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 30 Jan 2011 01:24:56 +0100 Subject: nvc0: implement transform feedback state --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 20 +++++-- src/gallium/drivers/nvc0/nvc0_context.h | 9 ++++ src/gallium/drivers/nvc0/nvc0_shader_state.c | 58 +++++++++++++++++++- src/gallium/drivers/nvc0/nvc0_state.c | 73 ++++++++++++++++++++++++++ src/gallium/drivers/nvc0/nvc0_state_validate.c | 3 +- src/gallium/drivers/nvc0/nvc0_stateobj.h | 8 +-- src/gallium/drivers/nvc0/nvc0_vbo.c | 51 +----------------- 7 files changed, 163 insertions(+), 59 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index af6526c8759..1a34313912c 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -84,6 +84,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210 +#define NVC0_3D_MEM_BARRIER 0x0000021c +#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 + #define NVC0_3D_TESS_MODE 0x00000320 #define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f #define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 @@ -122,11 +130,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0)) -#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010 +#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004 #define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) +#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010 +#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004 #define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) +#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010 +#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004 #define NVC0_3D_TFB_ENABLE 0x00000744 @@ -1157,9 +1171,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 -#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0)) +#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) #define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 -#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 #define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808 diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 94117988e50..a082ad4575c 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -54,6 +54,8 @@ #define NVC0_NEW_CONSTBUF (1 << 18) #define NVC0_NEW_TEXTURES (1 << 19) #define NVC0_NEW_SAMPLERS (1 << 20) +#define NVC0_NEW_TFB (1 << 21) +#define NVC0_NEW_TFB_BUFFERS (1 << 22) #define NVC0_BUFCTX_CONSTANT 0 #define NVC0_BUFCTX_FRAME 1 @@ -123,6 +125,11 @@ struct nvc0_context { boolean vbo_dirty; boolean vbo_push_hint; + struct nvc0_transform_feedback_state *tfb; + struct pipe_resource *tfbbuf[4]; + unsigned num_tfbbufs; + unsigned tfb_offset[4]; + struct draw_context *draw; }; @@ -177,6 +184,8 @@ void nvc0_tevlprog_validate(struct nvc0_context *); void nvc0_gmtyprog_validate(struct nvc0_context *); void 
nvc0_fragprog_validate(struct nvc0_context *); +void nvc0_tfb_validate(struct nvc0_context *); + /* nvc0_state.c */ extern void nvc0_init_state_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 981b5488d08..633641713dc 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -55,7 +55,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) prog->code_base + NVC0_SHADER_HEADER_SIZE, prog->code_size, prog->code); - BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1); + BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1); OUT_RING (nvc0->screen->base.channel, 0x1111); return TRUE; @@ -178,3 +178,59 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1); OUT_RING (chan, gp->max_gpr); } + +/* It *is* kind of shader related. We need to inspect the program + * to get the output locations right. + */ +void +nvc0_tfb_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *vp; + struct nvc0_transform_feedback_state *tfb = nvc0->tfb; + int b; + + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); + if (!tfb) { + OUT_RING(chan, 0); + return; + } + OUT_RING(chan, 1); + + vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog; + + for (b = 0; b < nvc0->num_tfbbufs; ++b) { + uint8_t idx, var[128]; + int i, n; + struct nvc0_resource *buf = nvc0_resource(nvc0->tfbbuf[b]); + + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5); + OUT_RING (chan, 1); + OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]); + OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ?
*/ + + if (!(nvc0->dirty & NVC0_NEW_TFB)) + continue; + + BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3); + OUT_RING (chan, 0); + OUT_RING (chan, tfb->varying_count[b]); + OUT_RING (chan, tfb->stride[b]); + + n = b ? tfb->varying_count[b - 1] : 0; + i = 0; + for (; i < tfb->varying_count[b]; ++i) { + idx = tfb->varying_index[n + i]; + var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3); + } + for (; i & 3; ++i) + var[i] = 0; + + BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4); + OUT_RINGp (chan, var, i / 4); + } + for (; b < 4; ++b) + IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0); +} diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index c08f3693f5e..f6a7f824d58 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -808,6 +808,74 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) nvc0->dirty |= NVC0_NEW_VERTEX; } +static void * +nvc0_tfb_state_create(struct pipe_context *pipe, + const struct pipe_stream_output_state *pso) +{ + struct nvc0_transform_feedback_state *so; + int n = 0; + int i, c, b; + + so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t)); + if (!so) + return NULL; + + for (b = 0; b < 4; ++b) { + for (i = 0; i < pso->num_outputs; ++i) { + if (pso->output_buffer[i] != b) + continue; + for (c = 0; c < 4; ++c) { + if (!(pso->register_mask[i] & (1 << c))) + continue; + so->varying_count[b]++; + so->varying_index[n++] = (pso->register_index[i] << 2) | c; + } + } + so->stride[b] = so->varying_count[b] * 4; + } + if (pso->stride) + so->stride[0] = pso->stride; + + return so; +} + +static void +nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso) +{ + nvc0_context(pipe)->tfb = hwcso; + nvc0_context(pipe)->dirty |= NVC0_NEW_TFB; +} + +static void +nvc0_set_transform_feedback_buffers(struct pipe_context *pipe, + struct pipe_resource 
**buffers, + int *offsets, + int num_buffers) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + int i; + + assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */ + + for (i = 0; i < num_buffers; ++i) { + assert(offsets[i] >= 0); + nvc0->tfb_offset[i] = offsets[i]; + pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]); + } + for (; i < nvc0->num_tfbbufs; ++i) + pipe_resource_reference(&nvc0->tfbbuf[i], NULL); + + nvc0->num_tfbbufs = num_buffers; + + nvc0->dirty |= NVC0_NEW_TFB_BUFFERS; +} + void nvc0_init_state_functions(struct nvc0_context *nvc0) { @@ -861,5 +929,10 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers; nvc0->pipe.set_index_buffer = nvc0_set_index_buffer; + + nvc0->pipe.create_stream_output_state = nvc0_tfb_state_create; + nvc0->pipe.delete_stream_output_state = nvc0_tfb_state_delete; + nvc0->pipe.bind_stream_output_state = nvc0_tfb_state_bind; + nvc0->pipe.set_stream_output_buffers = nvc0_set_transform_feedback_buffers; } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 6419011132a..7406f6c7917 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -436,7 +436,8 @@ static struct state_validate { { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, - { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS } + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, + { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 6c8028aba13..752e927e2aa 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -69,14 
+69,14 @@ struct nvc0_vertex_stateobj { uint32_t instance_bufs; unsigned vtx_size; unsigned vtx_per_packet_max; - struct nvc0_vertex_element element[1]; + struct nvc0_vertex_element element[0]; }; /* will have to lookup index -> location qualifier from nvc0_program */ -struct nvc0_tfb_state { - uint8_t varying_count[4]; +struct nvc0_transform_feedback_state { uint32_t stride[4]; - uint8_t varying_indices[1]; + uint8_t varying_count[4]; + uint8_t varying_index[0]; }; #endif diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 486909c1eb0..aa5decfc233 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -54,7 +54,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, assert(num_elements); so = MALLOC(sizeof(*so) + - (num_elements - 1) * sizeof(struct nvc0_vertex_element)); + num_elements * sizeof(struct nvc0_vertex_element)); if (!so) return NULL; so->num_elements = num_elements; @@ -351,55 +351,6 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) nvc0_bufctx_emit_relocs(nvc0); } -#if 0 -static struct nouveau_bo * -nvc0_tfb_setup(struct nvc0_context *nvc0) -{ - struct nouveau_channel *chan = nvc0->screen->base.channel; - struct nouveau_bo *tfb = NULL; - int ret, i; - - ret = nouveau_bo_new(nvc0->screen->base.device, - NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb); - if (ret) - return NULL; - - ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR); - if (ret) - return NULL; - memset(tfb->map, 0xee, 8 * 4 * 3); - nouveau_bo_unmap(tfb); - - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5); - OUT_RING (chan, 1); - OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, tfb->size); - OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */ - BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3); - OUT_RING (chan, 0); - OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) 
*/ - OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */ - BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2); - OUT_RING (chan, 0x1f1e1d1c); - OUT_RING (chan, 0xa3a2a1a0); - for (i = 1; i < 4; ++i) { - BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1); - OUT_RING (chan, 0); - } - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x135c), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x135c), 1); - OUT_RING (chan, 0); - - return tfb; -} -#endif - static void nvc0_draw_arrays(struct nvc0_context *nvc0, unsigned mode, unsigned start, unsigned count, -- cgit v1.2.3 From 02f8f134643f631364ca621fe0b6d6b72449e00c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 03:03:38 +0100 Subject: r600g: add back u_upload_mgr integration I can't see a performance difference with this code, which means all the driver-specific code removed in this commit was unnecessary. Now we use u_upload_mgr in a slightly different way than we did before it got dropped. I am not restoring the original code "as is" due to latest u_upload_mgr changes that r300g performance benefits from. 
This also fixes: - piglit/fp-kil --- src/gallium/drivers/r600/Makefile | 3 +- src/gallium/drivers/r600/SConscript | 1 - src/gallium/drivers/r600/evergreen_state.c | 11 ++- src/gallium/drivers/r600/r600_buffer.c | 94 +++++++++------------- src/gallium/drivers/r600/r600_pipe.c | 20 +++-- src/gallium/drivers/r600/r600_pipe.h | 10 +-- src/gallium/drivers/r600/r600_resource.h | 19 +---- src/gallium/drivers/r600/r600_state.c | 13 +-- src/gallium/drivers/r600/r600_state_common.c | 2 +- src/gallium/drivers/r600/r600_upload.c | 114 --------------------------- 10 files changed, 73 insertions(+), 214 deletions(-) delete mode 100644 src/gallium/drivers/r600/r600_upload.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index b476b9af3b8..a484f38e9f1 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -21,7 +21,6 @@ C_SOURCES = \ evergreen_state.c \ eg_asm.c \ r600_translate.c \ - r600_state_common.c \ - r600_upload.c + r600_state_common.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index e51f50c5df5..5a5fa6d65fd 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -28,7 +28,6 @@ r600 = env.ConvenienceLibrary( 'r600_state_common.c', 'r600_texture.c', 'r600_translate.c', - 'r600_upload.c', 'r700_asm.c', 'evergreen_state.c', 'eg_asm.c', diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 306ca03234f..2c9dd479979 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -841,7 +841,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, struct pipe_resource *buffer) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; + struct r600_resource_buffer 
*rbuffer = r600_buffer(buffer); uint32_t offset; /* Note that the state tracker can unbind constant buffers by @@ -851,7 +851,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, return; } - r600_upload_const_buffer(rctx, buffer, &offset); + r600_upload_const_buffer(rctx, &rbuffer, &offset); switch (shader) { case PIPE_SHADER_VERTEX: @@ -862,7 +862,7 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); break; case PIPE_SHADER_FRAGMENT: @@ -873,13 +873,16 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); break; default: R600_ERR("unsupported %d\n", shader); return; } + + if (!rbuffer->user_buffer) + pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); } void evergreen_init_state_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 469c8195fe9..f2247546062 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -29,15 +29,18 @@ #include #include #include +#include "util/u_upload_mgr.h" + #include "state_tracker/drm_driver.h" + #include #include "radeon_drm.h" + #include "r600.h" #include "r600_pipe.h" extern struct u_resource_vtbl r600_buffer_vtbl; - struct pipe_resource *r600_buffer_create(struct pipe_screen 
*screen, const struct pipe_resource *templ) { @@ -58,7 +61,6 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, rbuffer->r.base.vtbl = &r600_buffer_vtbl; rbuffer->r.size = rbuffer->r.base.b.width0; rbuffer->r.bo_size = rbuffer->r.size; - rbuffer->uploaded = FALSE; bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage); if (bo == NULL) { FREE(rbuffer); @@ -94,7 +96,6 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, rbuffer->r.bo = NULL; rbuffer->r.bo_size = 0; rbuffer->user_buffer = ptr; - rbuffer->uploaded = FALSE; return &rbuffer->r.base.b; } @@ -198,32 +199,23 @@ struct u_resource_vtbl r600_buffer_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) +void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { - if (r600_buffer_is_user_buffer(draw->index_buffer)) { + + if (r600_is_user_buffer(draw->index_buffer)) { struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); - unsigned upload_offset; - int ret = 0; - - ret = r600_upload_buffer(rctx->rupload_vb, - draw->index_buffer_offset, - draw->count * draw->index_size, - rbuffer, - &upload_offset, - &rbuffer->r.bo_size, - &rbuffer->r.bo); - if (ret) - return ret; - rbuffer->uploaded = TRUE; - draw->index_buffer_offset = upload_offset; - } + boolean flushed; - return 0; + u_upload_data(rctx->upload_vb, 0, + draw->count * draw->index_size, + rbuffer->user_buffer, + &draw->index_buffer_offset, + &draw->index_buffer, &flushed); + } } -int r600_upload_user_buffers(struct r600_pipe_context *rctx) +void r600_upload_user_buffers(struct r600_pipe_context *rctx) { - enum pipe_error ret = PIPE_OK; int i, nr; nr = rctx->vertex_elements->count; @@ -232,47 +224,33 @@ int r600_upload_user_buffers(struct r600_pipe_context *rctx) for (i = 0; i < nr; i++) { struct 
pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - if (r600_buffer_is_user_buffer(vb->buffer)) { + if (r600_is_user_buffer(vb->buffer)) { struct r600_resource_buffer *rbuffer = r600_buffer(vb->buffer); - unsigned upload_offset; - - ret = r600_upload_buffer(rctx->rupload_vb, - 0, vb->buffer->width0, - rbuffer, - &upload_offset, - &rbuffer->r.bo_size, - &rbuffer->r.bo); - if (ret) - return ret; - rbuffer->uploaded = TRUE; - vb->buffer_offset = upload_offset; + boolean flushed; + + u_upload_data(rctx->upload_vb, 0, + vb->buffer->width0, + rbuffer->user_buffer, + &vb->buffer_offset, + &vb->buffer, + &flushed); } } - return ret; } - -int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, +void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *const_offset) { - if (r600_buffer_is_user_buffer(cbuffer)) { - struct r600_resource_buffer *rbuffer = r600_buffer(cbuffer); - unsigned upload_offset; - int ret = 0; - - ret = r600_upload_buffer(rctx->rupload_const, - 0, cbuffer->width0, - rbuffer, - &upload_offset, - &rbuffer->r.bo_size, - &rbuffer->r.bo); - if (ret) - return ret; - rbuffer->uploaded = TRUE; - *const_offset = upload_offset; - return 0; - } + if ((*rbuffer)->user_buffer) { + uint8_t *ptr = (*rbuffer)->user_buffer; + unsigned size = (*rbuffer)->r.base.b.width0; + boolean flushed; - *const_offset = 0; - return 0; + *rbuffer = NULL; + + u_upload_data(rctx->upload_const, 0, size, ptr, const_offset, + (struct pipe_resource**)rbuffer, &flushed); + } else { + *const_offset = 0; + } } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9245dba5761..824cf7fc46a 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -35,6 +35,7 @@ #include #include #include +#include "util/u_upload_mgr.h" #include #include "r600.h" #include "r600d.h" @@ -68,8 +69,8 @@ static void r600_flush(struct pipe_context *ctx, 
unsigned flags, #endif r600_context_flush(&rctx->ctx); - r600_upload_flush(rctx->rupload_vb); - r600_upload_flush(rctx->rupload_const); + u_upload_flush(rctx->upload_vb); + u_upload_flush(rctx->upload_const); } static void r600_destroy_context(struct pipe_context *context) @@ -88,8 +89,8 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } - r600_upload_destroy(rctx->rupload_vb); - r600_upload_destroy(rctx->rupload_const); + u_upload_destroy(rctx->upload_vb); + u_upload_destroy(rctx->upload_const); if (rctx->tran.translate_cache) translate_cache_destroy(rctx->tran.translate_cache); @@ -167,14 +168,17 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->rupload_vb = r600_upload_create(rctx, 128 * 1024, 16); - if (rctx->rupload_vb == NULL) { + rctx->upload_vb = u_upload_create(&rctx->context, 1024 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER); + if (rctx->upload_vb == NULL) { r600_destroy_context(&rctx->context); return NULL; } - rctx->rupload_const = r600_upload_create(rctx, 128 * 1024, 256); - if (rctx->rupload_const == NULL) { + rctx->upload_const = u_upload_create(&rctx->context, 1024 * 1024, 256, + PIPE_BIND_CONSTANT_BUFFER); + if (rctx->upload_const == NULL) { r600_destroy_context(&rctx->context); return NULL; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 7f74fda0daf..8d5e3c3b553 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -131,8 +131,6 @@ struct r600_translate_context { #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 -struct r600_upload; - struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; @@ -164,12 +162,12 @@ struct r600_pipe_context { /* shader information */ unsigned sprite_coord_enable; bool flatshade; - struct r600_upload *rupload_vb; + struct u_upload_mgr *upload_vb; unsigned 
any_user_vbs; struct r600_textures_info ps_samplers; unsigned vb_max_index; struct r600_translate_context tran; - struct r600_upload *rupload_const; + struct u_upload_mgr *upload_const; }; struct r600_drawl { @@ -210,8 +208,8 @@ unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, unsigned level, int layer); struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); -int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); -int r600_upload_user_buffers(struct r600_pipe_context *rctx); +void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); +void r600_upload_user_buffers(struct r600_pipe_context *rctx); /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 28b3e1e5e40..6e302444712 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -71,7 +71,6 @@ struct r600_resource_buffer { struct r600_resource r; uint32_t magic; void *user_buffer; - bool uploaded; }; struct r600_surface { @@ -98,10 +97,8 @@ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buf return NULL; } -static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) +static INLINE boolean r600_is_user_buffer(struct pipe_resource *buffer) { - if (r600_buffer(buffer)->uploaded) - return FALSE; return r600_buffer(buffer)->user_buffer ? 
TRUE : FALSE; } @@ -121,15 +118,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); struct r600_pipe_context; -struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx, - unsigned default_size, - unsigned alignment); -void r600_upload_flush(struct r600_upload *upload); -void r600_upload_destroy(struct r600_upload *upload); -int r600_upload_buffer(struct r600_upload *upload, unsigned offset, - unsigned size, struct r600_resource_buffer *in_buffer, - unsigned *out_offset, unsigned *out_size, - struct r600_bo **out_buffer); - -int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, uint32_t *offset); + +void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *offset); + #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index d678f42ad67..0191a119d5d 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1105,10 +1105,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) + struct pipe_resource *buffer) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; + struct r600_resource_buffer *rbuffer = r600_buffer(buffer); uint32_t offset; /* Note that the state tracker can unbind constant buffers by @@ -1118,7 +1118,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint return; } - r600_upload_const_buffer(rctx, buffer, &offset); + r600_upload_const_buffer(rctx, &rbuffer, &offset); switch (shader) { case PIPE_SHADER_VERTEX: @@ -1129,7 +1129,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->vs_const_buffer, 
R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); break; case PIPE_SHADER_FRAGMENT: @@ -1140,13 +1140,16 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); + (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); break; default: R600_ERR("unsupported %d\n", shader); return; } + + if (!rbuffer->user_buffer) + pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); } void r600_init_state_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 3603376f738..9c4fd3b16dc 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -198,7 +198,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, rctx->vertex_buffer[i].buffer = NULL; if (buffers[i].buffer == NULL) continue; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) + if (r600_is_user_buffer(buffers[i].buffer)) rctx->any_user_vbs = TRUE; pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); diff --git a/src/gallium/drivers/r600/r600_upload.c b/src/gallium/drivers/r600/r600_upload.c deleted file mode 100644 index 44102ff55b6..00000000000 --- a/src/gallium/drivers/r600/r600_upload.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without 
restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "r600.h" -#include "r600_pipe.h" -#include "r600_resource.h" - -struct r600_upload { - struct r600_pipe_context *rctx; - struct r600_bo *buffer; - char *ptr; - unsigned size; - unsigned default_size; - unsigned total_alloc_size; - unsigned offset; - unsigned alignment; -}; - -struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx, - unsigned default_size, - unsigned alignment) -{ - struct r600_upload *upload = CALLOC_STRUCT(r600_upload); - - if (upload == NULL) - return NULL; - - upload->rctx = rctx; - upload->size = 0; - upload->default_size = default_size; - upload->alignment = alignment; - upload->ptr = NULL; - upload->buffer = NULL; - upload->total_alloc_size = 0; - - return upload; -} - -void r600_upload_flush(struct r600_upload *upload) -{ - if (upload->buffer) { - r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL); - } - upload->default_size = MAX2(upload->total_alloc_size, 
upload->default_size); - upload->total_alloc_size = 0; - upload->size = 0; - upload->offset = 0; - upload->ptr = NULL; - upload->buffer = NULL; -} - -void r600_upload_destroy(struct r600_upload *upload) -{ - r600_upload_flush(upload); - FREE(upload); -} - -int r600_upload_buffer(struct r600_upload *upload, unsigned offset, - unsigned size, struct r600_resource_buffer *in_buffer, - unsigned *out_offset, unsigned *out_size, - struct r600_bo **out_buffer) -{ - unsigned alloc_size = align(size, upload->alignment); - const void *in_ptr = NULL; - - if (upload->offset + alloc_size > upload->size) { - if (upload->size) { - r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL); - } - upload->size = align(MAX2(upload->default_size, alloc_size), 4096); - upload->total_alloc_size += upload->size; - upload->offset = 0; - upload->buffer = r600_bo(upload->rctx->radeon, upload->size, 4096, PIPE_BIND_VERTEX_BUFFER, 0); - if (upload->buffer == NULL) { - return -ENOMEM; - } - upload->ptr = r600_bo_map(upload->rctx->radeon, upload->buffer, 0, NULL); - } - - in_ptr = in_buffer->user_buffer; - memcpy(upload->ptr + upload->offset, (uint8_t *) in_ptr + offset, size); - *out_offset = upload->offset; - *out_size = upload->size; - *out_buffer = NULL; - r600_bo_reference(upload->rctx->radeon, out_buffer, upload->buffer); - upload->offset += alloc_size; - - return 0; -} -- cgit v1.2.3 From 5cefe1eddd4854490aebdf4f138ffb07aa59073c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 21:55:28 +0100 Subject: r600g: make r600_drawl inherit pipe_draw_info --- src/gallium/drivers/r600/evergreen_state.c | 37 +++++++++++------------------- src/gallium/drivers/r600/r600_buffer.c | 2 +- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_pipe.h | 9 ++------ src/gallium/drivers/r600/r600_state.c | 34 ++++++++++----------------- 5 files changed, 30 insertions(+), 54 deletions(-) (limited to 'src/gallium/drivers') diff --git 
a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 2c9dd479979..7fb64d2b658 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1432,43 +1432,34 @@ void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) } int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) + +void evergreen_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer; u32 vgt_dma_index_type, vgt_draw_initiator, mask; struct r600_draw rdraw; struct r600_pipe_state vgt; - struct r600_drawl draw; + struct r600_drawl draw = {}; unsigned prim; - memset(&draw, 0, sizeof(struct r600_drawl)); + draw.info = *info; draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; + draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, &rctx->index_buffer.index_size, - &draw.start, + &draw.info.start, info->count); draw.index_size = rctx->index_buffer.index_size; pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; + draw.index_buffer_offset = draw.info.start * draw.index_size; + draw.info.start = 0; r600_upload_index_buffer(rctx, &draw); } else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; + draw.info.index_bias = info->start; } switch (draw.index_size) { 
@@ -1488,7 +1479,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) R600_ERR("unsupported index size %d\n", draw.index_size); return; } - if (r600_conv_pipe_prim(draw.mode, &prim)) + if (r600_conv_pipe_prim(draw.info.mode, &prim)) return; if (unlikely(rctx->ps_shader == NULL)) { R600_ERR("missing vertex shader\n"); @@ -1515,15 +1506,15 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) vgt.id = R600_PIPE_STATE_VGT; vgt.nregs = 0; r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); r600_context_pipe_state_set(&rctx->ctx, &vgt); - rdraw.vgt_num_indices = draw.count; + rdraw.vgt_num_indices = draw.info.count; rdraw.vgt_num_instances = 1; rdraw.vgt_index_type = vgt_dma_index_type; rdraw.vgt_draw_initiator = vgt_draw_initiator; diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index f2247546062..024bbc29168 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -207,7 +207,7 @@ void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct 
r600_drawl boolean flushed; u_upload_data(rctx->upload_vb, 0, - draw->count * draw->index_size, + draw->info.count * draw->index_size, rbuffer->user_buffer, &draw->index_buffer_offset, &draw->index_buffer, &flushed); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 824cf7fc46a..71054fe4d57 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -154,7 +154,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: - rctx->context.draw_vbo = evergreen_draw; + rctx->context.draw_vbo = evergreen_draw_vbo; evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 8d5e3c3b553..94912680c20 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -171,13 +171,8 @@ struct r600_pipe_context { }; struct r600_drawl { + struct pipe_draw_info info; struct pipe_context *ctx; - unsigned mode; - unsigned min_index; - unsigned max_index; - unsigned index_bias; - unsigned start; - unsigned count; unsigned index_size; unsigned index_buffer_offset; struct pipe_resource *index_buffer; @@ -186,7 +181,7 @@ struct r600_drawl { /* evergreen_state.c */ void evergreen_init_state_functions(struct r600_pipe_context *rctx); void evergreen_init_config(struct r600_pipe_context *rctx); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info); +void evergreen_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); diff --git 
a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0191a119d5d..8501caf09e2 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -223,7 +223,7 @@ static void r600_draw_common(struct r600_drawl *draw) R600_ERR("unsupported index size %d\n", draw->index_size); return; } - if (r600_conv_pipe_prim(draw->mode, &prim)) + if (r600_conv_pipe_prim(draw->info.mode, &prim)) return; if (unlikely(rctx->ps_shader == NULL)) { R600_ERR("missing vertex shader\n"); @@ -250,15 +250,15 @@ static void r600_draw_common(struct r600_drawl *draw) vgt.id = R600_PIPE_STATE_VGT; vgt.nregs = 0; r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->info.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->info.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->info.min_index, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); r600_context_pipe_state_set(&rctx->ctx, &vgt); - rdraw.vgt_num_indices = draw->count; + rdraw.vgt_num_indices = draw->info.count; rdraw.vgt_num_instances = 1; rdraw.vgt_index_type = vgt_dma_index_type; rdraw.vgt_draw_initiator = vgt_draw_initiator; @@ -274,35 +274,25 @@ static void r600_draw_common(struct r600_drawl *draw) void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { 
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_drawl draw; + struct r600_drawl draw = {}; - memset(&draw, 0, sizeof(struct r600_drawl)); + draw.info = *info; draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; + draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, &rctx->index_buffer.index_size, - &draw.start, + &draw.info.start, info->count); draw.index_size = rctx->index_buffer.index_size; pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; + draw.index_buffer_offset = draw.info.start * draw.index_size; + draw.info.start = 0; r600_upload_index_buffer(rctx, &draw); } else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; + draw.info.index_bias = info->start; } r600_draw_common(&draw); -- cgit v1.2.3 From 2d7738eb2bee41656953d1173926f546c6711bad Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 22:17:41 +0100 Subject: r600g: consolidate draw_vbo functions (v2) Added a conditional to spi_update per Dave's comment. 
--- src/gallium/drivers/r600/evergreen_state.c | 122 ----------------------- src/gallium/drivers/r600/r600_pipe.c | 3 +- src/gallium/drivers/r600/r600_pipe.h | 4 +- src/gallium/drivers/r600/r600_state.c | 132 ------------------------- src/gallium/drivers/r600/r600_state_common.c | 139 +++++++++++++++++++++++++++ 5 files changed, 142 insertions(+), 258 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 7fb64d2b658..8bafb5a06fc 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1328,30 +1328,6 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) } } -static void evergreen_spi_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; - - rstate.nregs = 0; - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rctx->flatshade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); -} - void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) { struct r600_pipe_state *rstate; @@ -1431,104 +1407,6 @@ void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) } } -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); - -void evergreen_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - 
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - struct r600_drawl draw = {}; - unsigned prim; - - draw.info = *info; - draw.ctx = ctx; - if (info->indexed && rctx->index_buffer.buffer) { - draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.info.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.info.start * draw.index_size; - draw.info.start = 0; - r600_upload_index_buffer(rctx, &draw); - } else { - draw.info.index_bias = info->start; - } - - switch (draw.index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw.index_size); - return; - } - if (r600_conv_pipe_prim(draw.info.mode, &prim)) - return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } - - evergreen_spi_update(rctx); - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - 
r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw.info.count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw.index_buffer) { - rbuffer = (struct r600_resource*)draw.index_buffer; - rdraw.indices = rbuffer->bo; - rdraw.indices_bo_offset = draw.index_buffer_offset; - } - evergreen_context_draw(&rctx->ctx, &rdraw); - - pipe_resource_reference(&draw.index_buffer, NULL); -} - void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_state *rstate = &shader->rstate; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 71054fe4d57..45c093121da 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -123,6 +123,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_query_functions(rctx); r600_init_context_resource_functions(rctx); r600_init_surface_functions(rctx); + rctx->context.draw_vbo = r600_draw_vbo; switch (r600_get_family(rctx->radeon)) { case CHIP_R600: @@ -137,7 +138,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_RV730: case CHIP_RV710: case CHIP_RV740: - rctx->context.draw_vbo = r600_draw_vbo; r600_init_state_functions(rctx); if 
(r600_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -154,7 +154,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: - rctx->context.draw_vbo = evergreen_draw_vbo; evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 94912680c20..cd3c965b83f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -181,7 +181,6 @@ struct r600_drawl { /* evergreen_state.c */ void evergreen_init_state_functions(struct r600_pipe_context *rctx); void evergreen_init_config(struct r600_pipe_context *rctx); -void evergreen_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); @@ -221,7 +220,7 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, /* r600_state.c */ void r600_init_state_functions(struct r600_pipe_context *rctx); -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); +void r600_spi_update(struct r600_pipe_context *rctx); void r600_init_config(struct r600_pipe_context *rctx); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); @@ -270,6 +269,7 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct pipe_context *ctx, void *state); +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info 
*info); /* * common helpers diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 8501caf09e2..5f7b09b5777 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -94,36 +94,6 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) } } -/* FIXME optimize away spi update when it's not needed */ -static void r600_spi_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; - - rstate.nregs = 0; - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rctx->flatshade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); -} - void r600_vertex_buffer_update(struct r600_pipe_context *rctx) { struct r600_pipe_state *rstate; @@ -197,108 +167,6 @@ void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } } -static void r600_draw_common(struct r600_drawl *draw) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_resource *rbuffer; - unsigned prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - - switch (draw->index_size) { - case 2: - 
vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw->index_size); - return; - } - if (r600_conv_pipe_prim(draw->info.mode, &prim)) - return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } - - r600_spi_update(rctx); - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->info.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->info.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->info.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw->info.count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw->index_buffer) { - rbuffer = (struct r600_resource*)draw->index_buffer; - rdraw.indices = rbuffer->bo; - 
rdraw.indices_bo_offset = draw->index_buffer_offset; - } - r600_context_draw(&rctx->ctx, &rdraw); -} - -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_drawl draw = {}; - - draw.info = *info; - draw.ctx = ctx; - if (info->indexed && rctx->index_buffer.buffer) { - draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.info.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.info.start * draw.index_size; - draw.info.start = 0; - r600_upload_index_buffer(rctx, &draw); - } else { - draw.info.index_bias = info->start; - } - r600_draw_common(&draw); - - pipe_resource_reference(&draw.index_buffer, NULL); -} - static void r600_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) { diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 9c4fd3b16dc..050ed7e0292 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -27,7 +27,9 @@ #include #include #include +#include "pipe/p_shader_tokens.h" #include "r600_pipe.h" +#include "r600d.h" /* common state between evergreen and r600 */ void r600_bind_blend_state(struct pipe_context *ctx, void *state) @@ -327,3 +329,140 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) r600_pipe_shader_destroy(ctx, shader); free(shader); } + +/* FIXME optimize away spi update when it's not needed */ +void r600_spi_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_shader *shader = rctx->ps_shader; + struct r600_pipe_state rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp; + + 
rstate.nregs = 0; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rctx->flatshade); + } + + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + + if (rctx->family < CHIP_CEDAR) { + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + } + + r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + struct r600_drawl draw = {}; + unsigned prim; + + draw.info = *info; + draw.ctx = ctx; + if (info->indexed && rctx->index_buffer.buffer) { + draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + + r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + &rctx->index_buffer.index_size, + &draw.info.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + draw.index_buffer_offset = draw.info.start * draw.index_size; + draw.info.start = 0; + r600_upload_index_buffer(rctx, &draw); + } else { + draw.info.index_bias = info->start; + } + + switch (draw.index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + 
vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw.index_size); + return; + } + if (r600_conv_pipe_prim(draw.info.mode, &prim)) + return; + if (unlikely(rctx->ps_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + if (unlikely(rctx->vs_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + /* there should be enough input */ + if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); + return; + } + + r600_spi_update(rctx); + + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw.info.count; + rdraw.vgt_num_instances = 1; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw.index_buffer) { + rbuffer = (struct r600_resource*)draw.index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw.index_buffer_offset; + } + + if (rctx->family >= CHIP_CEDAR) { + 
evergreen_context_draw(&rctx->ctx, &rdraw); + } else { + r600_context_draw(&rctx->ctx, &rdraw); + } + + pipe_resource_reference(&draw.index_buffer, NULL); +} -- cgit v1.2.3 From 73fb2b7c9074c8878e8ff617ad6d6a21b93b66f2 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 29 Jan 2011 02:59:44 +0100 Subject: r600g: consolidate vertex_buffer_update functions --- src/gallium/drivers/r600/evergreen_state.c | 109 ++++++++------------------- src/gallium/drivers/r600/r600_pipe.h | 13 +++- src/gallium/drivers/r600/r600_state.c | 97 ++++++------------------ src/gallium/drivers/r600/r600_state_common.c | 79 ++++++++++++++++--- 4 files changed, 134 insertions(+), 164 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 8bafb5a06fc..08f9f753afe 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1328,85 +1328,6 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) } } -void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; - - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) - return; - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; - } else { - /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; - } - - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; - rstate->id = 
R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - unsigned vbuffer_index; - vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->vbuffer_offset[i]; - } else { - /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = 0; - } - if (vertex_buffer == NULL || rbuffer == NULL) - continue; - offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, - rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - S_030008_STRIDE(vertex_buffer->stride), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, - S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, - 0xC0000000, 0xFFFFFFFF, NULL); - evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); - } -} - void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_state *rstate = &shader->rstate; @@ -1605,3 +1526,33 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028000_COPY_CENTROID(1), 
NULL); return rstate; } + +void evergreen_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + unsigned index, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + S_030008_STRIDE(stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, + 0xC0000000, 0xFFFFFFFF, NULL); + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, index); +} diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index cd3c965b83f..ae73f56d12c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -185,7 +185,11 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); -void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx); +void evergreen_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + unsigned index, + struct r600_resource 
*rbuffer, + unsigned offset, unsigned stride); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); @@ -224,7 +228,11 @@ void r600_spi_update(struct r600_pipe_context *rctx); void r600_init_config(struct r600_pipe_context *rctx); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); -void r600_vertex_buffer_update(struct r600_pipe_context *rctx); +void r600_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + unsigned index, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); @@ -269,6 +277,7 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct pipe_context *ctx, void *state); +void r600_vertex_buffer_update(struct r600_pipe_context *rctx); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); /* diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 5f7b09b5777..e4423a309d3 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -94,79 +94,6 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) } } -void r600_vertex_buffer_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; - - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) - return; - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - 
r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; - } else { - /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; - } - - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - unsigned vbuffer_index; - vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->vbuffer_offset[i]; - } else { - /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = 0; - } - if (vertex_buffer == NULL || rbuffer == NULL) - continue; - offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(vertex_buffer->stride), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, - 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); - } -} - static void r600_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) { @@ -1339,3 
+1266,27 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028D0C_COPY_CENTROID(1), NULL); return rstate; } + +void r600_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + unsigned index, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + S_038008_STRIDE(stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + 0xC0000000, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, index); +} diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 050ed7e0292..c07b09d15ca 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -130,11 +130,7 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) if (v) { rctx->states[v->rstate.id] = &v->rstate; r600_context_pipe_state_set(&rctx->ctx, &v->rstate); - if (rctx->family >= CHIP_CEDAR) { - evergreen_vertex_buffer_update(rctx); - } else { - r600_vertex_buffer_update(rctx); - } + r600_vertex_buffer_update(rctx); } if (v) { @@ -216,11 +212,8 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, } rctx->nvertex_buffer = count; rctx->vb_max_index = max_index; - if (rctx->family >= CHIP_CEDAR) { - evergreen_vertex_buffer_update(rctx); - } else { - r600_vertex_buffer_update(rctx); - } + + 
r600_vertex_buffer_update(rctx); } @@ -366,6 +359,72 @@ void r600_spi_update(struct r600_pipe_context *rctx) r600_context_pipe_state_set(&rctx->ctx, &rstate); } +void r600_vertex_buffer_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + struct pipe_vertex_buffer *vertex_buffer; + unsigned i, offset; + + /* we don't update until we know vertex elements */ + if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) + return; + + if (rctx->vertex_elements->incompatible_layout) { + /* translate rebind new vertex elements so + * return once translated + */ + r600_begin_vertex_translate(rctx); + return; + } + + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx); + rctx->any_user_vbs = FALSE; + } + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + rctx->nvs_resource = rctx->vertex_elements->count; + } else { + /* bind vertex buffer once */ + rctx->nvs_resource = rctx->nvertex_buffer; + } + + for (i = 0 ; i < rctx->nvs_resource; i++) { + rstate = &rctx->vs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + unsigned vbuffer_index; + vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = rctx->vertex_elements->vbuffer_offset[i]; + } else { + /* bind vertex buffer once */ + vertex_buffer = &rctx->vertex_buffer[i]; + rbuffer = (struct r600_resource*)vertex_buffer->buffer; + offset = 0; + } + if (vertex_buffer == NULL || rbuffer == NULL) + continue; + offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_add_vertex_attrib(rctx, rstate, i, + rbuffer, offset, + vertex_buffer->stride); + } else { + r600_pipe_add_vertex_attrib(rctx, rstate, 
i, + rbuffer, offset, + vertex_buffer->stride); + } + } +} + void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; -- cgit v1.2.3 From 15730a8207374936e354d945730070cb29c9547c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 29 Jan 2011 03:15:52 +0100 Subject: r600g: consolidate set_constant_buffer functions --- src/gallium/drivers/r600/evergreen_state.c | 50 +--------------------------- src/gallium/drivers/r600/r600_pipe.h | 2 ++ src/gallium/drivers/r600/r600_state.c | 48 -------------------------- src/gallium/drivers/r600/r600_state_common.c | 48 ++++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 97 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 08f9f753afe..fa239a816b5 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -837,54 +837,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } } -static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource_buffer *rbuffer = r600_buffer(buffer); - uint32_t offset; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. 
- */ - if (buffer == NULL) { - return; - } - - r600_upload_const_buffer(rctx, &rbuffer, &offset); - - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - - if (!rbuffer->user_buffer) - pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); -} - void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; @@ -912,7 +864,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = evergreen_set_blend_color; rctx->context.set_clip_state = evergreen_set_clip_state; - rctx->context.set_constant_buffer = evergreen_set_constant_buffer; + rctx->context.set_constant_buffer = r600_set_constant_buffer; rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view; rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state; rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple; diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h index ae73f56d12c..a0164868f34 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -277,6 +277,8 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct pipe_context *ctx, void *state); +void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer); void r600_vertex_buffer_update(struct r600_pipe_context *rctx); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e4423a309d3..8c583b4f39a 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -889,54 +889,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } } -static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource_buffer *rbuffer = r600_buffer(buffer); - uint32_t offset; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. 
- */ - if (buffer == NULL) { - return; - } - - r600_upload_const_buffer(rctx, &rbuffer, &offset); - - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } - - if (!rbuffer->user_buffer) - pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); -} - void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c07b09d15ca..26e5981e982 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -359,6 +359,54 @@ void r600_spi_update(struct r600_pipe_context *rctx) r600_context_pipe_state_set(&rctx->ctx, &rstate); } +void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource_buffer *rbuffer = r600_buffer(buffer); + uint32_t offset; + + /* Note that the state tracker can unbind 
constant buffers by + * passing NULL here. + */ + if (buffer == NULL) { + return; + } + + r600_upload_const_buffer(rctx, &rbuffer, &offset); + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } + + if (!rbuffer->user_buffer) + pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); +} + void r600_vertex_buffer_update(struct r600_pipe_context *rctx) { struct r600_pipe_state *rstate; -- cgit v1.2.3 From 8c631cfeae29b5236928f759e222aa35e6e4984c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 28 Jan 2011 22:04:09 +0100 Subject: r600g: rework vertex buffer uploads Only upload the [min_index, max_index] range instead of [0, userbuf_size]. This is an important optimization. Framerate in Lightsmark: Before: 22 fps After: 75 fps The same optimization is already in r300g. 
--- src/gallium/drivers/r600/r600_buffer.c | 65 +++++++++------ src/gallium/drivers/r600/r600_pipe.h | 5 +- src/gallium/drivers/r600/r600_state_common.c | 120 ++++++++++++++------------- 3 files changed, 106 insertions(+), 84 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 024bbc29168..6ebe6ab2364 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -201,39 +201,52 @@ struct u_resource_vtbl r600_buffer_vtbl = void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { - - if (r600_is_user_buffer(draw->index_buffer)) { - struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); - boolean flushed; - - u_upload_data(rctx->upload_vb, 0, - draw->info.count * draw->index_size, - rbuffer->user_buffer, - &draw->index_buffer_offset, - &draw->index_buffer, &flushed); - } + struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); + boolean flushed; + + u_upload_data(rctx->upload_vb, 0, + draw->info.count * draw->index_size, + rbuffer->user_buffer, + &draw->index_buffer_offset, + &draw->index_buffer, &flushed); } -void r600_upload_user_buffers(struct r600_pipe_context *rctx) +void r600_upload_user_buffers(struct r600_pipe_context *rctx, + int min_index, int max_index) { - int i, nr; - - nr = rctx->vertex_elements->count; - nr = rctx->nvertex_buffer; + int i, nr = rctx->vertex_elements->count; + unsigned count = max_index + 1 - min_index; + boolean flushed; + boolean uploaded[32] = {0}; for (i = 0; i < nr; i++) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if (r600_is_user_buffer(vb->buffer)) { - struct r600_resource_buffer *rbuffer = r600_buffer(vb->buffer); - boolean flushed; - - u_upload_data(rctx->upload_vb, 0, - vb->buffer->width0, - rbuffer->user_buffer, + unsigned index = rctx->vertex_elements->elements[i].vertex_buffer_index; + struct 
pipe_vertex_buffer *vb = &rctx->vertex_buffer[index]; + struct r600_resource_buffer *userbuf = r600_buffer(vb->buffer); + + if (userbuf && userbuf->user_buffer && !uploaded[index]) { + unsigned first, size; + + if (vb->stride) { + first = vb->stride * min_index; + size = vb->stride * count; + } else { + first = 0; + size = rctx->vertex_elements->hw_format_size[i]; + } + + u_upload_data(rctx->upload_vb, first, size, + userbuf->user_buffer + first, &vb->buffer_offset, - &vb->buffer, + &rctx->real_vertex_buffer[index], &flushed); + + vb->buffer_offset -= first; + + /* vertex_arrays_dirty = TRUE; */ + uploaded[index] = TRUE; + } else { + assert(rctx->real_vertex_buffer[index]); } } } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index a0164868f34..360ee2af1fc 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -144,6 +144,7 @@ struct r600_pipe_context { struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS]; unsigned nvertex_buffer; unsigned cb_target_mask; /* for saving when using blitter */ @@ -207,7 +208,8 @@ unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); -void r600_upload_user_buffers(struct r600_pipe_context *rctx); +void r600_upload_user_buffers(struct r600_pipe_context *rctx, + int min_index, int max_index); /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); @@ -279,7 +281,6 @@ void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct pipe_context *ctx, void *state); void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint 
index, struct pipe_resource *buffer); -void r600_vertex_buffer_update(struct r600_pipe_context *rctx); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); /* diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 26e5981e982..409a07bda17 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -121,20 +121,10 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = (struct r600_vertex_element*)state; - /* delete previous translated vertex elements */ - if (rctx->tran.new_velems) { - r600_end_vertex_translate(rctx); - } - rctx->vertex_elements = v; if (v) { rctx->states[v->rstate.id] = &v->rstate; r600_context_pipe_state_set(&rctx->ctx, &v->rstate); - r600_vertex_buffer_update(rctx); - } - - if (v) { -// rctx->vs_rebuild = TRUE; } } @@ -175,45 +165,62 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct pipe_vertex_buffer *vbo; - unsigned max_index = (unsigned)-1; - - if (rctx->family >= CHIP_CEDAR) { - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); - } - } else { - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); - } - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + unsigned max_index = ~0; + int i; - for (int i = 0; i < count; i++) { + for (i = 0; i < count; i++) { vbo = (struct pipe_vertex_buffer*)&buffers[i]; - rctx->vertex_buffer[i].buffer = NULL; - if (buffers[i].buffer == NULL) + 
pipe_resource_reference(&rctx->vertex_buffer[i].buffer, vbo->buffer); + pipe_resource_reference(&rctx->real_vertex_buffer[i], NULL); + + if (!vbo->buffer) { + /* Zero states. */ + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } else { + r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } continue; - if (r600_is_user_buffer(buffers[i].buffer)) + } + + if (r600_is_user_buffer(vbo->buffer)) { rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + continue; + } + + pipe_resource_reference(&rctx->real_vertex_buffer[i], vbo->buffer); /* The stride of zero means we will be fetching only the first * vertex, so don't care about max_index. */ - if (!vbo->stride) + if (!vbo->stride) { continue; + } - if (vbo->max_index == ~0) { - vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + /* Update the maximum index. */ + { + unsigned vbo_max_index = + (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + max_index = MIN2(max_index, vbo_max_index); } - max_index = MIN2(vbo->max_index, max_index); } + + for (; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + pipe_resource_reference(&rctx->real_vertex_buffer[i], NULL); + + /* Zero states. 
*/ + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } else { + r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } + } + + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + rctx->nvertex_buffer = count; rctx->vb_max_index = max_index; - - r600_vertex_buffer_update(rctx); } @@ -407,30 +414,13 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); } -void r600_vertex_buffer_update(struct r600_pipe_context *rctx) +static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) { struct r600_pipe_state *rstate; struct r600_resource *rbuffer; struct pipe_vertex_buffer *vertex_buffer; unsigned i, offset; - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) - return; - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - if (rctx->vertex_elements->vbuffer_need_offset) { /* one resource per vertex elements */ rctx->nvs_resource = rctx->vertex_elements->count; @@ -449,12 +439,12 @@ void r600_vertex_buffer_update(struct r600_pipe_context *rctx) unsigned vbuffer_index; vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; + rbuffer = (struct r600_resource*)rctx->real_vertex_buffer[vbuffer_index]; offset = rctx->vertex_elements->vbuffer_offset[i]; } else { /* bind vertex buffer once */ vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; + rbuffer = (struct r600_resource*)rctx->real_vertex_buffer[i]; offset = 0; } if 
(vertex_buffer == NULL || rbuffer == NULL) @@ -483,6 +473,16 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct r600_drawl draw = {}; unsigned prim; + if (rctx->vertex_elements->incompatible_layout) { + r600_begin_vertex_translate(rctx); + } + + if (rctx->any_user_vbs) { + r600_upload_user_buffers(rctx, info->min_index, info->max_index); + } + + r600_vertex_buffer_update(rctx); + draw.info = *info; draw.ctx = ctx; if (info->indexed && rctx->index_buffer.buffer) { @@ -497,7 +497,10 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); draw.index_buffer_offset = draw.info.start * draw.index_size; draw.info.start = 0; - r600_upload_index_buffer(rctx, &draw); + + if (r600_is_user_buffer(draw.index_buffer)) { + r600_upload_index_buffer(rctx, &draw); + } } else { draw.info.index_bias = info->start; } @@ -572,4 +575,9 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } pipe_resource_reference(&draw.index_buffer, NULL); + + /* delete previous translated vertex elements */ + if (rctx->tran.new_velems) { + r600_end_vertex_translate(rctx); + } } -- cgit v1.2.3 From 70e656b4ebdd3cd2962ce66544ae9af349ecd59a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 29 Jan 2011 13:49:41 +0100 Subject: r600g: fix vertex format fallback This fixes: - piglit/draw-vertices - piglit/draw-vertices-half-float --- src/gallium/drivers/r600/r600_asm.c | 8 +------- src/gallium/drivers/r600/r600_blit.c | 2 +- src/gallium/drivers/r600/r600_pipe.h | 4 +++- src/gallium/drivers/r600/r600_state_common.c | 29 +++++++++++++++++++++++++--- src/gallium/drivers/r600/r600_translate.c | 22 ++++++++++++--------- 5 files changed, 44 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 9cdd10f9876..3b0d01b8720 100644 --- 
a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1889,7 +1889,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, } switch (desc->channel[i].type) { - /* Half-floats, floats, doubles */ + /* Half-floats, floats, ints */ case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[i].size) { case 16: @@ -1901,8 +1901,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_FLOAT; break; case 3: - *format = FMT_16_16_16_FLOAT; - break; case 4: *format = FMT_16_16_16_16_FLOAT; break; @@ -1942,8 +1940,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_8_8; break; case 3: - // *format = FMT_8_8_8; /* fails piglit draw-vertices test */ - // break; case 4: *format = FMT_8_8_8_8; break; @@ -1958,8 +1954,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16; break; case 3: - // *format = FMT_16_16_16; /* fails piglit draw-vertices test */ - // break; case 4: *format = FMT_16_16_16_16; break; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index b9ec9592e35..b487182e3aa 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -53,7 +53,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op if (rctx->states[R600_PIPE_STATE_CLIP]) { util_blitter_save_clip(rctx->blitter, &rctx->clip); } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); + util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffers, rctx->vertex_buffer); rctx->vertex_elements = NULL; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 360ee2af1fc..301888abc78 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -125,6 +125,7 @@ struct r600_translate_context { struct 
translate_cache *translate_cache; /* The vertex buffer slot containing the translated buffer. */ unsigned vb_slot; + void *saved_velems; void *new_velems; }; @@ -145,7 +146,8 @@ struct r600_pipe_context { struct pipe_index_buffer index_buffer; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nvertex_buffer; + unsigned nvertex_buffers; + unsigned nreal_vertex_buffers; /* with the translated vertex buffer */ unsigned cb_target_mask; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 409a07bda17..4a2c7fe935c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -205,7 +205,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, } } - for (; i < rctx->nvertex_buffer; i++) { + for (; i < rctx->nreal_vertex_buffers; i++) { pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); pipe_resource_reference(&rctx->real_vertex_buffer[i], NULL); @@ -219,7 +219,8 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - rctx->nvertex_buffer = count; + rctx->nvertex_buffers = count; + rctx->nreal_vertex_buffers = count; rctx->vb_max_index = max_index; } @@ -252,6 +253,28 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + + /* r600 doesn't seem to support 32_*SCALED, these formats + * aren't in D3D10 either. 
*/ + FORMAT_REPLACE(R32_UNORM, R32_FLOAT); + FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); + + FORMAT_REPLACE(R32_USCALED, R32_FLOAT); + FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); + + FORMAT_REPLACE(R32_SNORM, R32_FLOAT); + FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); + + FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); + FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); default:; } v->incompatible_layout = @@ -426,7 +449,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) rctx->nvs_resource = rctx->vertex_elements->count; } else { /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; + rctx->nvs_resource = rctx->nreal_vertex_buffers; } for (i = 0 ; i < rctx->nvs_resource; i++) { diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index f80fa7af941..5f63af59ccc 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -98,14 +98,14 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) tr = translate_cache_find(rctx->tran.translate_cache, &key); /* Map buffers we want to translate. 
*/ - for (i = 0; i < rctx->nvertex_buffer; i++) { + for (i = 0; i < rctx->nvertex_buffers; i++) { if (vb_translated[i]) { struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; vb_map[i] = pipe_buffer_map(pipe, vb->buffer, PIPE_TRANSFER_READ, &vb_transfer[i]); - tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + tr->set_buffer(tr, i, vb_map[i], vb->stride, ~0); } } @@ -123,7 +123,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) tr->run(tr, 0, num_verts, 0, out_map); /* Unmap all buffers. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { + for (i = 0; i < rctx->nvertex_buffers; i++) { if (vb_translated[i]) { pipe_buffer_unmap(pipe, vb_transfer[i]); } @@ -136,11 +136,14 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; if (!vb->buffer) { - pipe_resource_reference(&vb->buffer, out_buffer); + pipe_resource_reference(&rctx->real_vertex_buffer[i], out_buffer); vb->buffer_offset = 0; - vb->max_index = num_verts - 1; vb->stride = key.output_stride; rctx->tran.vb_slot = i; + + if (i >= rctx->nvertex_buffers) { + rctx->nreal_vertex_buffers = i+1; + } break; } } @@ -159,6 +162,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) } } + rctx->tran.saved_velems = rctx->vertex_elements; tmp = pipe->create_vertex_elements_state(pipe, ve->count, new_velems); pipe->bind_vertex_elements_state(pipe, tmp); rctx->tran.new_velems = tmp; @@ -174,11 +178,14 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx) return; } /* Restore vertex elements. */ + pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); + rctx->tran.saved_velems = NULL; pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); rctx->tran.new_velems = NULL; /* Delete the now-unused VBO. 
*/ - pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, NULL); + pipe_resource_reference(&rctx->real_vertex_buffer[rctx->tran.vb_slot], NULL); + rctx->nreal_vertex_buffers = rctx->nvertex_buffers; } void r600_translate_index_buffer(struct r600_pipe_context *r600, @@ -192,8 +199,5 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600, *index_size = 2; *start = 0; break; - case 2: - case 4: - break; } } -- cgit v1.2.3 From 73a40d1383071fe25599509d218f4c40d049988d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 29 Jan 2011 16:22:08 +0100 Subject: r600g: rework vertex format fallback 1) Only translate the [min_index, max_index] range. 2) Upload translated vertices via the uploader. --- src/gallium/drivers/r600/r600_pipe.h | 3 +- src/gallium/drivers/r600/r600_state_common.c | 2 +- src/gallium/drivers/r600/r600_translate.c | 97 +++++++++++++++------------- 3 files changed, 55 insertions(+), 47 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 301888abc78..cf4211cb8d3 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -251,7 +251,8 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned layer); /* r600_translate.c */ -void r600_begin_vertex_translate(struct r600_pipe_context *rctx); +void r600_begin_vertex_translate(struct r600_pipe_context *rctx, + int min_index, int max_index); void r600_end_vertex_translate(struct r600_pipe_context *rctx); void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 4a2c7fe935c..2df8188f0a2 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -497,7 +497,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const 
struct pipe_draw_info *info) unsigned prim; if (rctx->vertex_elements->incompatible_layout) { - r600_begin_vertex_translate(rctx); + r600_begin_vertex_translate(rctx, info->min_index, info->max_index); } if (rctx->any_user_vbs) { diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 5f63af59ccc..4b88a9bfd46 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -22,13 +22,16 @@ * * Authors: Dave Airlie */ + #include "translate/translate_cache.h" #include "translate/translate.h" #include #include +#include "util/u_upload_mgr.h" #include "r600_pipe.h" -void r600_begin_vertex_translate(struct r600_pipe_context *rctx) +void r600_begin_vertex_translate(struct r600_pipe_context *rctx, + int min_index, int max_index) { struct pipe_context *pipe = &rctx->context; struct translate_key key = {0}; @@ -37,18 +40,16 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) struct translate *tr; struct r600_vertex_element *ve = rctx->vertex_elements; boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; + uint8_t *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; + struct pipe_resource *out_buffer = NULL; + unsigned i, num_verts, out_offset; struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - void *tmp; + boolean flushed; /* Initialize the translate key, i.e. the recipe how vertices should be * translated. 
*/ for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; enum pipe_format output_format = ve->hw_format[i]; unsigned output_format_size = ve->hw_format_size[i]; @@ -81,10 +82,10 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) /* Add this vertex element. */ te = &key.element[key.nr_elements]; /*te->type; - te->instance_divisor;*/ + te->instance_divisor;*/ te->input_buffer = ve->elements[i].vertex_buffer_index; te->input_format = ve->elements[i].src_format; - te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; + te->input_offset = ve->elements[i].src_offset; te->output_format = output_format; te->output_offset = key.output_stride; @@ -105,19 +106,22 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) vb_map[i] = pipe_buffer_map(pipe, vb->buffer, PIPE_TRANSFER_READ, &vb_transfer[i]); - tr->set_buffer(tr, i, vb_map[i], vb->stride, ~0); + tr->set_buffer(tr, i, + vb_map[i] + vb->buffer_offset + vb->stride * min_index, + vb->stride, ~0); } } /* Create and map the output buffer. */ - num_verts = rctx->vb_max_index + 1; + num_verts = max_index + 1 - min_index; - out_buffer = pipe_buffer_create(&rctx->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); + u_upload_alloc(rctx->upload_vb, + key.output_stride * min_index, + key.output_stride * num_verts, + &out_offset, &out_buffer, &flushed, + (void**)&out_map); - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); + out_offset -= key.output_stride * min_index; /* Translate. */ tr->run(tr, 0, num_verts, 0, out_map); @@ -129,16 +133,10 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) } } - pipe_buffer_unmap(pipe, out_transfer); - - /* Setup the new vertex buffer in the first free slot. */ + /* Find the first free slot. 
*/ + rctx->tran.vb_slot = ~0; for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&rctx->real_vertex_buffer[i], out_buffer); - vb->buffer_offset = 0; - vb->stride = key.output_stride; + if (!rctx->vertex_buffer[i].buffer) { rctx->tran.vb_slot = i; if (i >= rctx->nvertex_buffers) { @@ -148,24 +146,31 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx) } } - /* Save and replace vertex elements. */ - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->elements[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->elements[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->elements[i], - sizeof(struct pipe_vertex_element)); + if (rctx->tran.vb_slot != ~0) { + /* Setup the new vertex buffer. */ + pipe_resource_reference(&rctx->real_vertex_buffer[rctx->tran.vb_slot], out_buffer); + rctx->vertex_buffer[rctx->tran.vb_slot].buffer_offset = out_offset; + rctx->vertex_buffer[rctx->tran.vb_slot].stride = key.output_stride; + + /* Setup new vertex elements. 
*/ + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->elements[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->elements[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->elements[i], + sizeof(struct pipe_vertex_element)); + } } - } - rctx->tran.saved_velems = rctx->vertex_elements; - tmp = pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, tmp); - rctx->tran.new_velems = tmp; + rctx->tran.saved_velems = rctx->vertex_elements; + rctx->tran.new_velems = + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); + } pipe_resource_reference(&out_buffer, NULL); } @@ -177,6 +182,7 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx) if (rctx->tran.new_velems == NULL) { return; } + /* Restore vertex elements. */ pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); rctx->tran.saved_velems = NULL; @@ -188,10 +194,11 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx) rctx->nreal_vertex_buffers = rctx->nvertex_buffers; } +/* XXX Use the uploader. 
*/ void r600_translate_index_buffer(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count) + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) { switch (*index_size) { case 1: -- cgit v1.2.3 From 77900843b42d9672c6b10f49c177a172041f6a8d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 29 Jan 2011 16:39:45 +0100 Subject: r600g: upload translated indices via the uploader --- src/gallium/drivers/r600/r600_translate.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 4b88a9bfd46..68429b99d01 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -194,17 +194,27 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx) rctx->nreal_vertex_buffers = rctx->nvertex_buffers; } -/* XXX Use the uploader. */ void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, unsigned *start, unsigned count) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + boolean flushed; + switch (*index_size) { case 1: - util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + u_upload_alloc(r600->upload_vb, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r600->context, *index_buffer, 0, *start, count, ptr); + + pipe_resource_reference(index_buffer, out_buffer); *index_size = 2; - *start = 0; + *start = out_offset / 2; break; } } -- cgit v1.2.3 From 8d0a540020f6389ca5efcd0e1fbef45a4a1f5b6a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 29 Jan 2011 13:58:02 +0100 Subject: r300g: rework vertex format fallback 1) Only translate the [min_index, max_index] range. 
2) Upload translated vertices via the uploader. 3) Rename valid_vertex_buffer[] to real_vertex_buffer[] --- src/gallium/drivers/r300/r300_context.c | 4 +- src/gallium/drivers/r300/r300_context.h | 4 +- src/gallium/drivers/r300/r300_emit.c | 8 +- src/gallium/drivers/r300/r300_render.c | 8 +- src/gallium/drivers/r300/r300_render_translate.c | 118 +++++++++++++---------- src/gallium/drivers/r300/r300_screen_buffer.c | 13 +-- src/gallium/drivers/r300/r300_state.c | 9 +- 7 files changed, 86 insertions(+), 78 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index c8966ee59a5..114fb316c05 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -84,9 +84,9 @@ static void r300_release_referenced_objects(struct r300_context *r300) pipe_resource_reference(&r300->vbo, NULL); /* Vertex buffers. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { + for (i = 0; i < r300->real_vertex_buffer_count; i++) { pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); + pipe_resource_reference(&r300->real_vertex_buffer[i], NULL); } /* If there are any queries pending or not destroyed, remove them now. */ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 57ecfb168f8..430a0ddbb5c 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -445,6 +445,7 @@ struct r300_translate_context { /* Saved and new vertex element state. */ void *saved_velems, *new_velems; + unsigned vb_slot; }; struct r300_context { @@ -560,8 +561,9 @@ struct r300_context { /* May contain user buffers. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; /* Contains only non-user buffers. 
*/ - struct pipe_resource *valid_vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; + int real_vertex_buffer_count; /* with the translated buffer. */ int vertex_buffer_max_index; boolean any_user_vbs; /* Vertex elements for Gallium. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 2157cb3ede7..34f87f74d3e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -847,7 +847,7 @@ static void r300_update_vertex_arrays_cb(struct r300_context *r300, unsigned pac void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_resource **valid_vbuf = r300->valid_vertex_buffer; + struct pipe_resource **valid_vbuf = r300->real_vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; struct r300_buffer *buf; int i; @@ -1227,9 +1227,9 @@ validate: r300_buffer(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... 
*/ if (do_validate_vertex_buffers) { - struct pipe_resource **buf = r300->valid_vertex_buffer; - struct pipe_resource **last = r300->valid_vertex_buffer + - r300->vertex_buffer_count; + struct pipe_resource **buf = r300->real_vertex_buffer; + struct pipe_resource **last = r300->real_vertex_buffer + + r300->real_vertex_buffer_count; for (; buf != last; buf++) { if (!*buf) continue; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b35822c82f8..41ddd748bbf 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -325,7 +325,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, vbi = velem->vertex_buffer_index; if (!checked[vbi]) { - buf = r300->valid_vertex_buffer[vbi]; + buf = r300->real_vertex_buffer[vbi]; if (!(r300_buffer(buf)->domain & R300_DOMAIN_GTT)) { return FALSE; @@ -390,7 +390,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Map the buffer. */ if (!transfer[vbi]) { map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, - r300->valid_vertex_buffer[vbi], + r300->real_vertex_buffer[vbi], PIPE_TRANSFER_READ, &transfer[vbi]); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; @@ -787,7 +787,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, (indexed ? PREP_INDEXED : 0), indexed ? 
256 : 6); - for (i = 0; i < r300->vertex_buffer_count; i++) { + for (i = 0; i < r300->real_vertex_buffer_count; i++) { if (r300->vertex_buffer[i].buffer) { void *buf = pipe_buffer_map(pipe, r300->vertex_buffer[i].buffer, @@ -810,7 +810,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, draw_flush(r300->draw); r300->draw_vbo_locked = FALSE; - for (i = 0; i < r300->vertex_buffer_count; i++) { + for (i = 0; i < r300->real_vertex_buffer_count; i++) { if (r300->vertex_buffer[i].buffer) { pipe_buffer_unmap(pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index c48062c8084..3b544ae63b7 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -30,9 +30,8 @@ #include "r300_context.h" #include "translate/translate.h" #include "util/u_index_modify.h" +#include "util/u_upload_mgr.h" -/* XXX Optimization: use min_index and translate only that range. */ -/* XXX Use the uploader. */ void r300_begin_vertex_translate(struct r300_context *r300, int min_index, int max_index) { @@ -43,11 +42,12 @@ void r300_begin_vertex_translate(struct r300_context *r300, struct translate *tr; struct r300_vertex_element_state *ve = r300->velems; boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; - unsigned slot; + uint8_t *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; + struct pipe_resource *out_buffer = NULL; + unsigned i, num_verts, out_offset; + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + boolean flushed; /* Initialize the translate key, i.e. the recipe how vertices should be * translated. 
*/ @@ -59,6 +59,7 @@ void r300_begin_vertex_translate(struct r300_context *r300, /* Check for support. */ if (ve->velem[i].src_format == ve->hw_format[i] && + /* These two are r300-specific. */ (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 && vb->stride % 4 == 0) { continue; @@ -66,23 +67,23 @@ void r300_begin_vertex_translate(struct r300_context *r300, /* Workaround for translate: output floats instead of halfs. */ switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; } /* Add this vertex element. 
*/ @@ -91,7 +92,7 @@ void r300_begin_vertex_translate(struct r300_context *r300, te->instance_divisor;*/ te->input_buffer = ve->velem[i].vertex_buffer_index; te->input_format = ve->velem[i].src_format; - te->input_offset = vb->buffer_offset + ve->velem[i].src_offset; + te->input_offset = ve->velem[i].src_offset; te->output_format = output_format; te->output_offset = key.output_stride; @@ -112,19 +113,22 @@ void r300_begin_vertex_translate(struct r300_context *r300, vb_map[i] = pipe_buffer_map(pipe, vb->buffer, PIPE_TRANSFER_READ, &vb_transfer[i]); - tr->set_buffer(tr, i, vb_map[i], vb->stride, max_index); + tr->set_buffer(tr, i, + vb_map[i] + vb->buffer_offset + vb->stride * min_index, + vb->stride, ~0); } } /* Create and map the output buffer. */ - num_verts = max_index + 1; + num_verts = max_index + 1 - min_index; - out_buffer = pipe_buffer_create(&r300->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); + u_upload_alloc(r300->upload_vb, + key.output_stride * min_index, + key.output_stride * num_verts, + &out_offset, &out_buffer, &flushed, + (void**)&out_map); - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); + out_offset -= key.output_stride * min_index; /* Translate. */ tr->run(tr, 0, num_verts, 0, out_map); @@ -136,48 +140,46 @@ void r300_begin_vertex_translate(struct r300_context *r300, } } - pipe_buffer_unmap(pipe, out_transfer); - /* Setup the new vertex buffer in the first free slot. */ - slot = ~0; + r300->tran.vb_slot = ~0; for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&r300->valid_vertex_buffer[i], out_buffer); - vb->buffer_offset = 0; - vb->stride = key.output_stride; - slot = i; - /* XXX probably need to preserve the real count for u_blitter_save_*. 
*/ - r300->vertex_buffer_count = MAX2(r300->vertex_buffer_count, i+1); + if (!r300->vertex_buffer[i].buffer) { + r300->tran.vb_slot = i; + + if (i >= r300->vertex_buffer_count) { + r300->real_vertex_buffer_count = i+1; + } + + /* r300-specific: */ r300->validate_buffers = TRUE; + r300->vertex_arrays_dirty = TRUE; break; } } - /* XXX This may fail. */ - assert(slot != ~0); - - /* Save and replace vertex elements. */ - { - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - r300->tran.saved_velems = r300->velems; + if (r300->tran.vb_slot != ~0) { + /* Setup the new vertex buffer. */ + pipe_resource_reference(&r300->real_vertex_buffer[r300->tran.vb_slot], out_buffer); + r300->vertex_buffer[r300->tran.vb_slot].buffer_offset = out_offset; + r300->vertex_buffer[r300->tran.vb_slot].stride = key.output_stride; + /* Setup new vertex elements. */ for (i = 0; i < ve->count; i++) { if (vb_translated[ve->velem[i].vertex_buffer_index]) { te = &key.element[tr_elem_index[i]]; new_velems[i].instance_divisor = ve->velem[i].instance_divisor; new_velems[i].src_format = te->output_format; new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = slot; + new_velems[i].vertex_buffer_index = r300->tran.vb_slot; } else { memcpy(&new_velems[i], &ve->velem[i], sizeof(struct pipe_vertex_element)); } } + r300->tran.saved_velems = r300->velems; r300->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems); } @@ -188,9 +190,19 @@ void r300_end_vertex_translate(struct r300_context *r300) { struct pipe_context *pipe = &r300->context; + if (r300->tran.new_velems == NULL) { + return; + } + /* Restore vertex elements. 
*/ pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems); + r300->tran.saved_velems = NULL; pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems); + r300->tran.new_velems = NULL; + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&r300->real_vertex_buffer[r300->tran.vb_slot], NULL); + r300->real_vertex_buffer_count = r300->vertex_buffer_count; } /* XXX Use the uploader. */ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index cc3c1d7687e..af6fa1048c8 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -87,7 +87,7 @@ void r300_upload_user_buffers(struct r300_context *r300, int i, nr = r300->velems->count; unsigned count = max_index + 1 - min_index; boolean flushed; - boolean uploaded[16] = {0}; + boolean uploaded[32] = {0}; for (i = 0; i < nr; i++) { unsigned index = r300->velems->velem[i].vertex_buffer_index; @@ -105,16 +105,10 @@ void r300_upload_user_buffers(struct r300_context *r300, size = r300->velems->hw_format_size[i]; } - DBG(r300, DBG_UPLOAD, - "Uploading %i bytes, index: %i, buffer: %p, userptr: %p " - "offset: %i, stride: %i.\n", - size, index, userbuf, userbuf->user_buffer, - vb->buffer_offset, vb->stride); - u_upload_data(r300->upload_vb, first, size, userbuf->user_buffer + first, &vb->buffer_offset, - &r300->valid_vertex_buffer[index], + &r300->real_vertex_buffer[index], &flushed); vb->buffer_offset -= first; @@ -127,10 +121,9 @@ void r300_upload_user_buffers(struct r300_context *r300, } uploaded[index] = TRUE; } else { - assert(r300->valid_vertex_buffer[index]); + assert(r300->real_vertex_buffer[index]); } } - DBG(r300, DBG_UPLOAD, "-------\n"); } static void r300_buffer_destroy(struct pipe_screen *screen, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index ba456d413f6..dad41ab91ed 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ 
b/src/gallium/drivers/r300/r300_state.c @@ -1560,21 +1560,22 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, /* Reference our buffer. */ pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer); if (vbo->buffer && r300_is_user_buffer(vbo->buffer)) { - pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); + pipe_resource_reference(&r300->real_vertex_buffer[i], NULL); } else { - pipe_resource_reference(&r300->valid_vertex_buffer[i], vbo->buffer); + pipe_resource_reference(&r300->real_vertex_buffer[i], vbo->buffer); } } - for (; i < r300->vertex_buffer_count; i++) { + for (; i < r300->real_vertex_buffer_count; i++) { /* Dereference any old buffers. */ pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); + pipe_resource_reference(&r300->real_vertex_buffer[i], NULL); } memcpy(r300->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); r300->vertex_buffer_count = count; + r300->real_vertex_buffer_count = count; } static void r300_set_index_buffer(struct pipe_context* pipe, -- cgit v1.2.3 From debc45bca07a5dfad4199079f080b35c19f00e85 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 29 Jan 2011 16:53:48 +0100 Subject: r300g: upload translated indices via the uploader --- src/gallium/drivers/r300/r300_render_translate.c | 65 ++++++++++++++++-------- 1 file changed, 45 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 3b544ae63b7..ec4eaa9d624 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -205,34 +205,59 @@ void r300_end_vertex_translate(struct r300_context *r300) r300->real_vertex_buffer_count = r300->vertex_buffer_count; } -/* XXX Use the uploader. 
*/ void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, unsigned *start, unsigned count) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + boolean flushed; + switch (*index_size) { - case 1: - util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count); - *index_size = 2; - *start = 0; + case 1: + u_upload_alloc(r300->upload_ib, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r300->context, *index_buffer, index_offset, + *start, count, ptr); + + pipe_resource_reference(index_buffer, out_buffer); + *index_size = 2; + *start = out_offset / 2; + r300->validate_buffers = TRUE; + break; + + case 2: + if (index_offset) { + u_upload_alloc(r300->upload_ib, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_rebuild_ushort_elts_to_userptr(&r300->context, *index_buffer, + index_offset, *start, + count, ptr); + + pipe_resource_reference(index_buffer, out_buffer); + *start = out_offset / 2; r300->validate_buffers = TRUE; - break; + } + break; - case 2: - if (index_offset) { - util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count); - *start = 0; - r300->validate_buffers = TRUE; - } - break; + case 4: + if (index_offset) { + u_upload_alloc(r300->upload_ib, 0, count * 4, + &out_offset, &out_buffer, &flushed, &ptr); - case 4: - if (index_offset) { - util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count); - *start = 0; - r300->validate_buffers = TRUE; - } - break; + util_rebuild_uint_elts_to_userptr(&r300->context, *index_buffer, + index_offset, *start, + count, ptr); + + pipe_resource_reference(index_buffer, out_buffer); + *start = out_offset / 4; + r300->validate_buffers = TRUE; + } + break; } } -- cgit v1.2.3 From 71f610e26ea7d71043b1a8ceeb8af7d11d75d6ab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 30 
Jan 2011 18:07:10 +1000 Subject: r600g: fixes a segfault in the piglit fbo-genmipmap-formats test. should be no need to unset this ptr here and if we don't end up using the blitter we've just broken the state. --- src/gallium/drivers/r600/r600_blit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index b487182e3aa..71a504cb9a2 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -55,8 +55,6 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op } util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffers, rctx->vertex_buffer); - rctx->vertex_elements = NULL; - if (op & (R600_CLEAR_SURFACE | R600_COPY)) util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); -- cgit v1.2.3 From cad0520179ef2a62b2756b01d37663f495a00619 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sun, 30 Jan 2011 01:08:54 -0800 Subject: r600g: Fix void pointer arithmetic. Fixes SCons build. --- src/gallium/drivers/r600/r600_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 6ebe6ab2364..2e225860906 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -236,7 +236,7 @@ void r600_upload_user_buffers(struct r600_pipe_context *rctx, } u_upload_data(rctx->upload_vb, first, size, - userbuf->user_buffer + first, + (uint8_t*)userbuf->user_buffer + first, &vb->buffer_offset, &rctx->real_vertex_buffer[index], &flushed); -- cgit v1.2.3 From 38b54158b68479e1f97c8452ba0d67f50dce7582 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Sun, 30 Jan 2011 18:57:39 +0100 Subject: r600g: Update the flushed depth texture after drawing to the corresponding texture. 
I know Jerome will probably rewrite the way depth textures work sometime soon. For the time being this should at least make common depth texture usage for shadowing work properly though. --- src/gallium/drivers/r600/r600_blit.c | 28 ++++++++++++++++++++++++++++ src/gallium/drivers/r600/r600_pipe.h | 2 ++ src/gallium/drivers/r600/r600_resource.h | 1 + src/gallium/drivers/r600/r600_state_common.c | 8 ++++++++ src/gallium/drivers/r600/r600_texture.c | 7 +++---- 5 files changed, 42 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 71a504cb9a2..83c02e55802 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -36,6 +36,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + rctx->blit = true; r600_context_queries_suspend(&rctx->ctx); util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); @@ -74,6 +75,7 @@ static void r600_blitter_end(struct pipe_context *ctx) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_context_queries_resume(&rctx->ctx); + rctx->blit = false; } void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) @@ -82,6 +84,9 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t struct pipe_surface *zsurf, *cbsurf, surf_tmpl; int level = 0; float depth = 1.0f; + + if (texture->flushed) return; + surf_tmpl.format = texture->resource.base.b.format; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = 0; @@ -102,11 +107,34 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); r600_blitter_end(ctx); + texture->flushed = true; 
pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); } +void r600_flush_depth_textures(struct r600_pipe_context *rctx) +{ + unsigned int i; + + if (rctx->blit) return; + + /* FIXME: This handles fragment shader textures only. */ + + for (i = 0; i < rctx->ps_samplers.n_views; ++i) { + struct r600_pipe_sampler_view *view; + struct r600_resource_texture *tex; + + view = rctx->ps_samplers.views[i]; + if (!view) continue; + + tex = (struct r600_resource_texture *)view->base.texture; + if (!tex->depth) continue; + + r600_blit_uncompress_depth(&rctx->context, tex); + } +} + static void r600_clear(struct pipe_context *ctx, unsigned buffers, const float *rgba, double depth, unsigned stencil) { diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index cf4211cb8d3..beb4db12b08 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -171,6 +171,7 @@ struct r600_pipe_context { unsigned vb_max_index; struct r600_translate_context tran; struct u_upload_mgr *upload_const; + bool blit; }; struct r600_drawl { @@ -197,6 +198,7 @@ void evergreen_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_flush_depth_textures(struct r600_pipe_context *rctx); /* r600_buffer.c */ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 6e302444712..5b5df5a5bac 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -63,6 +63,7 @@ struct r600_resource_texture { unsigned depth; unsigned dirty; struct r600_resource_texture *flushed_depth_texture; + bool flushed; }; #define R600_BUFFER_MAGIC 0xabcd1600 diff --git 
a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 2df8188f0a2..e086e272c8e 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -496,6 +496,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct r600_drawl draw = {}; unsigned prim; + r600_flush_depth_textures(rctx); + if (rctx->vertex_elements->incompatible_layout) { r600_begin_vertex_translate(rctx, info->min_index, info->max_index); } @@ -597,6 +599,12 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_context_draw(&rctx->ctx, &rdraw); } + if (rctx->framebuffer.zsbuf) + { + struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture; + ((struct r600_resource_texture *)tex)->flushed = false; + } + pipe_resource_reference(&draw.index_buffer, NULL); /* delete previous translated vertex elements */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 51560bd19e6..91076269ec7 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -302,6 +302,9 @@ r600_texture_create_object(struct pipe_screen *screen, resource->bo = bo; rtex->pitch_override = pitch_in_bytes_override; + if (util_format_is_depth_or_stencil(base->format)) + rtex->depth = 1; + if (array_mode) rtex->tiled = 1; r600_setup_miptree(screen, rtex, array_mode); @@ -632,7 +635,6 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; if (rtransfer->staging_texture) { if (transfer->usage & PIPE_TRANSFER_WRITE) { @@ -640,9 +642,6 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, } pipe_resource_reference(&rtransfer->staging_texture, NULL); } - if (rtex->flushed_depth_texture) { - 
pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); - } pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } -- cgit v1.2.3 From 5555cd776b970bce020be59193054474a2a63317 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 31 Jan 2011 10:01:06 +1000 Subject: r600g: handle the write all cbufs property. This only works on r600/r700 so far, evergreen doesn't appear to have the multiwrite enable bit in the color control, so we may have to actually do a shader rewrite on EG hardware. remove some duplicate code reg defines also. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 2 +- src/gallium/drivers/r600/evergreend.h | 3 --- src/gallium/drivers/r600/r600_shader.c | 15 ++++++++++++++- src/gallium/drivers/r600/r600_shader.h | 1 + src/gallium/drivers/r600/r600d.h | 25 ------------------------- 5 files changed, 16 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index fa239a816b5..1afbf892cfd 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -103,7 +103,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } blend->cb_target_mask = target_mask; r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); + color_control, 0xFFFFFFFD, NULL); r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); for (int i = 0; i < 8; i++) { diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index e09e02ca000..e6de36ab954 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -430,9 +430,6 @@ #define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) #define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) #define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 
0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD #define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) #define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) #define C_028808_DITHER_ENABLE 0xFFFFFFFB diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index df97c32bc74..41849875074 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -175,6 +175,13 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade R_0288CC_SQ_PGM_CF_OFFSET_PS, 0x00000000, 0xFFFFFFFF, NULL); + if (rshader->fs_write_all) { + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + S_028808_MULTIWRITE_ENABLE(1), + S_028808_MULTIWRITE_ENABLE(1), + NULL); + } + if (rshader->uses_kill) { /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, @@ -495,6 +502,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals) { struct tgsi_full_immediate *immediate; + struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned output_done, noutput; @@ -563,7 +571,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.nliterals = 0; ctx.literals = NULL; - + shader->fs_write_all = FALSE; while (!tgsi_parse_end_of_tokens(&ctx.parse)) { tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { @@ -602,6 +610,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s goto out_err; break; case TGSI_TOKEN_TYPE_PROPERTY: + property = &ctx.parse.FullToken.FullProperty; + if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + if (property->u[0].Data == 1) + shader->fs_write_all = TRUE; + } break; default: R600_ERR("unsupported token 
type %d\n", ctx.parse.FullToken.Token.Type); diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 935dd6fe3ab..dfa9dd20de7 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -45,6 +45,7 @@ struct r600_shader { struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; + boolean fs_write_all; }; int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 8c391936db0..1814f504ed9 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -2332,31 +2332,6 @@ #define R_0280D4_CB_COLOR5_TILE 0x0280D4 #define R_0280D8_CB_COLOR6_TILE 0x0280D8 #define R_0280DC_CB_COLOR7_TILE 0x0280DC -#define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB -#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) -#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) -#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) -#define C_028808_SPECIAL_OP 0xFFFFFF8F -#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) -#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) -#define C_028808_PER_MRT_BLEND 0xFFFFFF7F -#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) -#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) -#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF -#define 
S_028808_ROP3(x) (((x) & 0xFF) << 16) -#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) -#define C_028808_ROP3 0xFF00FFFF #define R_028614_SPI_VS_OUT_ID_0 0x028614 #define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0) #define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF) -- cgit v1.2.3 From 065c8696e7b8290f9361ae88b8a7d99be9e2d0ef Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 31 Jan 2011 13:03:10 +1000 Subject: r600g: fix regression in cubemap tests since eea1d8199b376f37027c14669e0bdf991a22872d Although CUBE is a reduction inst, it writes to more than just PV.X so we need to keep the dst channel. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_asm.c | 17 ++++++++++++++++- src/gallium/drivers/r600/r600_texture.c | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3b0d01b8720..13bf7644e7e 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -355,6 +355,20 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } +static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + } +} + static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { switch (bc->chiprev) { @@ -722,7 +736,8 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, for (i = 0; i < 5; ++i) { if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { gpr[i] = prev[i]->dst.sel; - if (is_alu_reduction_inst(bc, prev[i])) + /* cube writes more than PV.X */ + if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i])) chan[i] = 0; else chan[i] = prev[i]->dst.chan; diff --git 
a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 91076269ec7..e45f4a517e0 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -923,7 +923,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = FMT_10_10_10_2; + result = FMT_2_10_10_10; goto out_word4; } goto out_unknown; -- cgit v1.2.3 From 2f7c876ff5af86c78c0f3debfbdc2a56c7b4d1fe Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 30 Jan 2011 22:41:13 -0500 Subject: r600g: remove some non-existent evergreen reg fields Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/evergreend.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index e6de36ab954..f7f0085b6a7 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -427,12 +427,6 @@ #define C_028800_STENCILZFAIL_BF 0x1FFFFFFF #define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB #define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) #define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) #define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -- cgit v1.2.3 From df8089df90de3e720fec46d6118b15094e94ccd7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 31 Jan 2011 16:03:29 +1000 Subject: r600g: fix occlusion query results. Like on some r5xx, there are multiple DB backends on the r600, we need to add up the query results from each of these to get the final correct value. 
So far I'm not 100% sure how to calculate the num_db value; setting it to 4 should be harmless enough until we do. This fixes occlusion_query piglit test on my rv740. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600.h | 1 + src/gallium/winsys/r600/drm/r600_hw_context.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index b8888bede20..15cfb7f0c46 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -250,6 +250,7 @@ struct r600_context { struct list_head query_list; unsigned num_query_running; struct list_head fenced_bo; + unsigned num_db; /* for OQ */ }; struct r600_draw { diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 6b7e4d886f2..53879a57fa5 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -751,6 +751,10 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) /* init dirty list */ LIST_INITHEAD(&ctx->dirty); + + /* TODO update this value correctly */ + ctx->num_db = 4; + return 0; out_err: r600_context_fini(ctx); @@ -1252,6 +1256,7 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu u64 start, end; u32 *results; int i; + int size; if (wait) results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_CPU_READ, NULL); @@ -1260,7 +1265,8 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu if (!results) return FALSE; - for (i = 0; i < query->num_results; i += 4) { + size = query->num_results * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? 
ctx->num_db : 1); + for (i = 0; i < size; i += 4) { start = (u64)results[i] | (u64)results[i + 1] << 32; end = (u64)results[i + 2] | (u64)results[i + 3] << 32; if (((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) @@ -1338,7 +1344,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); - query->num_results += 4; + query->num_results += 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->num_db : 1); query->state ^= R600_QUERY_STATE_STARTED; query->state |= R600_QUERY_STATE_ENDED; ctx->num_query_running--; -- cgit v1.2.3 From 26a4c1cb650eee1380f87f3d7e8cff43c3d6a3e6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 31 Jan 2011 02:47:54 -0500 Subject: r600g: fix OQ on evergreen 6xx/7xx have a max of 4 DBs, evergreen have a max of 8. Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 2 +- src/gallium/winsys/r600/drm/r600_hw_context.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 15cfb7f0c46..64c52bca795 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -250,7 +250,7 @@ struct r600_context { struct list_head query_list; unsigned num_query_running; struct list_head fenced_bo; - unsigned num_db; /* for OQ */ + unsigned max_db; /* for OQ */ }; struct r600_draw { diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 53879a57fa5..f4e2aaa772c 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -753,7 +753,10 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) LIST_INITHEAD(&ctx->dirty); /* TODO update this value correctly */ - ctx->num_db = 4; + if (radeon->family >= CHIP_CEDAR) + ctx->max_db = 8; 
+ else + ctx->max_db = 4; return 0; out_err: @@ -1265,7 +1268,7 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu if (!results) return FALSE; - size = query->num_results * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->num_db : 1); + size = query->num_results * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1); for (i = 0; i < size; i += 4) { start = (u64)results[i] | (u64)results[i + 1] << 32; end = (u64)results[i + 2] | (u64)results[i + 3] << 32; @@ -1344,7 +1347,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); - query->num_results += 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->num_db : 1); + query->num_results += 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1); query->state ^= R600_QUERY_STATE_STARTED; query->state |= R600_QUERY_STATE_ENDED; ctx->num_query_running--; -- cgit v1.2.3 From a61b7aa90df7ea4eff9fe34fabba3fb7342a7f77 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 28 Jan 2011 16:09:04 +0100 Subject: svga: Print the number and mnemonic of the opcode we're missing information for. Makes it easier to figure out which opcode it's about. --- src/gallium/drivers/svga/svgadump/svga_shader_op.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c index 95612a80063..b40e24f8927 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -156,6 +156,8 @@ const struct sh_opcode_info *svga_opcode_info( uint op ) if (info->svga_opcode == SVGA3DOP_INVALID) { /* No valid information. Please provide number of dst/src registers. 
*/ + _debug_printf("Missing information for opcode %u, '%s'\n", op, + opcode_info[op].mnemonic); assert( 0 ); return NULL; } -- cgit v1.2.3 From 11c11ee0bc96c5b84f98c0223f52fa6d2b4fe5c8 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 28 Jan 2011 16:09:04 +0100 Subject: svga: TEXLDL opcode dst/src register information is correct. --- src/gallium/drivers/svga/svgadump/svga_shader_op.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c index b40e24f8927..ad1549d9f81 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -136,7 +136,7 @@ static struct sh_opcode_info opcode_info[] = { "dsy", 1, 1, 0, 0, SVGA3DOP_INVALID, }, { "texldd", 1, 4, 0, 0, SVGA3DOP_INVALID, }, { "setp", 1, 2, 0, 0, SVGA3DOP_SETP, }, - { "texldl", 1, 2, 0, 0, SVGA3DOP_INVALID, }, + { "texldl", 1, 2, 0, 0, SVGA3DOP_TEXLDL, }, { "breakp", 0, 1, 0, 0, SVGA3DOP_INVALID, }, }; -- cgit v1.2.3 From 5a1ce49c82e245f1f86510d9e1ff7db46a32012b Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 28 Jan 2011 16:09:05 +0100 Subject: svga: Fix translation of TGSI SSG opcode. SVGA3D only supports SGN for vertex shaders, and this requires two additional temporary registers for intermediate results. For fragment shaders, lower to two CMPs and one ADD. 
--- src/gallium/drivers/svga/svga_tgsi_insn.c | 40 ++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index f2591c5721a..5f262b3eadb 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -57,7 +57,6 @@ translate_opcode( case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM; - case TGSI_OPCODE_SSG: return SVGA3DOP_SGN; default: debug_printf("Unkown opcode %u\n", opcode); assert( 0 ); @@ -1066,6 +1065,41 @@ static boolean emit_cos(struct svga_shader_emitter *emit, return TRUE; } +static boolean emit_ssg(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + SVGA3dShaderDestToken temp0 = get_temp( emit ); + SVGA3dShaderDestToken temp1 = get_temp( emit ); + struct src_register zero, one; + + if (emit->unit == PIPE_SHADER_VERTEX) { + /* SGN DST, SRC0, TMP0, TMP1 */ + return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0, + src( temp0 ), src( temp1 ) ); + } + + zero = get_zero_immediate( emit ); + one = scalar( zero, TGSI_SWIZZLE_W ); + zero = scalar( zero, TGSI_SWIZZLE_X ); + + /* CMP TMP0, SRC0, one, zero */ + if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), + writemask( temp0, dst.mask ), src0, one, zero )) + return FALSE; + + /* CMP TMP1, negate(SRC0), negate(one), zero */ + if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), + writemask( temp1, dst.mask ), negate( src0 ), negate( one ), + zero )) + return FALSE; + + /* ADD DST, TMP0, TMP1 */ + return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ), + src( temp1 ) ); +} /* * ADD DST SRC0, negate(SRC0) @@ -2366,6 +2400,9 @@ static 
boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_LRP: return emit_lrp( emit, insn ); + case TGSI_OPCODE_SSG: + return emit_ssg( emit, insn ); + default: { unsigned opcode = translate_opcode(insn->Instruction.Opcode); @@ -2715,6 +2752,7 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) return TRUE; if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 || emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) return TRUE; } -- cgit v1.2.3 From a8c144a388a5c7275d8d223c5508c66a0612a1b0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 31 Jan 2011 14:09:17 -0700 Subject: llvmpipe: fix incorrect array index in image dump code --- src/gallium/drivers/llvmpipe/lp_flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 85e3cdec82c..849db06acdf 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -72,7 +72,7 @@ llvmpipe_flush( struct pipe_context *pipe, for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); - debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]); } if (0) { -- cgit v1.2.3 From 7fb722c35c2d2cb74b14417a8ddc0684ed6dd838 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 31 Jan 2011 23:38:10 +0100 Subject: r600g: fix invalid ref count handling in r600_set_constant_buffer Only decrement ref count if r600_upload_const_buffer really changes the buffer. 
--- src/gallium/drivers/r600/r600_state_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index e086e272c8e..d82985e1b1f 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -433,7 +433,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, return; } - if (!rbuffer->user_buffer) + if (buffer != &rbuffer->r.base.b) pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); } -- cgit v1.2.3 From d171ae086bae37279251a1d6f32e16e333cfc154 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 1 Feb 2011 01:17:02 +0100 Subject: r600g: Actually use the info from the flushed depth texture when creating a sampler view on a depth texture. R600/R700 was using incorrect tiling information from the (compressed) depth buffer. Evergreen worked anyway because tiling doesn't work. 
--- src/gallium/drivers/r600/evergreen_state.c | 15 ++++++--------- src/gallium/drivers/r600/r600_state.c | 14 +++++--------- 2 files changed, 11 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 1afbf892cfd..5a923db3dfb 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -380,18 +380,15 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte if (desc == NULL) { R600_ERR("unknow format %d\n", state->format); } - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ + tmp = (struct r600_resource_texture *)texture; if (tmp->depth) { r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; + tmp = tmp->flushed_depth_texture; } + rbuffer = &tmp->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; + pitch = align(tmp->pitch_in_pixels[0], 8); /* FIXME properly handle first level != 0 */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 8c583b4f39a..d23f242e567 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -428,18 +428,14 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c if (desc == NULL) { R600_ERR("unknow format %d\n", state->format); } - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ + tmp = (struct r600_resource_texture *)texture; if (tmp->depth) { r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - 
bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; + tmp = tmp->flushed_depth_texture; } + rbuffer = &tmp->resource; + bo[0] = rbuffer->bo; + bo[1] = rbuffer->bo; pitch = align(tmp->pitch_in_pixels[0], 8); if (tmp->tiled) { array_mode = tmp->array_mode[0]; -- cgit v1.2.3 From 42b5f6819881e4885ff73a17635eb7d2d341563d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 29 Jan 2011 21:25:37 +1000 Subject: r600g: start looking at evergreen tiling. this just adds the ioctl interface and sets the tile type and array mode in the correct place. This seems to bring eg 1D tiling to the same level, and issues as on r600. No idea how to address 2D yet. --- src/gallium/drivers/r600/evergreen_state.c | 19 ++++++-- src/gallium/drivers/r600/evergreend.h | 3 ++ src/gallium/winsys/r600/drm/r600_drm.c | 77 +++++++++++++++++++++++------- 3 files changed, 79 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 5a923db3dfb..8c4f0f9aad6 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -351,7 +351,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct r600_resource *rbuffer; unsigned format; uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4]; + unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; if (resource == NULL) @@ -390,15 +390,21 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte bo[1] = rbuffer->bo; pitch = align(tmp->pitch_in_pixels[0], 8); + if (tmp->tiled) { + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; + } /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, S_030000_DIM(r600_tex_dim(texture->target)) | S_030000_PITCH((pitch / 8) - 1) | + S_030000_NON_DISP_TILING_ORDER(tile_type) | S_030000_TEX_WIDTH(texture->width0 - 
1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, S_030004_TEX_HEIGHT(texture->height0 - 1) | - S_030004_TEX_DEPTH(texture->depth0 - 1), + S_030004_TEX_DEPTH(texture->depth0 - 1) | + S_030004_ARRAY_MODE(array_mode), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); @@ -635,6 +641,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned color_info; unsigned format, swap, ntype; unsigned offset; + unsigned tile_type; const struct util_format_description *desc; struct r600_bo *bo[3]; @@ -659,11 +666,17 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state swap = r600_translate_colorswap(rtex->resource.base.b.format); color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | + S_028C70_ARRAY_MODE(rtex->array_mode[level]) | S_028C70_BLEND_CLAMP(1) | S_028C70_NUMBER_TYPE(ntype); if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) color_info |= S_028C70_SOURCE_FORMAT(1); + if (rtex->tiled) { + tile_type = rtex->tile_type; + } else /* workaround for linear buffers */ + tile_type = 1; + /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, @@ -687,7 +700,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, - S_028C74_NON_DISP_TILING_ORDER(1), + S_028C74_NON_DISP_TILING_ORDER(tile_type), 0xFFFFFFFF, bo[0]); } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index f7f0085b6a7..dec32b504ee 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -930,6 +930,9 @@ #define V_030000_SQ_TEX_DIM_2D_ARRAY 0x00000005 #define V_030000_SQ_TEX_DIM_2D_MSAA 0x00000006 #define 
V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007 +#define S_030000_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 5) +#define G_030000_NON_DISP_TILING_ORDER(x) (((x) >> 5) & 0x1) +#define C_030000_NON_DISP_TILING_ORDER 0xFFFFFFDF #define S_030000_PITCH(x) (((x) & 0xFFF) << 6) #define G_030000_PITCH(x) (((x) >> 6) & 0xFFF) #define C_030000_PITCH 0xFFFC003F diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 3c7e9aa4490..096c178aa38 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -78,20 +78,8 @@ static int radeon_get_device(struct radeon *radeon) return r; } -static int radeon_drm_get_tiling(struct radeon *radeon) +static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) { - struct drm_radeon_info info; - int r; - uint32_t tiling_config = 0; - - info.request = RADEON_INFO_TILING_CONFIG; - info.value = (uintptr_t)&tiling_config; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - - if (r) - return 0; - switch ((tiling_config & 0xe) >> 1) { case 0: radeon->tiling_info.num_channels = 1; @@ -133,6 +121,62 @@ static int radeon_drm_get_tiling(struct radeon *radeon) return 0; } +static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) +{ + switch (tiling_config & 0xf) { + case 0: + radeon->tiling_info.num_channels = 1; + break; + case 1: + radeon->tiling_info.num_channels = 2; + break; + case 2: + radeon->tiling_info.num_channels = 4; + break; + case 3: + radeon->tiling_info.num_channels = 8; + break; + default: + return -EINVAL; + } + + radeon->tiling_info.num_banks = (tiling_config & 0xf0) >> 4; + + switch ((tiling_config & 0xf00) >> 8) { + case 0: + radeon->tiling_info.group_bytes = 256; + break; + case 1: + radeon->tiling_info.group_bytes = 512; + break; + default: + return -EINVAL; + } + return 0; +} + +static int radeon_drm_get_tiling(struct radeon *radeon) +{ + struct drm_radeon_info info; 
+ int r; + uint32_t tiling_config = 0; + + info.request = RADEON_INFO_TILING_CONFIG; + info.value = (uintptr_t)&tiling_config; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + + if (r) + return 0; + + if (radeon->chip_class == R600 || radeon->chip_class == R700) { + r = r600_interpret_tiling(radeon, tiling_config); + } else { + r = eg_interpret_tiling(radeon, tiling_config); + } + return r; +} + static int radeon_get_clock_crystal_freq(struct radeon *radeon) { struct drm_radeon_info info; @@ -228,10 +272,9 @@ static struct radeon *radeon_new(int fd, unsigned device) break; } - if (radeon->chip_class == R600 || radeon->chip_class == R700) { - if (radeon_drm_get_tiling(radeon)) - return NULL; - } + if (radeon_drm_get_tiling(radeon)) + return NULL; + /* get the GPU counter frequency, failure is non fatal */ radeon_get_clock_crystal_freq(radeon); -- cgit v1.2.3 From aee5f1e40ca27149a6226187e855125821d96971 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 1 Feb 2011 13:00:56 +1000 Subject: r600: only decompress depth when its tile type is wrong. If the tile type for the buffer is 1 then it's been bound to the DB at some point, we need to decompress it, otherwise it's only been bound as texture/cb so don't do anything. This fixes 5 piglit tests here on r600g. 
--- src/gallium/drivers/r600/r600_blit.c | 6 +++++- src/gallium/drivers/r600/r600_state.c | 2 +- src/gallium/drivers/r600/r600_texture.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 83c02e55802..a8e85df3c48 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -129,7 +129,11 @@ void r600_flush_depth_textures(struct r600_pipe_context *rctx) if (!view) continue; tex = (struct r600_resource_texture *)view->base.texture; - if (!tex->depth) continue; + if (!tex->depth) + continue; + + if (tex->tile_type == 0) + continue; r600_blit_uncompress_depth(&rctx->context, tex); } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index d23f242e567..acaa5c05ae8 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -429,7 +429,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c R600_ERR("unknow format %d\n", state->format); } tmp = (struct r600_resource_texture *)texture; - if (tmp->depth) { + if (tmp->depth && tmp->tile_type == 1) { r600_texture_depth_flush(ctx, texture); tmp = tmp->flushed_depth_texture; } diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index e45f4a517e0..dd280491984 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -574,7 +574,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.level = level; trans->transfer.usage = usage; trans->transfer.box = *box; - if (rtex->depth) { + if (rtex->depth && rtex->tile_type == 1) { /* XXX: only readback the rectangle which is being mapped? 
*/ /* XXX: when discard is true, no need to read back from depth texture -- cgit v1.2.3 From 8b5a50b31ca272ab8761e2a31025a54d8999ec06 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 1 Feb 2011 13:06:35 +1000 Subject: r600g: fix evergreen for depth decompress test --- src/gallium/drivers/r600/evergreen_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 8c4f0f9aad6..426b3a9d6d8 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -381,7 +381,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte R600_ERR("unknow format %d\n", state->format); } tmp = (struct r600_resource_texture *)texture; - if (tmp->depth) { + if (tmp->depth && tmp->tile_type == 1) { r600_texture_depth_flush(ctx, texture); tmp = tmp->flushed_depth_texture; } -- cgit v1.2.3 From 11bc8991e94e2fa6d461193a6aff47f8f94b7a47 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 1 Feb 2011 14:38:45 +1000 Subject: r600g: just change tile type when buffer is set to depth. Not 100% sure on this one, but this is how it should work, the question is whether it will uncover other bugs elsewhere. 
--- src/gallium/drivers/r600/evergreen_state.c | 3 --- src/gallium/drivers/r600/r600_state.c | 5 ++--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 426b3a9d6d8..c64b93bd4fc 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -721,10 +721,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode[level] = 2; rtex->tile_type = 1; - rtex->depth = 1; rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index acaa5c05ae8..6adbbd9eabe 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -759,10 +759,9 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode[level] = 2; + rtex->tile_type = 1; - rtex->depth = 1; + rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ -- cgit v1.2.3 From 8ca3b140eb53fd8063337a5a2a54a35987d597bc Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 2 Feb 2011 00:24:34 +0100 Subject: r600g: use burst exports in shaders Join multiple exports into just one instruction instead of exporting each register separately. 
--- src/gallium/drivers/r600/eg_asm.c | 3 ++- src/gallium/drivers/r600/r600_asm.c | 36 ++++++++++++++++++++++++++++++++-- src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_shader.c | 3 +++ 4 files changed, 40 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 67d742b3760..80c5de39750 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -62,7 +62,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); - bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 13bf7644e7e..e910d1cc73f 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -246,6 +246,37 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) { int r; + if (bc->cf_last && (bc->cf_last->inst == output->inst || + (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) && + output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) && + output->type == bc->cf_last->output.type && + output->elem_size == bc->cf_last->output.elem_size && + output->swizzle_x == bc->cf_last->output.swizzle_x && + output->swizzle_y == bc->cf_last->output.swizzle_y && + output->swizzle_z == bc->cf_last->output.swizzle_z && + 
output->swizzle_w == bc->cf_last->output.swizzle_w && + (output->burst_count + bc->cf_last->output.burst_count) <= 16) { + + if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && + (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { + + bc->cf_last->output.end_of_program |= output->end_of_program; + bc->cf_last->output.inst = output->inst; + bc->cf_last->output.gpr = output->gpr; + bc->cf_last->output.array_base = output->array_base; + bc->cf_last->output.burst_count += output->burst_count; + return 0; + + } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && + output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { + + bc->cf_last->output.end_of_program |= output->end_of_program; + bc->cf_last->output.inst = output->inst; + bc->cf_last->output.burst_count += output->burst_count; + return 0; + } + } + r = r600_bc_add_cf(bc); if (r) return r; @@ -1443,7 +1474,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); - bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | @@ -1725,9 +1757,9 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); - fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 
fprintf(stderr, "INST:%d ", cf->output.inst); + fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); break; case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 278b4466cb0..b91d9b5b41b 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -116,6 +116,7 @@ struct r600_bc_output { unsigned swizzle_y; unsigned swizzle_z; unsigned swizzle_w; + unsigned burst_count; unsigned barrier; }; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 41849875074..643c47d4bf6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -632,6 +632,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = i - pos0; @@ -695,6 +696,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].barrier = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; @@ -711,6 +713,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[0].swizzle_y = 7; output[0].swizzle_z = 7; output[0].swizzle_w = 7; + output[0].burst_count = 1; output[0].barrier = 1; output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; output[0].array_base = 0; -- cgit v1.2.3 From 50278c0901d07d0b6c8b883683b1f3d96378bdb5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 2 Feb 2011 11:28:41 +0000 Subject: svga: Flush upload buffers or we get asserts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Based on work from Jakob Bornecrantz, Michel Dänzer, and Brian Paul. --- src/gallium/drivers/svga/svga_context.c | 5 ----- src/gallium/drivers/svga/svga_draw.c | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 61f99d1eb18..2b8a70d18f1 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -207,11 +207,6 @@ void svga_context_flush( struct svga_context *svga, svga->curr.nr_fbs = 0; - /* Unmap upload manager buffers: - */ - u_upload_flush(svga->upload_vb); - u_upload_flush(svga->upload_ib); - /* Ensure that texture dma uploads are processed * before submitting commands. */ diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 97cbac447d6..2c873a0f7ac 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -28,6 +28,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_upload_mgr.h" #include "svga_context.h" #include "svga_draw.h" @@ -143,6 +144,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) SVGA3dPrimitiveRange *prim; unsigned i; + /* Unmap upload manager vertex buffers */ + u_upload_flush(svga->upload_vb); + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); if (handle == NULL) @@ -151,6 +155,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) vb_handle[i] = handle; } + /* Unmap upload manager index buffers */ + u_upload_flush(svga->upload_ib); + for (i = 0; i < hwtnl->cmd.prim_count; i++) { if (hwtnl->cmd.prim_ib[i]) { handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); -- cgit v1.2.3 From 843f206a342751084cbd55d9fb0fdf78fd310e99 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Feb 2011 20:30:54 -0700 Subject: softpipe: rename fragment sampler/view 
fields To be consistent with vertex, geometry sampler fields. --- src/gallium/drivers/softpipe/sp_context.h | 4 ++-- src/gallium/drivers/softpipe/sp_flush.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 035d712d17c..cbb2a3b04b1 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -91,8 +91,8 @@ struct softpipe_context { } so_target; struct pipe_query_data_so_statistics so_stats; - unsigned num_samplers; - unsigned num_sampler_views; + unsigned num_fragment_samplers; + unsigned num_fragment_sampler_views; unsigned num_vertex_samplers; unsigned num_vertex_sampler_views; unsigned num_geometry_samplers; diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index d422cb17a4b..6f7addd441a 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -51,7 +51,7 @@ softpipe_flush( struct pipe_context *pipe, draw_flush(softpipe->draw); if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - for (i = 0; i < softpipe->num_sampler_views; i++) { + for (i = 0; i < softpipe->num_fragment_sampler_views; i++) { sp_flush_tex_tile_cache(softpipe->fragment_tex_cache[i]); } for (i = 0; i < softpipe->num_vertex_sampler_views; i++) { diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 38943563800..9bc69f6e0ac 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -76,7 +76,7 @@ softpipe_bind_fragment_sampler_states(struct pipe_context *pipe, assert(num <= PIPE_MAX_SAMPLERS); /* Check for no-op */ - if (num == softpipe->num_samplers && + if (num == softpipe->num_fragment_samplers && !memcmp(softpipe->sampler, sampler, num * sizeof(void 
*))) return; @@ -87,7 +87,7 @@ softpipe_bind_fragment_sampler_states(struct pipe_context *pipe, for (i = num; i < PIPE_MAX_SAMPLERS; ++i) softpipe->sampler[i] = NULL; - softpipe->num_samplers = num; + softpipe->num_fragment_samplers = num; softpipe->dirty |= SP_NEW_SAMPLER; } @@ -191,7 +191,7 @@ softpipe_set_fragment_sampler_views(struct pipe_context *pipe, assert(num <= PIPE_MAX_SAMPLERS); /* Check for no-op */ - if (num == softpipe->num_sampler_views && + if (num == softpipe->num_fragment_sampler_views && !memcmp(softpipe->fragment_sampler_views, views, num * sizeof(struct pipe_sampler_view *))) return; @@ -205,7 +205,7 @@ softpipe_set_fragment_sampler_views(struct pipe_context *pipe, sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[i], view); } - softpipe->num_sampler_views = num; + softpipe->num_fragment_sampler_views = num; softpipe->dirty |= SP_NEW_TEXTURE; } -- cgit v1.2.3 From 4629be05098a4cfececcfa98ad88282800ab3a6c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Feb 2011 20:43:56 -0700 Subject: softpipe: rename sampler[] -> fragment_samplers[] --- src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index cbb2a3b04b1..c91709aef06 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -58,7 +58,7 @@ struct softpipe_context { /** Constant state objects */ struct pipe_blend_state *blend; - struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *fragment_samplers[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_sampler_state *geometry_samplers[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_depth_stencil_alpha_state *depth_stencil; diff --git 
a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 9bc69f6e0ac..60331bc4976 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -77,15 +77,15 @@ softpipe_bind_fragment_sampler_states(struct pipe_context *pipe, /* Check for no-op */ if (num == softpipe->num_fragment_samplers && - !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) + !memcmp(softpipe->fragment_samplers, sampler, num * sizeof(void *))) return; draw_flush(softpipe->draw); for (i = 0; i < num; ++i) - softpipe->sampler[i] = sampler[i]; + softpipe->fragment_samplers[i] = sampler[i]; for (i = num; i < PIPE_MAX_SAMPLERS; ++i) - softpipe->sampler[i] = NULL; + softpipe->fragment_samplers[i] = NULL; softpipe->num_fragment_samplers = num; @@ -374,10 +374,10 @@ softpipe_reset_sampler_variants(struct softpipe_context *softpipe) } for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { - if (softpipe->sampler[i]) { + if (softpipe->fragment_samplers[i]) { softpipe->tgsi.frag_samplers_list[i] = get_sampler_variant( i, - sp_sampler(softpipe->sampler[i]), + sp_sampler(softpipe->fragment_samplers[i]), softpipe->fragment_sampler_views[i], TGSI_PROCESSOR_FRAGMENT ); -- cgit v1.2.3 From d0293290ad620084d490b51693d97731a8935094 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 3 Feb 2011 13:21:08 +1000 Subject: r600g: set correct pitch/offset for depth textures in flushed state. 
This fixes zreaddraw in tiling mode --- src/gallium/drivers/r600/r600_texture.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index dd280491984..14422bbfe41 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -586,6 +586,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, FREE(trans); return NULL; } + trans->transfer.stride = rtex->flushed_depth_texture->pitch_in_bytes[level]; + trans->offset = r600_texture_get_offset(rtex->flushed_depth_texture, level, box->z); + return &trans->transfer; } else if (use_staging_texture) { resource.target = PIPE_TEXTURE_2D; resource.format = texture->format; -- cgit v1.2.3 From b13b7b86b2e1165b24a2df20cb67f9f3baa17b13 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 3 Feb 2011 13:12:35 +1000 Subject: r600g: rework dirty / depth texture tracking. this adds a flag to keep track of whether the depth texture structure is the flushed texture or not, so we can avoid doing flushes when we do a hw rendering from one to the other. it also renames flushed to dirty_db which tracks if the DB copy has been dirtied by being bound to the hw. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_blit.c | 9 ++++----- src/gallium/drivers/r600/r600_resource.h | 4 ++-- src/gallium/drivers/r600/r600_state.c | 4 +--- src/gallium/drivers/r600/r600_state_common.c | 2 +- src/gallium/drivers/r600/r600_texture.c | 3 ++- 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index a8e85df3c48..ca032811048 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -85,7 +85,8 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t int level = 0; float depth = 1.0f; - if (texture->flushed) return; + if (!texture->dirty_db) + return; surf_tmpl.format = texture->resource.base.b.format; surf_tmpl.u.tex.level = level; @@ -107,10 +108,11 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); r600_blitter_end(ctx); - texture->flushed = true; pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); + + texture->dirty_db = FALSE; } void r600_flush_depth_textures(struct r600_pipe_context *rctx) @@ -132,9 +134,6 @@ void r600_flush_depth_textures(struct r600_pipe_context *rctx) if (!tex->depth) continue; - if (tex->tile_type == 0) - continue; - r600_blit_uncompress_depth(&rctx->context, tex); } } diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 5b5df5a5bac..8d34b864f82 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -61,9 +61,9 @@ struct r600_resource_texture { unsigned tiled; unsigned tile_type; unsigned depth; - unsigned dirty; + unsigned dirty_db; struct r600_resource_texture *flushed_depth_texture; - bool flushed; + boolean 
is_flushing_texture; }; #define R600_BUFFER_MAGIC 0xabcd1600 diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 6adbbd9eabe..354d38ec234 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -429,7 +429,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c R600_ERR("unknow format %d\n", state->format); } tmp = (struct r600_resource_texture *)texture; - if (tmp->depth && tmp->tile_type == 1) { + if (tmp->depth && !tmp->is_flushing_texture) { r600_texture_depth_flush(ctx, texture); tmp = tmp->flushed_depth_texture; } @@ -760,8 +760,6 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tile_type = 1; - rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d82985e1b1f..3b037f8c8c2 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -602,7 +602,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) if (rctx->framebuffer.zsbuf) { struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture; - ((struct r600_resource_texture *)tex)->flushed = false; + ((struct r600_resource_texture *)tex)->dirty_db = TRUE; } pipe_resource_reference(&draw.index_buffer, NULL); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 14422bbfe41..7b38337eda5 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -513,6 +513,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx, return -ENOMEM; } + ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE; out: /* XXX: only 
do this if the depth texture has actually changed: */ @@ -574,7 +575,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.level = level; trans->transfer.usage = usage; trans->transfer.box = *box; - if (rtex->depth && rtex->tile_type == 1) { + if (rtex->depth) { /* XXX: only readback the rectangle which is being mapped? */ /* XXX: when discard is true, no need to read back from depth texture -- cgit v1.2.3 From 5730d565498cb8b4608fb7ed526172bc4bd84cb9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 3 Feb 2011 13:19:30 +1000 Subject: r600g: only set depth bit for hw accessible depth buffers. If we get a sw accessible buffer like the S8 texture we end up doing depth tracking on it when there is no need since we won't ever bind it to the hardware. This leads to a sw fallback in the transfer destruction which leads to an endless recursion loop of fail in transfer destroy. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_texture.c | 82 ++++++++++++++++----------------- 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 7b38337eda5..1d0e482253e 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -278,6 +278,45 @@ static void r600_setup_miptree(struct pipe_screen *screen, rtex->size = offset; } +/* Figure out whether u_blitter will fallback to a transfer operation. + * If so, don't use a staging resource. + */ +static boolean permit_hardware_blit(struct pipe_screen *screen, + const struct pipe_resource *res) +{ + unsigned bind; + + if (util_format_is_depth_or_stencil(res->format)) + bind = PIPE_BIND_DEPTH_STENCIL; + else + bind = PIPE_BIND_RENDER_TARGET; + + /* See r600_resource_copy_region: there is something wrong + * with depth resource copies at the moment so avoid them for + * now. 
+ */ + if (util_format_get_component_bits(res->format, + UTIL_FORMAT_COLORSPACE_ZS, + 0) != 0) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + bind, 0)) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + PIPE_BIND_SAMPLER_VIEW, 0)) + return FALSE; + + return TRUE; +} + static struct r600_resource_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, @@ -301,8 +340,8 @@ r600_texture_create_object(struct pipe_screen *screen, resource->base.b.screen = screen; resource->bo = bo; rtex->pitch_override = pitch_in_bytes_override; - - if (util_format_is_depth_or_stencil(base->format)) + /* only mark depth textures the HW can hit as depth textures */ + if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base)) rtex->depth = 1; if (array_mode) @@ -324,45 +363,6 @@ r600_texture_create_object(struct pipe_screen *screen, return rtex; } -/* Figure out whether u_blitter will fallback to a transfer operation. - * If so, don't use a staging resource. - */ -static boolean permit_hardware_blit(struct pipe_screen *screen, - const struct pipe_resource *res) -{ - unsigned bind; - - if (util_format_is_depth_or_stencil(res->format)) - bind = PIPE_BIND_DEPTH_STENCIL; - else - bind = PIPE_BIND_RENDER_TARGET; - - /* See r600_resource_copy_region: there is something wrong - * with depth resource copies at the moment so avoid them for - * now. 
- */ - if (util_format_get_component_bits(res->format, - UTIL_FORMAT_COLORSPACE_ZS, - 0) != 0) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - bind, 0)) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0)) - return FALSE; - - return TRUE; -} - struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ) { -- cgit v1.2.3 From 417cfa60b2fec89423be6ce51ab8b1f3063abb2a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 1 Feb 2011 17:20:53 +1000 Subject: r600g: fix depth hw resource copies. With the previous fixes we can now enable hw depth copies Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_blit.c | 11 ++--------- src/gallium/drivers/r600/r600_texture.c | 9 --------- 2 files changed, 2 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index ca032811048..c200dd7305b 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -208,15 +208,8 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned src_level, const struct pipe_box *src_box) { - boolean is_depth; - /* there is something wrong with depth resource copies at the moment so avoid them for now */ - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) - util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); - else - r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); + r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 
1d0e482253e..c773c4b84a6 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -291,15 +291,6 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, else bind = PIPE_BIND_RENDER_TARGET; - /* See r600_resource_copy_region: there is something wrong - * with depth resource copies at the moment so avoid them for - * now. - */ - if (util_format_get_component_bits(res->format, - UTIL_FORMAT_COLORSPACE_ZS, - 0) != 0) - return FALSE; - if (!screen->is_format_supported(screen, res->format, res->target, -- cgit v1.2.3 From aa31a5cbc7b52eb1d03c6eab414479249830eabf Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 3 Feb 2011 13:43:37 +1000 Subject: r600g: flush differences back to DB copy. --- src/gallium/drivers/r600/r600_blit.c | 20 ++++++++++++++++++++ src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/drivers/r600/r600_texture.c | 8 ++++++++ 3 files changed, 29 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index c200dd7305b..bf21ab432ef 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -219,3 +219,23 @@ void r600_init_blit_functions(struct r600_pipe_context *rctx) rctx->context.clear_depth_stencil = r600_clear_depth_stencil; rctx->context.resource_copy_region = r600_resource_copy_region; } + +void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_surface *zsurf, *cbsurf, surf_tmpl; + int level = 0; + float depth = 1.0f; + struct pipe_box sbox; + + sbox.x = sbox.y = sbox.z = 0; + sbox.width = texture->resource.base.b.width0; + sbox.height = texture->resource.base.b.height0; + /* XXX that might be wrong */ + sbox.depth = 1; + + r600_hw_copy_region(ctx, (struct pipe_resource *)texture, 0, + 0, 0, 0, + (struct pipe_resource 
*)texture->flushed_depth_texture, 0, + &sbox); +} diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index beb4db12b08..0d31780e47e 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -198,6 +198,7 @@ void evergreen_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); void r600_flush_depth_textures(struct r600_pipe_context *rctx); /* r600_buffer.c */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index c773c4b84a6..eac40965243 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -630,6 +630,8 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct pipe_resource *texture = transfer->resource; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; if (rtransfer->staging_texture) { if (transfer->usage & PIPE_TRANSFER_WRITE) { @@ -637,6 +639,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, } pipe_resource_reference(&rtransfer->staging_texture, NULL); } + + if (rtex->depth && !rtex->is_flushing_texture) { + if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtex->flushed_depth_texture) + r600_blit_push_depth(ctx, rtex); + } + pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } -- cgit v1.2.3 From 9d4488e4a8bba2bce89d2c348ddc57ced2c6f6cd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 3 Feb 2011 15:01:09 +0000 Subject: svga: Add an assert to catch reentrancy. 
--- src/gallium/drivers/svga/svga_swtnl_backend.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index d5db6bf641a..087f2d08540 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -90,6 +90,7 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render, svga_render->vbuf_size); if(!svga_render->vbuf) { svga_context_flush(svga, NULL); + assert(!svga_render->vbuf); svga_render->vbuf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, svga_render->vbuf_size); -- cgit v1.2.3 From 5c296a583dd0e93188117188f1d0083057c31eac Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 3 Feb 2011 15:02:07 +0000 Subject: svga: Don't call swc->flush directly. Only svga_context_flush should do it, to ensure upload commands are not submitted to hardware in an inconsistent state. --- src/gallium/drivers/svga/svga_resource_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 7c9e600b9f4..213547115a4 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -204,7 +204,7 @@ svga_transfer_dma_band(struct svga_context *svga, ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); if(ret != PIPE_OK) { - svga->swc->flush(svga->swc, NULL); + svga_context_flush(svga, NULL); ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); assert(ret == PIPE_OK); } -- cgit v1.2.3 From 0f3eeb45c73e77b791a047d7bd1ba0c18accb116 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 3 Feb 2011 15:14:59 +0000 Subject: svga: Temporarily disable buffer DMA upload coalescing. See comment for more details. 
--- src/gallium/drivers/svga/svga_draw.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 2c873a0f7ac..051ac1c6546 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -224,6 +224,13 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) hwtnl->cmd.prim_count = 0; } + /* + * FIXME: Somehow we are accumulating too many buffer uploads without + * flushing, so temporarily disable buffer upload coalescing to prevent OOM + * crashes (at expense of less performance). + */ + svga_context_flush_buffers(svga); + return PIPE_OK; } -- cgit v1.2.3 From 610c24b19d21f3d147fde4d96a3afaa107670f1e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 3 Feb 2011 16:14:02 +0000 Subject: svga: Fix resource leak; undo temporary workaround. Leak was introduced when fixing strict aliasing violation in this code: the reference counting was preserved, but the destructor call on zero reference count was not. --- src/gallium/drivers/svga/svga_draw.c | 7 ------- src/gallium/drivers/svga/svga_resource_buffer_upload.c | 7 ++++--- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 051ac1c6546..2c873a0f7ac 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -224,13 +224,6 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) hwtnl->cmd.prim_count = 0; } - /* - * FIXME: Somehow we are accumulating too many buffer uploads without - * flushing, so temporarily disable buffer upload coalescing to prevent OOM - * crashes (at expense of less performance). 
- */ - svga_context_flush_buffers(svga); - return PIPE_OK; } diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 3de5216a949..765d2f34082 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -248,6 +248,7 @@ svga_buffer_upload_flush(struct svga_context *svga, { SVGA3dCopyBox *boxes; unsigned i; + struct pipe_resource *dummy; assert(sbuf->handle); assert(sbuf->hwbuf); @@ -289,9 +290,9 @@ svga_buffer_upload_flush(struct svga_context *svga, sbuf->dma.svga = NULL; sbuf->dma.boxes = NULL; - /* Decrement reference count */ - pipe_reference(&(sbuf->b.b.reference), NULL); - sbuf = NULL; + /* Decrement reference count (and potentially destroy) */ + dummy = &sbuf->b.b; + pipe_resource_reference(&dummy, NULL); } -- cgit v1.2.3 From 126e98966d5396ed251a34e3c39f11b36351a579 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Thu, 3 Feb 2011 21:10:50 +0100 Subject: r600g: Make the buffer and texture vbtls static const. 
--- src/gallium/drivers/r600/r600_buffer.c | 132 ++++++++++++++++---------------- src/gallium/drivers/r600/r600_texture.c | 100 ++++++++++++------------ 2 files changed, 114 insertions(+), 118 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 2e225860906..045e883cfcf 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -39,7 +39,71 @@ #include "r600.h" #include "r600_pipe.h" -extern struct u_resource_vtbl r600_buffer_vtbl; +static void r600_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *buf) +{ + struct r600_resource_buffer *rbuffer = r600_buffer(buf); + + if (rbuffer->r.bo) { + r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); + } + rbuffer->r.bo = NULL; + FREE(rbuffer); +} + +static void *r600_buffer_transfer_map(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + int write = 0; + uint8_t *data; + + if (rbuffer->user_buffer) + return (uint8_t*)rbuffer->user_buffer + transfer->box.x; + + if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { + /* FIXME */ + } + if (transfer->usage & PIPE_TRANSFER_WRITE) { + write = 1; + } + data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe); + if (!data) + return NULL; + + return (uint8_t*)data + transfer->box.x; +} + +static void r600_buffer_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); + + if (rbuffer->user_buffer) + return; + + if (rbuffer->r.bo) + r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); +} + +static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ +} + +static const struct u_resource_vtbl r600_buffer_vtbl = +{ + 
u_default_resource_get_handle, /* get_handle */ + r600_buffer_destroy, /* resource_destroy */ + r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */ + u_default_get_transfer, /* get_transfer */ + u_default_transfer_destroy, /* transfer_destroy */ + r600_buffer_transfer_map, /* transfer_map */ + r600_buffer_transfer_flush_region, /* transfer_flush_region */ + r600_buffer_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) @@ -99,59 +163,6 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, return &rbuffer->r.base.b; } -static void r600_buffer_destroy(struct pipe_screen *screen, - struct pipe_resource *buf) -{ - struct r600_resource_buffer *rbuffer = r600_buffer(buf); - - if (rbuffer->r.bo) { - r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); - } - rbuffer->r.bo = NULL; - FREE(rbuffer); -} - -static void *r600_buffer_transfer_map(struct pipe_context *pipe, - struct pipe_transfer *transfer) -{ - struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - int write = 0; - uint8_t *data; - - if (rbuffer->user_buffer) - return (uint8_t*)rbuffer->user_buffer + transfer->box.x; - - if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { - /* FIXME */ - } - if (transfer->usage & PIPE_TRANSFER_WRITE) { - write = 1; - } - data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe); - if (!data) - return NULL; - - return (uint8_t*)data + transfer->box.x; -} - -static void r600_buffer_transfer_unmap(struct pipe_context *pipe, - struct pipe_transfer *transfer) -{ - struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - - if (rbuffer->user_buffer) - return; - - if (rbuffer->r.bo) - r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo); -} - -static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, - 
struct pipe_transfer *transfer, - const struct pipe_box *box) -{ -} - unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, unsigned level, int layer) @@ -186,19 +197,6 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, return &rbuffer->base.b; } -struct u_resource_vtbl r600_buffer_vtbl = -{ - u_default_resource_get_handle, /* get_handle */ - r600_buffer_destroy, /* resource_destroy */ - r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ - r600_buffer_transfer_map, /* transfer_map */ - r600_buffer_transfer_flush_region, /* transfer_flush_region */ - r600_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; - void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) { struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index eac40965243..c32e541eb50 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -38,8 +38,6 @@ #include "r600d.h" #include "r600_formats.h" -extern struct u_resource_vtbl r600_texture_vtbl; - /* Copy from a full GPU texture to a transfer's staging one. 
*/ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) { @@ -308,6 +306,55 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, return TRUE; } +static boolean r600_texture_get_handle(struct pipe_screen* screen, + struct pipe_resource *ptex, + struct winsys_handle *whandle) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct radeon *radeon = (struct radeon *)screen->winsys; + + return r600_bo_get_winsys_handle(radeon, resource->bo, + rtex->pitch_in_bytes[0], whandle); +} + +static void r600_texture_destroy(struct pipe_screen *screen, + struct pipe_resource *ptex) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct radeon *radeon = (struct radeon *)screen->winsys; + + if (rtex->flushed_depth_texture) + pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + + if (resource->bo) { + r600_bo_reference(radeon, &resource->bo, NULL); + } + FREE(rtex); +} + +static unsigned int r600_texture_is_referenced(struct pipe_context *context, + struct pipe_resource *texture, + unsigned level, int layer) +{ + /* FIXME */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + +static const struct u_resource_vtbl r600_texture_vtbl = +{ + r600_texture_get_handle, /* get_handle */ + r600_texture_destroy, /* resource_destroy */ + r600_texture_is_referenced, /* is_resource_referenced */ + r600_texture_get_transfer, /* get_transfer */ + r600_texture_transfer_destroy, /* transfer_destroy */ + r600_texture_transfer_map, /* transfer_map */ + u_default_transfer_flush_region,/* transfer_flush_region */ + r600_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + static struct r600_resource_texture * r600_texture_create_object(struct pipe_screen *screen, const 
struct pipe_resource *base, @@ -377,34 +424,6 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, } -static void r600_texture_destroy(struct pipe_screen *screen, - struct pipe_resource *ptex) -{ - struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; - - if (rtex->flushed_depth_texture) - pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); - - if (resource->bo) { - r600_bo_reference(radeon, &resource->bo, NULL); - } - FREE(rtex); -} - -static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *ptex, - struct winsys_handle *whandle) -{ - struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; - - return r600_bo_get_winsys_handle(radeon, resource->bo, - rtex->pitch_in_bytes[0], whandle); -} - static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_surface *surf_tmpl) @@ -468,14 +487,6 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, bo); } -static unsigned int r600_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture) { @@ -726,19 +737,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, r600_bo_unmap(radeon, bo); } -struct u_resource_vtbl r600_texture_vtbl = -{ - r600_texture_get_handle, /* get_handle */ - r600_texture_destroy, /* resource_destroy */ - r600_texture_is_referenced, /* is_resource_referenced */ - r600_texture_get_transfer, /* get_transfer */ - r600_texture_transfer_destroy, /* 
transfer_destroy */ - r600_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region,/* transfer_flush_region */ - r600_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; - void r600_init_surface_functions(struct r600_pipe_context *r600) { r600->context.create_surface = r600_create_surface; -- cgit v1.2.3 From d17d03a8dccb4bad25211693320459420409d997 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Thu, 3 Feb 2011 21:10:50 +0100 Subject: r300g: Make the buffer and texture vbtls static const. --- src/gallium/drivers/r300/r300_screen_buffer.c | 2 +- src/gallium/drivers/r300/r300_texture.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index af6fa1048c8..85057d745e2 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -244,7 +244,7 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe, rws->buffer_unmap(rws, rbuf->buf); } -struct u_resource_vtbl r300_buffer_vtbl = +static const struct u_resource_vtbl r300_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ r300_buffer_destroy, /* resource_destroy */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ec8608f74bd..7dd2fe326f8 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -763,7 +763,7 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, tex->desc.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +static const struct u_resource_vtbl r300_texture_vtbl = { r300_texture_get_handle, /* get_handle */ r300_texture_destroy, /* resource_destroy */ -- cgit v1.2.3 From d06b99009699f8bc1d7a1d711bee8edc92dfb1a9 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Thu, 3 
Feb 2011 21:10:50 +0100 Subject: r600g: Get rid of the unused r600_cf_vtx_tc() function. --- src/gallium/drivers/r600/r600_asm.c | 36 ------------------------------------ src/gallium/drivers/r600/r600_asm.h | 1 - 2 files changed, 37 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index e910d1cc73f..a43c72bd432 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1877,42 +1877,6 @@ void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) 0xFFFFFFFF, ve->fetch_shader); } -void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) -{ - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT((count - 8) - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} - static void r600_vertex_data_type(enum pipe_format pformat, unsigned 
*format, unsigned *num_format, unsigned *format_comp) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index b91d9b5b41b..d6417528ce0 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -204,7 +204,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); void r600_bc_dump(struct r600_bc *bc); void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); -void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); -- cgit v1.2.3 From a6a710cbe7425819e1cd5ad5f2085311c092f2e7 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Thu, 3 Feb 2011 21:10:50 +0100 Subject: r600g: Make some more things static. --- src/gallium/drivers/r600/r600_asm.c | 6 +++--- src/gallium/drivers/r600/r600_asm.h | 1 - src/gallium/drivers/r600/r600_buffer.c | 16 ++++++++-------- src/gallium/drivers/r600/r600_pipe.h | 4 ---- src/gallium/drivers/r600/r600_shader.c | 2 +- src/gallium/winsys/r600/drm/radeon_pciid.c | 2 +- 6 files changed, 13 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index a43c72bd432..35a7bc79e04 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -545,7 +545,7 @@ struct alu_bank_swizzle { int hw_cfile_elem[4]; }; -const unsigned cycle_for_bank_swizzle_vec[][3] = { +static const unsigned cycle_for_bank_swizzle_vec[][3] = { [SQ_ALU_VEC_012] = { 0, 1, 2 }, [SQ_ALU_VEC_021] = { 0, 2, 1 }, [SQ_ALU_VEC_120] = { 1, 2, 0 }, @@ -554,7 +554,7 @@ const unsigned cycle_for_bank_swizzle_vec[][3] = { [SQ_ALU_VEC_210] = { 2, 1, 0 } }; -const unsigned cycle_for_bank_swizzle_scl[][3] = { +static const unsigned 
cycle_for_bank_swizzle_scl[][3] = { [SQ_ALU_SCL_210] = { 2, 1, 0 }, [SQ_ALU_SCL_122] = { 1, 2, 2 }, [SQ_ALU_SCL_212] = { 2, 1, 2 }, @@ -1841,7 +1841,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "--------------------------------------\n"); } -void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +static void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) { struct r600_pipe_state *rstate; unsigned i = 0; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index d6417528ce0..510529abc35 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -203,7 +203,6 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); void r600_bc_dump(struct r600_bc *bc); -void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 045e883cfcf..c3bc6eadda4 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -51,6 +51,14 @@ static void r600_buffer_destroy(struct pipe_screen *screen, FREE(rbuffer); } +static unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned level, int layer) +{ + /* FIXME */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { @@ -163,14 +171,6 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, return &rbuffer->r.base.b; } -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct 
pipe_resource *buf, - unsigned level, int layer) -{ - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle) { diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 0d31780e47e..d376a777852 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -207,9 +207,6 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, unsigned bind); -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer); struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); @@ -223,7 +220,6 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 643c47d4bf6..fa21bbbce1c 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -194,7 +194,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade 0xFFFFFFFF, NULL); } -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) +static int r600_pipe_shader(struct pipe_context *ctx, struct 
r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index 06681791e57..f19956931de 100644 --- a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -32,7 +32,7 @@ struct pci_id { unsigned family; }; -struct pci_id radeon_pci_id[] = { +static const struct pci_id radeon_pci_id[] = { {0x1002, 0x3150, CHIP_RV380}, {0x1002, 0x3152, CHIP_RV380}, {0x1002, 0x3154, CHIP_RV380}, -- cgit v1.2.3 From e40252d4d997ed8040611ebe13a769d0ad9e5582 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 31 Jan 2011 14:41:36 -0700 Subject: gallium/svga: added debug code for dumping framebuffer images (disabled) --- src/gallium/drivers/svga/svga_pipe_flush.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index ab243aa6ec5..6c69d29d15e 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -24,6 +24,7 @@ **********************************************************/ #include "pipe/p_defines.h" +#include "util/u_string.h" #include "svga_screen.h" #include "svga_surface.h" #include "svga_context.h" @@ -56,6 +57,28 @@ static void svga_flush( struct pipe_context *pipe, SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n", __FUNCTION__, flags, fence ? 
*fence : 0x0); + + /* Enable to dump BMPs of the color/depth buffers each frame */ + if (0) { + if (flags & PIPE_FLUSH_FRAME) { + struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; + static unsigned frame_no = 1; + char filename[256]; + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%04u", i, frame_no); + debug_dump_surface_bmp(&svga->pipe, filename, fb->cbufs[i]); + } + + if (0 && fb->zsbuf) { + util_snprintf(filename, sizeof(filename), "zsbuf_%04u", frame_no); + debug_dump_surface_bmp(&svga->pipe, filename, fb->zsbuf); + } + + ++frame_no; + } + } } -- cgit v1.2.3 From 5026841d5edc1f3c94e9057f7b331d4f56f0199a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Feb 2011 12:41:16 -0700 Subject: svga: rename a couple sampler, sampler view functions --- src/gallium/drivers/svga/svga_pipe_sampler.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index f44a0e1325a..446fcc44078 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -144,8 +144,9 @@ svga_create_sampler_state(struct pipe_context *pipe, return cso; } -static void svga_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) +static void +svga_bind_fragment_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct svga_context *svga = svga_context(pipe); unsigned i; @@ -203,9 +204,10 @@ svga_sampler_view_destroy(struct pipe_context *pipe, FREE(view); } -static void svga_set_sampler_views(struct pipe_context *pipe, - unsigned num, - struct pipe_sampler_view **views) +static void +svga_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct svga_context *svga = svga_context(pipe); unsigned flag_1d = 0; @@ -256,9 +258,9 @@ 
static void svga_set_sampler_views(struct pipe_context *pipe, void svga_init_sampler_functions( struct svga_context *svga ) { svga->pipe.create_sampler_state = svga_create_sampler_state; - svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states; + svga->pipe.bind_fragment_sampler_states = svga_bind_fragment_sampler_states; svga->pipe.delete_sampler_state = svga_delete_sampler_state; - svga->pipe.set_fragment_sampler_views = svga_set_sampler_views; + svga->pipe.set_fragment_sampler_views = svga_set_fragment_sampler_views; svga->pipe.create_sampler_view = svga_create_sampler_view; svga->pipe.sampler_view_destroy = svga_sampler_view_destroy; } -- cgit v1.2.3 From 4b49fcbb9a26680e9a4ef441668e0dd817529d47 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 3 Feb 2011 14:45:40 +1000 Subject: r600g: flush depth texture before a blit from it. If we are going to blit from a depth texture we need to flush it before we blit from it. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_blit.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index bf21ab432ef..c11268ccca3 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -208,8 +208,14 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned src_level, const struct pipe_box *src_box) { + struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; + + if (rsrc->depth && !rsrc->is_flushing_texture) + r600_texture_depth_flush(ctx, src); + r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); + } void r600_init_blit_functions(struct r600_pipe_context *rctx) -- cgit v1.2.3 From 446bc12c1760fe5d402cdd519a7f0e42d89b9696 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 09:06:02 +1000 Subject: r600g: also check CB bindings for textures to depth flush. 
This checks the color buffer bindings to make sure there is something to flush. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_blit.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index c11268ccca3..1a1908031f0 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -136,6 +136,17 @@ void r600_flush_depth_textures(struct r600_pipe_context *rctx) r600_blit_uncompress_depth(&rctx->context, tex); } + + /* also check CB here */ + for (i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + struct r600_resource_texture *tex; + tex = (struct r600_resource_texture *)rctx->framebuffer.cbufs[i]->texture; + + if (!tex->depth) + continue; + + r600_blit_uncompress_depth(&rctx->context, tex); + } } static void r600_clear(struct pipe_context *ctx, unsigned buffers, -- cgit v1.2.3 From 3e9bc43fbafdd497d475eaffe0deec81b446d122 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 09:07:08 +1000 Subject: r600g: add a flag to just create flushed texture without flushing. This just adds a flag to create the texture without doing any flushing to it. Flushing occurs in the draw function. This avoids unnecessary flushes when we end up rebinding a CB/DB/texture due to the blitter just restoring state. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 2 +- src/gallium/drivers/r600/r600_blit.c | 2 +- src/gallium/drivers/r600/r600_resource.h | 2 +- src/gallium/drivers/r600/r600_state.c | 8 +++++++- src/gallium/drivers/r600/r600_texture.c | 7 +++++-- 5 files changed, 15 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index c64b93bd4fc..00d5d007ddf 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -382,7 +382,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte } tmp = (struct r600_resource_texture *)texture; if (tmp->depth && tmp->tile_type == 1) { - r600_texture_depth_flush(ctx, texture); + r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } rbuffer = &tmp->resource; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 1a1908031f0..2c6d217abe7 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -222,7 +222,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx, struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; if (rsrc->depth && !rsrc->is_flushing_texture) - r600_texture_depth_flush(ctx, src); + r600_texture_depth_flush(ctx, src, FALSE); r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 8d34b864f82..8afe866c91e 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -103,7 +103,7 @@ static INLINE boolean r600_is_user_buffer(struct pipe_resource *buffer) return r600_buffer(buffer)->user_buffer ? 
TRUE : FALSE; } -int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture); +int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture, boolean just_create); /* r600_texture.c texture transfer functions. */ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 354d38ec234..19bfa81b99a 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -430,7 +430,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c } tmp = (struct r600_resource_texture *)texture; if (tmp->depth && !tmp->is_flushing_texture) { - r600_texture_depth_flush(ctx, texture); + r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } rbuffer = &tmp->resource; @@ -692,6 +692,12 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + + if (rtex->depth && !rtex->is_flushing_texture) { + r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); + rtex = rtex->flushed_depth_texture; + } + rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index c32e541eb50..bc18eef6cfe 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -488,7 +488,7 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, } int r600_texture_depth_flush(struct pipe_context *ctx, - struct pipe_resource *texture) + struct pipe_resource *texture, boolean just_create) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_resource resource; @@ -517,6 +517,9 @@ int r600_texture_depth_flush(struct 
pipe_context *ctx, ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE; out: + if (just_create) + return 0; + /* XXX: only do this if the depth texture has actually changed: */ r600_blit_uncompress_depth(ctx, rtex); @@ -582,7 +585,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, */ /* XXX: when discard is true, no need to read back from depth texture */ - r = r600_texture_depth_flush(ctx, texture); + r = r600_texture_depth_flush(ctx, texture, FALSE); if (r < 0) { R600_ERR("failed to create temporary texture to hold untiled copy\n"); pipe_resource_reference(&trans->transfer.resource, NULL); -- cgit v1.2.3 From cd6864c07976fad5f9008206d558dc6c8c599c11 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 09:08:18 +1000 Subject: r600g: remove unused variables --- src/gallium/drivers/r600/r600_blit.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 2c6d217abe7..de54da8714f 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -239,10 +239,6 @@ void r600_init_blit_functions(struct r600_pipe_context *rctx) void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_surface *zsurf, *cbsurf, surf_tmpl; - int level = 0; - float depth = 1.0f; struct pipe_box sbox; sbox.x = sbox.y = sbox.z = 0; -- cgit v1.2.3 From 2271c793e8650e0e55c054301ab85b5b92b9bf11 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 09:08:32 +1000 Subject: r600g: flushing texture needs all levels. For mipmap generation we need all levels in the flushing texture. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index bc18eef6cfe..14a289444df 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -501,7 +501,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.width0 = texture->width0; resource.height0 = texture->height0; resource.depth0 = 1; - resource.last_level = 0; + resource.last_level = texture->last_level; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; -- cgit v1.2.3 From 8c643446f982892aeec4298977fbbe1ab92206fb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 09:34:32 +1000 Subject: r600g: evergreen CB check for flushed texture --- src/gallium/drivers/r600/evergreen_state.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 00d5d007ddf..73fa1714361 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -647,6 +647,12 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + + if (rtex->depth && !rtex->is_flushing_texture) { + r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); + rtex = rtex->flushed_depth_texture; + } + rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; -- cgit v1.2.3 From 812c314e5161d2b5f91c86ba45b79d4b34046bee Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 09:36:02 +1000 Subject: r600g: avoid trying to flush the flushing texture. Since these textures still have the depth bit set. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_blit.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index de54da8714f..af471d0d917 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -134,6 +134,9 @@ void r600_flush_depth_textures(struct r600_pipe_context *rctx) if (!tex->depth) continue; + if (tex->is_flushing_texture) + continue; + r600_blit_uncompress_depth(&rctx->context, tex); } @@ -145,6 +148,9 @@ void r600_flush_depth_textures(struct r600_pipe_context *rctx) if (!tex->depth) continue; + if (tex->is_flushing_texture) + continue; + r600_blit_uncompress_depth(&rctx->context, tex); } } -- cgit v1.2.3 From 151a945d38d8c9231799b00364c5b85c8a4b2279 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 09:38:01 +1000 Subject: r600g: get offset for correct texture when setting up CB. this fixes the mipmap tests with tiling forced on. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 19bfa81b99a..8c4baf7cab5 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -704,7 +704,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta bo[2] = rbuffer->bo; /* XXX quite sure for dx10+ hw don't need any offset hacks */ - offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, + offset = r600_texture_get_offset(rtex, level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; -- cgit v1.2.3 From fdd35dc91220046ed66877e60479f155e3a23690 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 15:26:09 +1000 Subject: r600g: fix evergreen sampler view + depth interaction --- src/gallium/drivers/r600/evergreen_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 73fa1714361..b80d8049718 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -381,7 +381,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte R600_ERR("unknow format %d\n", state->format); } tmp = (struct r600_resource_texture *)texture; - if (tmp->depth && tmp->tile_type == 1) { + if (tmp->depth && !tmp->is_flushing_texture) { r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } -- cgit v1.2.3 From 3188a7deb3d8bcaee647de27831c0f62b17a6ea9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 4 Feb 2011 15:26:41 +1000 Subject: r600g: don't set tile_type on evergreen. 
Since we never bind the actual DB to the CB/texture only the flushed one we don't need to track the tile type at the moment. --- src/gallium/drivers/r600/evergreen_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index b80d8049718..d9b8eb768b0 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -727,7 +727,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tile_type = 1; + rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ -- cgit v1.2.3 From 780c183b8fdf2d301e1eea7f0b83cd96fb6cbf84 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 6 Feb 2011 18:57:11 +1000 Subject: r600g: use surface format not underlying texture format This uses the surface format to set the CB up not the underlying texture format, since these can and do differ. Fixes piglit fbo-srgb. 
--- src/gallium/drivers/r600/evergreen_state.c | 6 +++--- src/gallium/drivers/r600/r600_state.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index d9b8eb768b0..05539aa61d7 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -664,12 +664,12 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); + desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_028C70_NUMBER_SRGB; - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); + format = r600_translate_colorformat(surf->base.format); + swap = r600_translate_colorswap(surf->base.format); color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_ARRAY_MODE(rtex->array_mode[level]) | diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 8c4baf7cab5..a51e7057eee 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -709,12 +709,12 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); + desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_0280A0_NUMBER_SRGB; - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); + 
format = r600_translate_colorformat(surf->base.format); + swap = r600_translate_colorswap(surf->base.format); color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | -- cgit v1.2.3 From 4ad3b27ceee94cff9a0a78e6b778756d3714bdbf Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 2 Feb 2011 06:46:24 +0100 Subject: r300g: RS400 doesn't have ZMASK --- src/gallium/drivers/r300/r300_chipset.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 593eadb9c7d..1968d0feb35 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -241,7 +241,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5A42: caps->family = CHIP_FAMILY_RS400; caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: -- cgit v1.2.3 From 529d867207a7f14364d9453cc2beaa4da4ea4752 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 6 Feb 2011 21:09:38 +0100 Subject: r300g: do not flush the uploader We don't have to unmap and recreate the upload buffer when a flush occurs. This should also prevent buffer allocations from failing. 
--- src/gallium/drivers/r300/r300_flush.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index b250532ba92..986ea5ff35a 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -40,9 +40,6 @@ static void r300_flush(struct pipe_context* pipe, struct r300_atom *atom; struct r300_fence **rfence = (struct r300_fence**)fence; - u_upload_flush(r300->upload_vb); - u_upload_flush(r300->upload_ib); - if (r300->draw && !r300->draw_vbo_locked) r300_draw_flush_vbuf(r300); -- cgit v1.2.3 From 1c2a4f0820ff2272f993e6da28dcf8bcbbc3252a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 6 Feb 2011 21:12:19 +0100 Subject: r600g: do not flush the uploader --- src/gallium/drivers/r600/r600_pipe.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 45c093121da..1c903a0b4e1 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -68,9 +68,6 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, dc++; #endif r600_context_flush(&rctx->ctx); - - u_upload_flush(rctx->upload_vb); - u_upload_flush(rctx->upload_const); } static void r600_destroy_context(struct pipe_context *context) -- cgit v1.2.3 From aa8a2224a3df111a1613f0baefebc00883e1b70b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 7 Feb 2011 02:00:56 +0100 Subject: r600g: use the new vertex buffer manager --- src/gallium/drivers/r600/r600_asm.c | 6 +- src/gallium/drivers/r600/r600_blit.c | 12 +- src/gallium/drivers/r600/r600_buffer.c | 115 +++++++----------- src/gallium/drivers/r600/r600_pipe.c | 26 ++--- src/gallium/drivers/r600/r600_pipe.h | 34 +----- src/gallium/drivers/r600/r600_resource.h | 10 +- src/gallium/drivers/r600/r600_state_common.c | 128 ++++---------------- 
src/gallium/drivers/r600/r600_texture.c | 20 ++-- src/gallium/drivers/r600/r600_translate.c | 169 +-------------------------- 9 files changed, 101 insertions(+), 419 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 35a7bc79e04..46d7fc391c6 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2052,10 +2052,10 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; - r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp); - desc = util_format_description(ve->hw_format[i]); + r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); + desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { - R600_ERR("unknown format %d\n", ve->hw_format[i]); + R600_ERR("unknown format %d\n", ve->elements[i].src_format); r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); return -EINVAL; } diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index af471d0d917..fbade99fc54 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -54,7 +54,9 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op if (rctx->states[R600_PIPE_STATE_CLIP]) { util_blitter_save_clip(rctx->blitter, &rctx->clip); } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffers, rctx->vertex_buffer); + util_blitter_save_vertex_buffers(rctx->blitter, + rctx->vbuf_mgr->nr_vertex_buffers, + rctx->vbuf_mgr->vertex_buffer); if (op & (R600_CLEAR_SURFACE | R600_COPY)) util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); @@ -88,13 +90,13 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t if (!texture->dirty_db) return; - surf_tmpl.format = texture->resource.base.b.format; 
+ surf_tmpl.format = texture->resource.b.b.b.format; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = 0; surf_tmpl.u.tex.last_layer = 0; surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl); + zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl); surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format; surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; @@ -248,8 +250,8 @@ void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture struct pipe_box sbox; sbox.x = sbox.y = sbox.z = 0; - sbox.width = texture->resource.base.b.width0; - sbox.height = texture->resource.base.b.height0; + sbox.width = texture->resource.b.b.b.width0; + sbox.height = texture->resource.b.b.b.height0; /* XXX that might be wrong */ sbox.depth = 1; diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index c3bc6eadda4..0a0e3db854a 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -66,8 +66,8 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, int write = 0; uint8_t *data; - if (rbuffer->user_buffer) - return (uint8_t*)rbuffer->user_buffer + transfer->box.x; + if (rbuffer->r.b.user_ptr) + return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x; if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { /* FIXME */ @@ -87,7 +87,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - if (rbuffer->user_buffer) + if (rbuffer->r.b.user_ptr) return; if (rbuffer->r.bo) @@ -126,20 +126,25 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, return NULL; rbuffer->magic = R600_BUFFER_MAGIC; - rbuffer->user_buffer = NULL; - rbuffer->r.base.b = *templ; - pipe_reference_init(&rbuffer->r.base.b.reference, 1); - rbuffer->r.base.b.screen = screen; - rbuffer->r.base.vtbl = 
&r600_buffer_vtbl; - rbuffer->r.size = rbuffer->r.base.b.width0; + rbuffer->r.b.b.b = *templ; + pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); + rbuffer->r.b.b.b.screen = screen; + rbuffer->r.b.b.vtbl = &r600_buffer_vtbl; + rbuffer->r.b.user_ptr = NULL; + rbuffer->r.size = rbuffer->r.b.b.b.width0; rbuffer->r.bo_size = rbuffer->r.size; - bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage); + + bo = r600_bo((struct radeon*)screen->winsys, + rbuffer->r.b.b.b.width0, + alignment, rbuffer->r.b.b.b.bind, + rbuffer->r.b.b.b.usage); + if (bo == NULL) { FREE(rbuffer); return NULL; } rbuffer->r.bo = bo; - return &rbuffer->r.base.b; + return &rbuffer->r.b.b.b; } struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, @@ -153,22 +158,22 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, return NULL; rbuffer->magic = R600_BUFFER_MAGIC; - pipe_reference_init(&rbuffer->r.base.b.reference, 1); - rbuffer->r.base.vtbl = &r600_buffer_vtbl; - rbuffer->r.base.b.screen = screen; - rbuffer->r.base.b.target = PIPE_BUFFER; - rbuffer->r.base.b.format = PIPE_FORMAT_R8_UNORM; - rbuffer->r.base.b.usage = PIPE_USAGE_IMMUTABLE; - rbuffer->r.base.b.bind = bind; - rbuffer->r.base.b.width0 = bytes; - rbuffer->r.base.b.height0 = 1; - rbuffer->r.base.b.depth0 = 1; - rbuffer->r.base.b.array_size = 1; - rbuffer->r.base.b.flags = 0; + pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); + rbuffer->r.b.b.vtbl = &r600_buffer_vtbl; + rbuffer->r.b.b.b.screen = screen; + rbuffer->r.b.b.b.target = PIPE_BUFFER; + rbuffer->r.b.b.b.format = PIPE_FORMAT_R8_UNORM; + rbuffer->r.b.b.b.usage = PIPE_USAGE_IMMUTABLE; + rbuffer->r.b.b.b.bind = bind; + rbuffer->r.b.b.b.width0 = bytes; + rbuffer->r.b.b.b.height0 = 1; + rbuffer->r.b.b.b.depth0 = 1; + rbuffer->r.b.b.b.array_size = 1; + rbuffer->r.b.b.b.flags = 0; + rbuffer->r.b.user_ptr = ptr; rbuffer->r.bo = NULL; rbuffer->r.bo_size = 0; - 
rbuffer->user_buffer = ptr; - return &rbuffer->r.base.b; + return &rbuffer->r.b.b.b; } struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, @@ -189,12 +194,12 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, return NULL; } - pipe_reference_init(&rbuffer->base.b.reference, 1); - rbuffer->base.b.target = PIPE_BUFFER; - rbuffer->base.b.screen = screen; - rbuffer->base.vtbl = &r600_buffer_vtbl; + pipe_reference_init(&rbuffer->b.b.b.reference, 1); + rbuffer->b.b.b.target = PIPE_BUFFER; + rbuffer->b.b.b.screen = screen; + rbuffer->b.b.vtbl = &r600_buffer_vtbl; rbuffer->bo = bo; - return &rbuffer->base.b; + return &rbuffer->b.b.b; } void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) @@ -202,59 +207,19 @@ void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); boolean flushed; - u_upload_data(rctx->upload_vb, 0, + u_upload_data(rctx->upload_ib, 0, draw->info.count * draw->index_size, - rbuffer->user_buffer, + rbuffer->r.b.user_ptr, &draw->index_buffer_offset, &draw->index_buffer, &flushed); } -void r600_upload_user_buffers(struct r600_pipe_context *rctx, - int min_index, int max_index) -{ - int i, nr = rctx->vertex_elements->count; - unsigned count = max_index + 1 - min_index; - boolean flushed; - boolean uploaded[32] = {0}; - - for (i = 0; i < nr; i++) { - unsigned index = rctx->vertex_elements->elements[i].vertex_buffer_index; - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[index]; - struct r600_resource_buffer *userbuf = r600_buffer(vb->buffer); - - if (userbuf && userbuf->user_buffer && !uploaded[index]) { - unsigned first, size; - - if (vb->stride) { - first = vb->stride * min_index; - size = vb->stride * count; - } else { - first = 0; - size = rctx->vertex_elements->hw_format_size[i]; - } - - u_upload_data(rctx->upload_vb, first, size, - (uint8_t*)userbuf->user_buffer + first, - 
&vb->buffer_offset, - &rctx->real_vertex_buffer[index], - &flushed); - - vb->buffer_offset -= first; - - /* vertex_arrays_dirty = TRUE; */ - uploaded[index] = TRUE; - } else { - assert(rctx->real_vertex_buffer[index]); - } - } -} - void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *const_offset) { - if ((*rbuffer)->user_buffer) { - uint8_t *ptr = (*rbuffer)->user_buffer; - unsigned size = (*rbuffer)->r.base.b.width0; + if ((*rbuffer)->r.b.user_ptr) { + uint8_t *ptr = (*rbuffer)->r.b.user_ptr; + unsigned size = (*rbuffer)->r.b.b.b.width0; boolean flushed; *rbuffer = NULL; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 1c903a0b4e1..85ad0ee968b 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -76,8 +76,6 @@ static void r600_destroy_context(struct pipe_context *context) rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush); - r600_end_vertex_translate(rctx); - r600_context_fini(&rctx->ctx); util_blitter_destroy(rctx->blitter); @@ -86,11 +84,9 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } - u_upload_destroy(rctx->upload_vb); + u_upload_destroy(rctx->upload_ib); u_upload_destroy(rctx->upload_const); - - if (rctx->tran.translate_cache) - translate_cache_destroy(rctx->tran.translate_cache); + u_vbuf_mgr_destroy(rctx->vbuf_mgr); FREE(rctx->ps_resource); FREE(rctx->vs_resource); @@ -164,10 +160,16 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->upload_vb = u_upload_create(&rctx->context, 1024 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER | + rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 16, + U_VERTEX_FETCH_BYTE_ALIGNED); + if (!rctx->vbuf_mgr) { + r600_destroy_context(&rctx->context); + return NULL; + } + + rctx->upload_ib = u_upload_create(&rctx->context, 128 * 1024, 16, 
PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_vb == NULL) { + if (rctx->upload_ib == NULL) { r600_destroy_context(&rctx->context); return NULL; } @@ -185,12 +187,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->tran.translate_cache = translate_cache_create(); - if (rctx->tran.translate_cache == NULL) { - FREE(rctx); - return NULL; - } - rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); if (!rctx->vs_resource) { FREE(rctx); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index d376a777852..e9820a23911 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -30,7 +30,7 @@ #include #include #include -#include "translate/translate_cache.h" +#include "util/u_vbuf_mgr.h" #include "r600.h" #include "r600_public.h" #include "r600_shader.h" @@ -86,9 +86,7 @@ struct r600_vertex_element { unsigned count; struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; - enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; - unsigned hw_format_size[PIPE_MAX_ATTRIBS]; - boolean incompatible_layout; + struct u_vbuf_mgr_elements *vmgr_elements; struct r600_bo *fetch_shader; unsigned fs_size; struct r600_pipe_state rstate; @@ -117,18 +115,6 @@ struct r600_textures_info { unsigned n_samplers; }; -/* vertex buffer translation context, used to translate vertex input that - * hw doesn't natively support, so far only FLOAT64 is unsupported. - */ -struct r600_translate_context { - /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; - /* The vertex buffer slot containing the translated buffer. 
*/ - unsigned vb_slot; - void *saved_velems; - void *new_velems; -}; - #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 @@ -144,10 +130,6 @@ struct r600_pipe_context { struct r600_vertex_element *vertex_elements; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nvertex_buffers; - unsigned nreal_vertex_buffers; /* with the translated vertex buffer */ unsigned cb_target_mask; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; @@ -165,11 +147,10 @@ struct r600_pipe_context { /* shader information */ unsigned sprite_coord_enable; bool flatshade; - struct u_upload_mgr *upload_vb; - unsigned any_user_vbs; struct r600_textures_info ps_samplers; - unsigned vb_max_index; - struct r600_translate_context tran; + + struct u_vbuf_mgr *vbuf_mgr; + struct u_upload_mgr *upload_ib; struct u_upload_mgr *upload_const; bool blit; }; @@ -210,8 +191,6 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); -void r600_upload_user_buffers(struct r600_pipe_context *rctx, - int min_index, int max_index); /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); @@ -250,9 +229,6 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned layer); /* r600_translate.c */ -void r600_begin_vertex_translate(struct r600_pipe_context *rctx, - int min_index, int max_index); -void r600_end_vertex_translate(struct r600_pipe_context *rctx); void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, diff --git a/src/gallium/drivers/r600/r600_resource.h 
b/src/gallium/drivers/r600/r600_resource.h index 8afe866c91e..2e7a28cc94f 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -24,6 +24,7 @@ #define R600_RESOURCE_H #include "util/u_transfer.h" +#include "util/u_vbuf_mgr.h" /* flag to indicate a resource is to be used as a transfer so should not be tiled */ #define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV @@ -43,7 +44,7 @@ struct r600_transfer { * underlying implementations. */ struct r600_resource { - struct u_resource base; + struct u_vbuf_resource b; struct r600_bo *bo; u32 size; unsigned bo_size; @@ -68,10 +69,10 @@ struct r600_resource_texture { #define R600_BUFFER_MAGIC 0xabcd1600 +/* XXX this could be removed */ struct r600_resource_buffer { struct r600_resource r; uint32_t magic; - void *user_buffer; }; struct r600_surface { @@ -98,11 +99,6 @@ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buf return NULL; } -static INLINE boolean r600_is_user_buffer(struct pipe_resource *buffer) -{ - return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; -} - int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture, boolean just_create); /* r600_texture.c texture transfer functions. 
*/ diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 3b037f8c8c2..3a959465715 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -123,6 +123,9 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) rctx->vertex_elements = v; if (v) { + u_vbuf_mgr_bind_vertex_elements(rctx->vbuf_mgr, state, + v->vmgr_elements); + rctx->states[v->rstate.id] = &v->rstate; r600_context_pipe_state_set(&rctx->ctx, &v->rstate); } @@ -140,6 +143,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state) rctx->vertex_elements = NULL; r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements); FREE(state); } @@ -164,52 +168,19 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_buffer *buffers) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_vertex_buffer *vbo; - unsigned max_index = ~0; int i; + /* Zero states. */ for (i = 0; i < count; i++) { - vbo = (struct pipe_vertex_buffer*)&buffers[i]; - - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, vbo->buffer); - pipe_resource_reference(&rctx->real_vertex_buffer[i], NULL); - - if (!vbo->buffer) { - /* Zero states. */ + if (!buffers[i].buffer) { if (rctx->family >= CHIP_CEDAR) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } - continue; - } - - if (r600_is_user_buffer(vbo->buffer)) { - rctx->any_user_vbs = TRUE; - continue; - } - - pipe_resource_reference(&rctx->real_vertex_buffer[i], vbo->buffer); - - /* The stride of zero means we will be fetching only the first - * vertex, so don't care about max_index. */ - if (!vbo->stride) { - continue; - } - - /* Update the maximum index. 
*/ - { - unsigned vbo_max_index = - (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; - max_index = MIN2(max_index, vbo_max_index); } } - - for (; i < rctx->nreal_vertex_buffers; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - pipe_resource_reference(&rctx->real_vertex_buffer[i], NULL); - - /* Zero states. */ + for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { if (rctx->family >= CHIP_CEDAR) { evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } else { @@ -217,72 +188,24 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, } } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - - rctx->nvertex_buffers = count; - rctx->nreal_vertex_buffers = count; - rctx->vb_max_index = max_index; + u_vbuf_mgr_set_vertex_buffers(rctx->vbuf_mgr, count, buffers); } - -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break - void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - enum pipe_format *format; - int i; assert(count < 32); if (!v) return NULL; v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - - for (i = 0; i < count; i++) { - v->hw_format[i] = v->elements[i].src_format; - format = &v->hw_format[i]; - - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - - /* r600 doesn't seem to support 32_*SCALED, these formats - * aren't in D3D10 either. 
*/ - FORMAT_REPLACE(R32_UNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_USCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); - default:; - } - v->incompatible_layout = - v->incompatible_layout || - v->elements[i].src_format != v->hw_format[i]; - - v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4); - } + v->vmgr_elements = + u_vbuf_mgr_create_vertex_elements(rctx->vbuf_mgr, count, + elements, v->elements); if (r600_vertex_elements_build_fetch_shader(rctx, v)) { FREE(v); @@ -433,7 +356,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, return; } - if (buffer != &rbuffer->r.base.b) + if (buffer != &rbuffer->r.b.b.b) pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); } @@ -449,7 +372,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) rctx->nvs_resource = rctx->vertex_elements->count; } else { /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nreal_vertex_buffers; + rctx->nvs_resource = rctx->vbuf_mgr->nr_real_vertex_buffers; } for (i = 0 ; i < rctx->nvs_resource; i++) { @@ -461,13 +384,13 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) /* one resource per vertex elements */ unsigned vbuffer_index; vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - 
vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)rctx->real_vertex_buffer[vbuffer_index]; + vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[vbuffer_index]; offset = rctx->vertex_elements->vbuffer_offset[i]; } else { /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)rctx->real_vertex_buffer[i]; + vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[i]; + rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[i]; offset = 0; } if (vertex_buffer == NULL || rbuffer == NULL) @@ -497,15 +420,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) unsigned prim; r600_flush_depth_textures(rctx); - - if (rctx->vertex_elements->incompatible_layout) { - r600_begin_vertex_translate(rctx, info->min_index, info->max_index); - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx, info->min_index, info->max_index); - } - + u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL); r600_vertex_buffer_update(rctx); draw.info = *info; @@ -523,7 +438,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) draw.index_buffer_offset = draw.info.start * draw.index_size; draw.info.start = 0; - if (r600_is_user_buffer(draw.index_buffer)) { + if (u_vbuf_resource(draw.index_buffer)->user_ptr) { r600_upload_index_buffer(rctx, &draw); } } else { @@ -607,8 +522,5 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) pipe_resource_reference(&draw.index_buffer, NULL); - /* delete previous translated vertex elements */ - if (rctx->tran.new_velems) { - r600_end_vertex_translate(rctx); - } + u_vbuf_mgr_draw_end(rctx->vbuf_mgr); } diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 14a289444df..b7bfdd8c166 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ 
b/src/gallium/drivers/r600/r600_texture.c @@ -75,7 +75,7 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, { unsigned offset = rtex->offset[level]; - switch (rtex->resource.base.b.target) { + switch (rtex->resource.b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: return offset + layer * rtex->layer_size[level]; @@ -167,7 +167,7 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; unsigned width, stride, tile_width; if (rtex->pitch_override) @@ -188,7 +188,7 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; unsigned height, tile_height; height = mip_minify(ptex->height0, level); @@ -211,7 +211,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level, unsigned array_mode) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; switch (array_mode) { case V_0280A0_ARRAY_LINEAR_GENERAL: @@ -242,7 +242,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned array_mode) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); unsigned pitch, size, layer_size, i, offset; @@ -372,10 +372,10 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; resource = &rtex->resource; - resource->base.b = *base; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = 
screen; + resource->b.b.b = *base; + resource->b.b.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->b.b.b.reference, 1); + resource->b.b.b.screen = screen; resource->bo = bo; rtex->pitch_override = pitch_in_bytes_override; /* only mark depth textures the HW can hit as depth textures */ @@ -389,7 +389,7 @@ r600_texture_create_object(struct pipe_screen *screen, resource->size = rtex->size; if (!resource->bo) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; int base_align = r600_get_base_alignment(screen, ptex->format, array_mode); resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage); diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 68429b99d01..a980eac95e0 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -23,176 +23,11 @@ * Authors: Dave Airlie */ -#include "translate/translate_cache.h" -#include "translate/translate.h" -#include #include +#include "util/u_inlines.h" #include "util/u_upload_mgr.h" #include "r600_pipe.h" -void r600_begin_vertex_translate(struct r600_pipe_context *rctx, - int min_index, int max_index) -{ - struct pipe_context *pipe = &rctx->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r600_vertex_element *ve = rctx->vertex_elements; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - uint8_t *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; - struct pipe_resource *out_buffer = NULL; - unsigned i, num_verts, out_offset; - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - boolean flushed; - - /* Initialize the translate key, i.e. the recipe how vertices should be - * translated. 
*/ - for (i = 0; i < ve->count; i++) { - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->elements[i].src_format == ve->hw_format[i]) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. */ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->elements[i].vertex_buffer_index; - te->input_format = ve->elements[i].src_format; - te->input_offset = ve->elements[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. */ - tr = translate_cache_find(rctx->tran.translate_cache, &key); - - /* Map buffers we want to translate. */ - for (i = 0; i < rctx->nvertex_buffers; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, - vb_map[i] + vb->buffer_offset + vb->stride * min_index, - vb->stride, ~0); - } - } - - /* Create and map the output buffer. 
*/ - num_verts = max_index + 1 - min_index; - - u_upload_alloc(rctx->upload_vb, - key.output_stride * min_index, - key.output_stride * num_verts, - &out_offset, &out_buffer, &flushed, - (void**)&out_map); - - out_offset -= key.output_stride * min_index; - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < rctx->nvertex_buffers; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, vb_transfer[i]); - } - } - - /* Find the first free slot. */ - rctx->tran.vb_slot = ~0; - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (!rctx->vertex_buffer[i].buffer) { - rctx->tran.vb_slot = i; - - if (i >= rctx->nvertex_buffers) { - rctx->nreal_vertex_buffers = i+1; - } - break; - } - } - - if (rctx->tran.vb_slot != ~0) { - /* Setup the new vertex buffer. */ - pipe_resource_reference(&rctx->real_vertex_buffer[rctx->tran.vb_slot], out_buffer); - rctx->vertex_buffer[rctx->tran.vb_slot].buffer_offset = out_offset; - rctx->vertex_buffer[rctx->tran.vb_slot].stride = key.output_stride; - - /* Setup new vertex elements. */ - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->elements[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->elements[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->elements[i], - sizeof(struct pipe_vertex_element)); - } - } - - rctx->tran.saved_velems = rctx->vertex_elements; - rctx->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); - } - - pipe_resource_reference(&out_buffer, NULL); -} - -void r600_end_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - - if (rctx->tran.new_velems == NULL) { - return; - } - - /* Restore vertex elements. 
*/ - pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); - rctx->tran.saved_velems = NULL; - pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); - rctx->tran.new_velems = NULL; - - /* Delete the now-unused VBO. */ - pipe_resource_reference(&rctx->real_vertex_buffer[rctx->tran.vb_slot], NULL); - rctx->nreal_vertex_buffers = rctx->nvertex_buffers; -} void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, @@ -206,7 +41,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600, switch (*index_size) { case 1: - u_upload_alloc(r600->upload_vb, 0, count * 2, + u_upload_alloc(r600->upload_ib, 0, count * 2, &out_offset, &out_buffer, &flushed, &ptr); util_shorten_ubyte_elts_to_userptr( -- cgit v1.2.3 From c95bc1224a4b20b9470ddcb37b5f78975991073b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 7 Feb 2011 02:00:44 +0100 Subject: r300g: use the new vertex buffer manager --- src/gallium/drivers/r300/r300_blit.c | 4 +- src/gallium/drivers/r300/r300_context.c | 28 +--- src/gallium/drivers/r300/r300_context.h | 45 +----- src/gallium/drivers/r300/r300_emit.c | 18 +-- src/gallium/drivers/r300/r300_render.c | 130 +++++++--------- src/gallium/drivers/r300/r300_render_translate.c | 180 ---------------------- src/gallium/drivers/r300/r300_screen_buffer.c | 101 ++++-------- src/gallium/drivers/r300/r300_screen_buffer.h | 11 +- src/gallium/drivers/r300/r300_state.c | 187 ++++------------------- 9 files changed, 144 insertions(+), 560 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 69f8115c32c..e29990d4b95 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -55,8 +55,8 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o util_blitter_save_viewport(r300->blitter, &r300->viewport); util_blitter_save_clip(r300->blitter, (struct 
pipe_clip_state*)r300->clip_state.state); util_blitter_save_vertex_elements(r300->blitter, r300->velems); - util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count, - r300->vertex_buffer); + util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers, + r300->vbuf_mgr->vertex_buffer); if (op & (R300_CLEAR_SURFACE | R300_COPY)) { util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 114fb316c05..b8b7afa9c27 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -83,12 +83,6 @@ static void r300_release_referenced_objects(struct r300_context *r300) /* The SWTCL VBO. */ pipe_resource_reference(&r300->vbo, NULL); - /* Vertex buffers. */ - for (i = 0; i < r300->real_vertex_buffer_count; i++) { - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - pipe_resource_reference(&r300->real_vertex_buffer[i], NULL); - } - /* If there are any queries pending or not destroyed, remove them now. 
*/ foreach_s(query, temp, &r300->query_list) { remove_from_list(query); @@ -108,14 +102,11 @@ static void r300_destroy_context(struct pipe_context* context) if (r300->draw) draw_destroy(r300->draw); - if (r300->upload_vb) - u_upload_destroy(r300->upload_vb); + if (r300->vbuf_mgr) + u_vbuf_mgr_destroy(r300->vbuf_mgr); if (r300->upload_ib) u_upload_destroy(r300->upload_ib); - if (r300->tran.translate_cache) - translate_cache_destroy(r300->tran.translate_cache); - /* XXX: This function assumes r300->query_list was initialized */ r300_release_referenced_objects(r300); @@ -442,6 +433,11 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_state_functions(r300); r300_init_resource_functions(r300); + r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16, + U_VERTEX_FETCH_DWORD_ALIGNED); + if (!r300->vbuf_mgr) + goto fail; + r300->blitter = util_blitter_create(&r300->context); if (r300->blitter == NULL) goto fail; @@ -463,16 +459,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (r300->upload_ib == NULL) goto fail; - r300->upload_vb = u_upload_create(&r300->context, - 1024 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (r300->upload_vb == NULL) - goto fail; - - r300->tran.translate_cache = translate_cache_create(); - if (r300->tran.translate_cache == NULL) - goto fail; - r300_init_states(&r300->context); /* The KIL opcode needs the first texture unit to be enabled diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 430a0ddbb5c..9d2a0b290ae 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,8 +30,7 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_transfer.h" - -#include "translate/translate_cache.h" +#include "util/u_vbuf_mgr.h" #include "r300_defines.h" #include "r300_screen.h" @@ -421,33 +420,16 @@ struct r300_texture { struct r300_vertex_element_state { unsigned count; struct 
pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; + unsigned format_size[PIPE_MAX_ATTRIBS]; - /* If (velem[i].src_format != hw_format[i]), the vertex buffer - * referenced by this vertex element cannot be used for rendering and - * its vertex data must be translated to hw_format[i]. */ - enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; - unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + struct u_vbuf_mgr_elements *vmgr_elements; /* The size of the vertex, in dwords. */ unsigned vertex_size_dwords; - /* This might mean two things: - * - src_format != hw_format, as discussed above. - * - src_offset % 4 != 0. */ - boolean incompatible_layout; - struct r300_vertex_stream_state vertex_stream; }; -struct r300_translate_context { - /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; - - /* Saved and new vertex element state. */ - void *saved_velems, *new_velems; - unsigned vb_slot; -}; - struct r300_context { /* Parent class */ struct pipe_context context; @@ -474,8 +456,6 @@ struct r300_context { struct blitter_context* blitter; /* Stencil two-sided reference value fallback. */ struct r300_stencilref_context *stencilref_fallback; - /* For translating vertex buffers having incompatible vertex layout. */ - struct r300_translate_context tran; /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this @@ -557,15 +537,6 @@ struct r300_context { /* The pointers to the first and the last atom. */ struct r300_atom *first_dirty, *last_dirty; - /* Vertex buffers for Gallium. */ - /* May contain user buffers. */ - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - /* Contains only non-user buffers. */ - struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS]; - int vertex_buffer_count; - int real_vertex_buffer_count; /* with the translated buffer. */ - int vertex_buffer_max_index; - boolean any_user_vbs; /* Vertex elements for Gallium. 
*/ struct r300_vertex_element_state *velems; @@ -592,8 +563,6 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; - /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ - boolean incompatible_vb_layout; boolean cbzb_clear; /* Whether ZMASK is enabled. */ @@ -610,10 +579,11 @@ struct r300_context { /* two mem block managers for hiz/zmask ram space */ struct mem_block *hiz_mm; - /* upload managers */ - struct u_upload_mgr *upload_vb; + /* upload manager */ struct u_upload_mgr *upload_ib; + struct u_vbuf_mgr *vbuf_mgr; + struct util_slab_mempool pool_transfers; /* Stat counter. */ @@ -708,9 +678,6 @@ void r300_resume_query(struct r300_context *r300, void r300_stop_query(struct r300_context *r300); /* r300_render_translate.c */ -void r300_begin_vertex_translate(struct r300_context *r300, - int min_index, int max_index); -void r300_end_vertex_translate(struct r300_context *r300); void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 34f87f74d3e..60234497c95 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -812,9 +812,9 @@ void r300_emit_textures_state(struct r300_context *r300, static void r300_update_vertex_arrays_cb(struct r300_context *r300, unsigned packet_size) { - struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; + struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vbuf_mgr->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; - unsigned *hw_format_size = r300->velems->hw_format_size; + unsigned *hw_format_size = r300->velems->format_size; unsigned size1, size2, vertex_array_count = r300->velems->count; int i; CB_LOCALS; @@ -846,8 +846,8 @@ static void 
r300_update_vertex_arrays_cb(struct r300_context *r300, unsigned pac void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed) { - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_resource **valid_vbuf = r300->real_vertex_buffer; + struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; + struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; struct r300_buffer *buf; int i; @@ -866,7 +866,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde OUT_CS_TABLE(r300->vertex_arrays_cb, packet_size); } else { struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size = r300->velems->hw_format_size; + unsigned *hw_format_size = r300->velems->format_size; unsigned size1, size2; for (i = 0; i < vertex_array_count - 1; i += 2) { @@ -892,7 +892,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde for (i = 0; i < vertex_array_count; i++) { buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b); + OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b.b); } END_CS; } @@ -1227,9 +1227,9 @@ validate: r300_buffer(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... 
*/ if (do_validate_vertex_buffers) { - struct pipe_resource **buf = r300->real_vertex_buffer; - struct pipe_resource **last = r300->real_vertex_buffer + - r300->real_vertex_buffer_count; + struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer; + struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer + + r300->vbuf_mgr->nr_real_vertex_buffers; for (; buf != last; buf++) { if (!*buf) continue; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 41ddd748bbf..2b4aa9f438f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -136,7 +136,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias) static void r300_split_index_bias(struct r300_context *r300, int index_bias, int *buffer_offset, int *index_offset) { - struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer; + struct pipe_vertex_buffer *vb, *vbufs = r300->vbuf_mgr->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; unsigned i, size; int max_neg_bias; @@ -225,7 +225,8 @@ static boolean r300_emit_states(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, int buffer_offset, - int index_bias) + int index_bias, + boolean user_buffers) { boolean first_draw = flags & PREP_FIRST_DRAW; boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; @@ -246,10 +247,10 @@ static boolean r300_emit_states(struct r300_context *r300, /* Consider the validation done only if everything was validated. 
*/ if (validate_vbos) { r300->validate_buffers = FALSE; - if (r300->any_user_vbs) + if (user_buffers) r300->upload_vb_validated = TRUE; if (r300->index_buffer.buffer && - r300_is_user_buffer(r300->index_buffer.buffer)) { + r300_buffer(r300->index_buffer.buffer)->b.user_ptr) { r300->upload_ib_validated = TRUE; } } @@ -289,12 +290,14 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300, struct pipe_resource *index_buffer, unsigned cs_dwords, int buffer_offset, - int index_bias) + int index_bias, + boolean user_buffers) { if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) flags |= PREP_FIRST_DRAW; - return r300_emit_states(r300, flags, index_buffer, buffer_offset, index_bias); + return r300_emit_states(r300, flags, index_buffer, buffer_offset, + index_bias, user_buffers); } static boolean immd_is_good_idea(struct r300_context *r300, @@ -325,7 +328,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, vbi = velem->vertex_buffer_index; if (!checked[vbi]) { - buf = r300->real_vertex_buffer[vbi]; + buf = r300->vbuf_mgr->real_vertex_buffer[vbi]; if (!(r300_buffer(buf)->domain & R300_DOMAIN_GTT)) { return FALSE; @@ -376,21 +379,22 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, CS_LOCALS(r300); - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, + FALSE)) return; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; - size[i] = r300->velems->hw_format_size[i] / 4; + size[i] = r300->velems->format_size[i] / 4; vbi = velem->vertex_buffer_index; - vbuf = &r300->vertex_buffer[vbi]; + vbuf = &r300->vbuf_mgr->vertex_buffer[vbi]; stride[i] = vbuf->stride / 4; /* Map the buffer. 
*/ if (!transfer[vbi]) { map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, - r300->real_vertex_buffer[vbi], + r300->vbuf_mgr->real_vertex_buffer[vbi], PIPE_TRANSFER_READ, &transfer[vbi]); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; @@ -548,7 +552,8 @@ static void r300_draw_range_elements(struct pipe_context* pipe, unsigned maxIndex, unsigned mode, unsigned start, - unsigned count) + unsigned count, + boolean user_buffers) { struct r300_context* r300 = r300_context(pipe); struct pipe_resource *indexBuffer = r300->index_buffer.buffer; @@ -570,7 +575,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, /* Fallback for misaligned ushort indices. */ if (indexSize == 2 && (start & 1) && - !r300_is_user_buffer(indexBuffer)) { + !r300_buffer(indexBuffer)->b.user_ptr) { struct pipe_transfer *transfer; struct pipe_resource *userbuf; @@ -592,14 +597,14 @@ static void r300_draw_range_elements(struct pipe_context* pipe, } pipe_buffer_unmap(pipe, transfer); } else { - if (r300_is_user_buffer(indexBuffer)) + if (r300_buffer(indexBuffer)->b.user_ptr) r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count); } /* 19 dwords for emit_draw_elements. Give up if the function fails. 
*/ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias)) + PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias, user_buffers)) goto done; if (alt_num_verts || count <= 65535) { @@ -619,7 +624,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 19, buffer_offset, indexBias)) + indexBuffer, 19, buffer_offset, indexBias, user_buffers)) goto done; } } while (count); @@ -632,7 +637,8 @@ done: } static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) + unsigned start, unsigned count, + boolean user_buffers) { struct r300_context* r300 = r300_context(pipe); boolean alt_num_verts = r300->screen->caps.is_r500 && @@ -646,7 +652,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. 
*/ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0)) + NULL, 9, start, 0, user_buffers)) return; if (alt_num_verts || count <= 65535) { @@ -663,7 +669,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0)) + start, 0, user_buffers)) return; } } while (count); @@ -676,10 +682,8 @@ static void r300_draw_vbo(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); unsigned count = info->count; - boolean translate = FALSE; + boolean buffers_updated, uploader_flushed; boolean indexed = info->indexed && r300->index_buffer.buffer; - unsigned min_index = 0; - unsigned max_index = r300->vertex_buffer_max_index; if (r300->skip_rendering) { return; @@ -689,12 +693,26 @@ static void r300_draw_vbo(struct pipe_context* pipe, return; } + u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info, + &buffers_updated, &uploader_flushed); + + if (buffers_updated) { + r300->vertex_arrays_dirty = TRUE; + + if (uploader_flushed || !r300->upload_vb_validated) { + r300->upload_vb_validated = FALSE; + r300->validate_buffers = TRUE; + } + } else { + r300->upload_vb_validated = FALSE; + } + if (indexed) { - int real_min_index, real_max_index; /* Compute the start for draw_elements, taking the offset into account. 
*/ unsigned start_indexed = info->start + (r300->index_buffer.offset / r300->index_buffer.index_size); + int max_index = MIN2(r300->vbuf_mgr->max_index, info->max_index); assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); @@ -705,54 +723,21 @@ static void r300_draw_vbo(struct pipe_context* pipe, return; } - min_index = MAX2(min_index, info->min_index); - max_index = MIN2(max_index, info->max_index); - real_min_index = (int)min_index - info->index_bias; - real_max_index = (int)max_index - info->index_bias; - if (max_index >= (1 << 24) - 1) { fprintf(stderr, "r300: Invalid max_index: %i. Skipping rendering...\n", max_index); return; } r300_update_derived_state(r300); - - /* Set up the fallback for an incompatible vertex layout if needed. */ - if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { - r300_begin_vertex_translate(r300, real_min_index, real_max_index); - translate = TRUE; - } - - /* Upload vertex buffers. */ - if (r300->any_user_vbs) { - r300_upload_user_buffers(r300, real_min_index, real_max_index); - } - - r300_draw_range_elements(pipe, info->index_bias, min_index, max_index, - info->mode, start_indexed, count); + r300_draw_range_elements(pipe, info->index_bias, info->min_index, + max_index, info->mode, start_indexed, count, + buffers_updated); } else { - min_index = MAX2(min_index, info->start); - max_index = MIN2(max_index, info->start + count - 1); - r300_update_derived_state(r300); - - /* Set up the fallback for an incompatible vertex layout if needed. */ - if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { - r300_begin_vertex_translate(r300, min_index, max_index); - translate = TRUE; - } - - /* Upload vertex buffers. 
*/ - if (r300->any_user_vbs) { - r300_upload_user_buffers(r300, min_index, max_index); - } - - r300_draw_arrays(pipe, info->mode, info->start, count); + r300_draw_arrays(pipe, info->mode, info->start, count, buffers_updated); } - if (translate) { - r300_end_vertex_translate(r300); - } + u_vbuf_mgr_draw_end(r300->vbuf_mgr); } /**************************************************************************** @@ -787,10 +772,10 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, (indexed ? PREP_INDEXED : 0), indexed ? 256 : 6); - for (i = 0; i < r300->real_vertex_buffer_count; i++) { - if (r300->vertex_buffer[i].buffer) { + for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { + if (r300->vbuf_mgr->vertex_buffer[i].buffer) { void *buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, + r300->vbuf_mgr->vertex_buffer[i].buffer, PIPE_TRANSFER_READ, &vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, buf); @@ -810,8 +795,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, draw_flush(r300->draw); r300->draw_vbo_locked = FALSE; - for (i = 0; i < r300->real_vertex_buffer_count; i++) { - if (r300->vertex_buffer[i].buffer) { + for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { + if (r300->vbuf_mgr->vertex_buffer[i].buffer) { pipe_buffer_unmap(pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } @@ -963,12 +948,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 6, 0, 0)) + NULL, 6, 0, 0, FALSE)) return; } else { if (!r300_emit_states(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 0, 0)) + NULL, 0, 0, FALSE)) return; } @@ -1020,12 +1005,12 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + NULL, 256, 0, 0, 
FALSE)) return; } else { if (!r300_emit_states(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 0, 0)) + NULL, 0, 0, FALSE)) return; } @@ -1062,7 +1047,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + NULL, 256, 0, 0, FALSE)) return; end_cs_dwords = r300_get_num_cs_end_dwords(r300); @@ -1166,7 +1151,8 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, + FALSE)) goto done; DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index ec4eaa9d624..76d012d81e6 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -20,190 +20,10 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - * The functions below translate vertex and index buffers to the layout - * compatible with the hardware, so that all vertex and index fetches are - * DWORD-aligned and all used vertex and index formats are supported. - * For indices, an optional index offset is added to each index. 
- */ - #include "r300_context.h" -#include "translate/translate.h" #include "util/u_index_modify.h" #include "util/u_upload_mgr.h" -void r300_begin_vertex_translate(struct r300_context *r300, - int min_index, int max_index) -{ - struct pipe_context *pipe = &r300->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r300_vertex_element_state *ve = r300->velems; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - uint8_t *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; - struct pipe_resource *out_buffer = NULL; - unsigned i, num_verts, out_offset; - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - boolean flushed; - - /* Initialize the translate key, i.e. the recipe how vertices should be - * translated. */ - for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &r300->vertex_buffer[ve->velem[i].vertex_buffer_index]; - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->velem[i].src_format == ve->hw_format[i] && - /* These two are r300-specific. */ - (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 && - vb->stride % 4 == 0) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. 
*/ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->velem[i].vertex_buffer_index; - te->input_format = ve->velem[i].src_format; - te->input_offset = ve->velem[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->velem[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. */ - tr = translate_cache_find(r300->tran.translate_cache, &key); - - /* Map buffers we want to translate. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, - vb_map[i] + vb->buffer_offset + vb->stride * min_index, - vb->stride, ~0); - } - } - - /* Create and map the output buffer. */ - num_verts = max_index + 1 - min_index; - - u_upload_alloc(r300->upload_vb, - key.output_stride * min_index, - key.output_stride * num_verts, - &out_offset, &out_buffer, &flushed, - (void**)&out_map); - - out_offset -= key.output_stride * min_index; - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, vb_transfer[i]); - } - } - - /* Setup the new vertex buffer in the first free slot. */ - r300->tran.vb_slot = ~0; - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (!r300->vertex_buffer[i].buffer) { - r300->tran.vb_slot = i; - - if (i >= r300->vertex_buffer_count) { - r300->real_vertex_buffer_count = i+1; - } - - /* r300-specific: */ - r300->validate_buffers = TRUE; - r300->vertex_arrays_dirty = TRUE; - break; - } - } - - if (r300->tran.vb_slot != ~0) { - /* Setup the new vertex buffer. 
*/ - pipe_resource_reference(&r300->real_vertex_buffer[r300->tran.vb_slot], out_buffer); - r300->vertex_buffer[r300->tran.vb_slot].buffer_offset = out_offset; - r300->vertex_buffer[r300->tran.vb_slot].stride = key.output_stride; - - /* Setup new vertex elements. */ - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->velem[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->velem[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = r300->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->velem[i], - sizeof(struct pipe_vertex_element)); - } - } - - r300->tran.saved_velems = r300->velems; - r300->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems); - } - - pipe_resource_reference(&out_buffer, NULL); -} - -void r300_end_vertex_translate(struct r300_context *r300) -{ - struct pipe_context *pipe = &r300->context; - - if (r300->tran.new_velems == NULL) { - return; - } - - /* Restore vertex elements. */ - pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems); - r300->tran.saved_velems = NULL; - pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems); - r300->tran.new_velems = NULL; - - /* Delete the now-unused VBO. 
*/ - pipe_resource_reference(&r300->real_vertex_buffer[r300->tran.vb_slot], NULL); - r300->real_vertex_buffer_count = r300->vertex_buffer_count; -} void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 85057d745e2..d76524d261d 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -40,7 +40,7 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, struct r300_context *r300 = r300_context(context); struct r300_buffer *rbuf = r300_buffer(buf); - if (r300_is_user_buffer(buf)) + if (rbuf->b.user_ptr) return PIPE_UNREFERENCED; if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain)) @@ -62,7 +62,7 @@ void r300_upload_index_buffer(struct r300_context *r300, unsigned count) { unsigned index_offset; - uint8_t *ptr = r300_buffer(*index_buffer)->user_buffer; + uint8_t *ptr = r300_buffer(*index_buffer)->b.user_ptr; boolean flushed; *index_buffer = NULL; @@ -81,51 +81,6 @@ void r300_upload_index_buffer(struct r300_context *r300, } } -void r300_upload_user_buffers(struct r300_context *r300, - int min_index, int max_index) -{ - int i, nr = r300->velems->count; - unsigned count = max_index + 1 - min_index; - boolean flushed; - boolean uploaded[32] = {0}; - - for (i = 0; i < nr; i++) { - unsigned index = r300->velems->velem[i].vertex_buffer_index; - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[index]; - struct r300_buffer *userbuf = r300_buffer(vb->buffer); - - if (userbuf && userbuf->user_buffer && !uploaded[index]) { - unsigned first, size; - - if (vb->stride) { - first = vb->stride * min_index; - size = vb->stride * count; - } else { - first = 0; - size = r300->velems->hw_format_size[i]; - } - - u_upload_data(r300->upload_vb, first, size, - userbuf->user_buffer + first, - &vb->buffer_offset, - &r300->real_vertex_buffer[index], - 
&flushed); - - vb->buffer_offset -= first; - - r300->vertex_arrays_dirty = TRUE; - - if (flushed || !r300->upload_vb_validated) { - r300->upload_vb_validated = FALSE; - r300->validate_buffers = TRUE; - } - uploaded[index] = TRUE; - } else { - assert(r300->real_vertex_buffer[index]); - } - } -} - static void r300_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { @@ -184,8 +139,8 @@ r300_buffer_transfer_map( struct pipe_context *pipe, struct r300_buffer *rbuf = r300_buffer(transfer->resource); uint8_t *map; - if (rbuf->user_buffer) - return (uint8_t *) rbuf->user_buffer + transfer->box.x; + if (rbuf->b.user_ptr) + return (uint8_t *) rbuf->b.user_ptr + transfer->box.x; if (rbuf->constant_buffer) return (uint8_t *) rbuf->constant_buffer + transfer->box.x; @@ -234,7 +189,7 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe, memcpy(rbuf->constant_buffer + box->x, data, box->width); return; } - assert(rbuf->user_buffer == NULL); + assert(rbuf->b.user_ptr == NULL); map = rws->buffer_map(rws, rbuf->buf, r300->cs, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); @@ -268,25 +223,25 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->magic = R300_BUFFER_MAGIC; - rbuf->b.b = *templ; - rbuf->b.vtbl = &r300_buffer_vtbl; - pipe_reference_init(&rbuf->b.b.reference, 1); - rbuf->b.b.screen = screen; + rbuf->b.b.b = *templ; + rbuf->b.b.vtbl = &r300_buffer_vtbl; + pipe_reference_init(&rbuf->b.b.b.reference, 1); + rbuf->b.b.b.screen = screen; + rbuf->b.user_ptr = NULL; rbuf->domain = R300_DOMAIN_GTT; rbuf->buf = NULL; rbuf->constant_buffer = NULL; - rbuf->user_buffer = NULL; /* Alloc constant buffers in RAM. 
*/ if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) { rbuf->constant_buffer = MALLOC(templ->width0); - return &rbuf->b.b; + return &rbuf->b.b.b; } rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, - rbuf->b.b.width0, alignment, - rbuf->b.b.bind, rbuf->b.b.usage, + rbuf->b.b.b.width0, alignment, + rbuf->b.b.b.bind, rbuf->b.b.b.usage, rbuf->domain); rbuf->cs_buf = r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf); @@ -296,7 +251,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, return NULL; } - return &rbuf->b.b; + return &rbuf->b.b.b; } struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, @@ -310,21 +265,21 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, rbuf->magic = R300_BUFFER_MAGIC; - pipe_reference_init(&rbuf->b.b.reference, 1); - rbuf->b.vtbl = &r300_buffer_vtbl; - rbuf->b.b.screen = screen; - rbuf->b.b.target = PIPE_BUFFER; - rbuf->b.b.format = PIPE_FORMAT_R8_UNORM; - rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE; - rbuf->b.b.bind = bind; - rbuf->b.b.width0 = ~0; - rbuf->b.b.height0 = 1; - rbuf->b.b.depth0 = 1; - rbuf->b.b.array_size = 1; - rbuf->b.b.flags = 0; + pipe_reference_init(&rbuf->b.b.b.reference, 1); + rbuf->b.b.b.screen = screen; + rbuf->b.b.b.target = PIPE_BUFFER; + rbuf->b.b.b.format = PIPE_FORMAT_R8_UNORM; + rbuf->b.b.b.usage = PIPE_USAGE_IMMUTABLE; + rbuf->b.b.b.bind = bind; + rbuf->b.b.b.width0 = ~0; + rbuf->b.b.b.height0 = 1; + rbuf->b.b.b.depth0 = 1; + rbuf->b.b.b.array_size = 1; + rbuf->b.b.b.flags = 0; + rbuf->b.b.vtbl = &r300_buffer_vtbl; + rbuf->b.user_ptr = ptr; rbuf->domain = R300_DOMAIN_GTT; rbuf->buf = NULL; rbuf->constant_buffer = NULL; - rbuf->user_buffer = ptr; - return &rbuf->b.b; + return &rbuf->b.b.b; } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 58dec8539b6..1dfbc1399ba 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ 
b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -46,7 +46,7 @@ struct r300_buffer_range { /* Vertex buffer. */ struct r300_buffer { - struct u_resource b; + struct u_vbuf_resource b; uint32_t magic; @@ -55,15 +55,11 @@ struct r300_buffer enum r300_buffer_domain domain; - uint8_t *user_buffer; uint8_t *constant_buffer; }; /* Functions. */ -void r300_upload_user_buffers(struct r300_context *r300, - int min_index, int max_index); - void r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned *start, @@ -87,9 +83,4 @@ static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer) return (struct r300_buffer *)buffer; } -static INLINE boolean r300_is_user_buffer(struct pipe_resource *buffer) -{ - return r300_buffer(buffer)->user_buffer ? true : false; -} - #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index dad41ab91ed..aa4e05d4be5 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1478,10 +1478,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); - const struct pipe_vertex_buffer *vbo; - unsigned i, max_index = (1 << 24) - 1; - boolean any_user_buffer = FALSE; - boolean any_nonuser_buffer = FALSE; + unsigned i; struct pipe_vertex_buffer dummy_vb = {0}; /* There must be at least one vertex buffer set, otherwise it locks up. */ @@ -1491,91 +1488,21 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, count = 1; } - if (count == r300->vertex_buffer_count && - memcmp(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count) == 0) { - return; - } + u_vbuf_mgr_set_vertex_buffers(r300->vbuf_mgr, count, buffers); if (r300->screen->caps.has_tcl) { /* HW TCL. 
*/ - r300->incompatible_vb_layout = FALSE; - - /* Check if the strides and offsets are aligned to the size of DWORD. */ for (i = 0; i < count; i++) { - if (buffers[i].buffer) { - if (buffers[i].stride % 4 != 0 || - buffers[i].buffer_offset % 4 != 0) { - r300->incompatible_vb_layout = TRUE; - break; - } + if (buffers[i].buffer && + !r300_buffer(buffers[i].buffer)->b.user_ptr) { + r300->validate_buffers = TRUE; } } - - for (i = 0; i < count; i++) { - vbo = &buffers[i]; - - /* Skip NULL buffers */ - if (!vbo->buffer) { - continue; - } - - /* User buffers have no info about maximum index, - * we will have to compute it in draw_vbo. */ - if (r300_is_user_buffer(vbo->buffer)) { - any_user_buffer = TRUE; - continue; - } - any_nonuser_buffer = TRUE; - - /* The stride of zero means we will be fetching only the first - * vertex, so don't care about max_index. */ - if (!vbo->stride) - continue; - - /* Update the maximum index. */ - { - unsigned vbo_max_index = - (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; - max_index = MIN2(max_index, vbo_max_index); - } - } - - r300->any_user_vbs = any_user_buffer; - r300->vertex_buffer_max_index = max_index; r300->vertex_arrays_dirty = TRUE; - if (any_nonuser_buffer) - r300->validate_buffers = TRUE; - if (!any_user_buffer) - r300->upload_vb_validated = FALSE; } else { /* SW TCL. */ draw_set_vertex_buffers(r300->draw, count, buffers); } - - /* Common code. */ - for (i = 0; i < count; i++) { - vbo = &buffers[i]; - - /* Reference our buffer. */ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer); - if (vbo->buffer && r300_is_user_buffer(vbo->buffer)) { - pipe_resource_reference(&r300->real_vertex_buffer[i], NULL); - } else { - pipe_resource_reference(&r300->real_vertex_buffer[i], vbo->buffer); - } - } - for (; i < r300->real_vertex_buffer_count; i++) { - /* Dereference any old buffers. 
*/ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - pipe_resource_reference(&r300->real_vertex_buffer[i], NULL); - } - - memcpy(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count); - - r300->vertex_buffer_count = count; - r300->real_vertex_buffer_count = count; } static void r300_set_index_buffer(struct pipe_context* pipe, @@ -1588,7 +1515,7 @@ static void r300_set_index_buffer(struct pipe_context* pipe, memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); if (r300->screen->caps.has_tcl && - !r300_is_user_buffer(ib->buffer)) { + !r300_buffer(ib->buffer)->b.user_ptr) { r300->validate_buffers = TRUE; r300->upload_ib_validated = FALSE; } @@ -1621,7 +1548,7 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) * so PSC should just route stuff based on the vertex elements, * and not on attrib information. */ for (i = 0; i < velems->count; i++) { - format = velems->hw_format[i]; + format = velems->velem[i].src_format; type = r300_translate_vertex_data_type(format); if (type == R300_INVALID_FORMAT) { @@ -1653,16 +1580,13 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) vstream->count = (i >> 1) + 1; } -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break - static void* r300_create_vertex_elements_state(struct pipe_context* pipe, unsigned count, const struct pipe_vertex_element* attribs) { + struct r300_context *r300 = r300_context(pipe); struct r300_vertex_element_state *velems; unsigned i; - enum pipe_format *format; struct pipe_vertex_element dummy_attrib = {0}; /* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. 
*/ @@ -1674,77 +1598,26 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, assert(count <= PIPE_MAX_ATTRIBS); velems = CALLOC_STRUCT(r300_vertex_element_state); - if (velems != NULL) { - velems->count = count; - memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); - - if (r300_screen(pipe->screen)->caps.has_tcl) { - /* Set the best hw format in case the original format is not - * supported by hw. */ - for (i = 0; i < count; i++) { - velems->hw_format[i] = velems->velem[i].src_format; - format = &velems->hw_format[i]; - - /* This is basically the list of unsupported formats. - * For now we don't care about the alignment, that's going to - * be sorted out after the PSC setup. */ - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_UNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_USCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_FIXED, R32_FLOAT); - FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT); - - default:; - } + if (!velems) + return 
NULL; - velems->incompatible_layout = - velems->incompatible_layout || - velems->velem[i].src_format != velems->hw_format[i] || - velems->velem[i].src_offset % 4 != 0; - } + velems->count = count; + velems->vmgr_elements = + u_vbuf_mgr_create_vertex_elements(r300->vbuf_mgr, count, attribs, + velems->velem); - /* Now setup PSC. - * The unused components will be replaced by (..., 0, 1). */ - r300_vertex_psc(velems); - - /* Align the formats to the size of DWORD. - * We only care about the blocksizes of the formats since - * swizzles are already set up. - * Also compute the vertex size. */ - for (i = 0; i < count; i++) { - /* This is OK because we check for aligned strides too - * elsewhere. */ - velems->hw_format_size[i] = - align(util_format_get_blocksize(velems->hw_format[i]), 4); - velems->vertex_size_dwords += velems->hw_format_size[i] / 4; - } + if (r300_screen(pipe->screen)->caps.has_tcl) { + /* Setup PSC. + * The unused components will be replaced by (..., 0, 1). */ + r300_vertex_psc(velems); + + for (i = 0; i < count; i++) { + velems->format_size[i] = + align(util_format_get_blocksize(velems->velem[i].src_format), 4); + velems->vertex_size_dwords += velems->format_size[i] / 4; } } + return velems; } @@ -1760,6 +1633,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; + u_vbuf_mgr_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements); + if (r300->draw) { draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; @@ -1772,7 +1647,11 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state) { - FREE(state); + struct r300_context *r300 = r300_context(pipe); + struct r300_vertex_element_state *velems = state; + + u_vbuf_mgr_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements); + FREE(state); } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -1876,8 +1755,8 @@ 
static void r300_set_constant_buffer(struct pipe_context *pipe, if (buf == NULL || buf->width0 == 0) return; - if (rbuf->user_buffer) - mapped = (uint32_t*)rbuf->user_buffer; + if (rbuf->b.user_ptr) + mapped = (uint32_t*)rbuf->b.user_ptr; else if (rbuf->constant_buffer) mapped = (uint32_t*)rbuf->constant_buffer; else -- cgit v1.2.3 From a22bda9f80070393581c6ac512c03aa3144577ef Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 7 Feb 2011 03:46:25 +0100 Subject: r600g: correctly report supported vertex formats --- src/gallium/drivers/r600/eg_state_inlines.h | 5 ----- src/gallium/drivers/r600/r600_pipe.c | 11 ++++++--- src/gallium/drivers/r600/r600_state_inlines.h | 32 +++++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 5a39d7cdeec..ca00e61f52b 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -501,9 +501,4 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) -{ - return r600_translate_colorformat(format) != ~0; -} - #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 85ad0ee968b..04bbcf5f4d1 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -440,9 +440,14 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, retval |= PIPE_BIND_DEPTH_STENCIL; } - if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) - retval |= PIPE_BIND_VERTEX_BUFFER; + if (usage & PIPE_BIND_VERTEX_BUFFER) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + enum radeon_family family = r600_get_family(rscreen->radeon); + + if (r600_is_vertex_format_supported(format, 
family)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + } if (usage & PIPE_BIND_TRANSFER_READ) retval |= PIPE_BIND_TRANSFER_READ; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index d5dabdc69b6..f68bc849e2e 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -495,9 +495,37 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) +static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format, + enum radeon_family family) { - return r600_translate_colorformat(format) != ~0; + unsigned i; + const struct util_format_description *desc = util_format_description(format); + if (!desc) + return FALSE; + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + if (i == 4) + return FALSE; + + /* No fixed, no double. */ + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || + (desc->channel[i].size == 64 && + desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) + return FALSE; + + /* No scaled/norm formats with 32 bits per channel. */ + if (desc->channel[i].size == 32 && + (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)) + return FALSE; + + return TRUE; } #endif -- cgit v1.2.3 From d0f2ffad76f58a14d0e85f2b7961f2dbff0d8eeb Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: tgsi_src() can't fail. 
--- src/gallium/drivers/r600/r600_shader.c | 108 +++++++++------------------------ 1 file changed, 28 insertions(+), 80 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index fa21bbbce1c..ac10e19f098 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -756,9 +756,9 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } -static int tgsi_src(struct r600_shader_ctx *ctx, - const struct tgsi_full_src_register *tgsi_src, - struct r600_bc_alu_src *r600_src) +static void tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + struct r600_bc_alu_src *r600_src) { memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); r600_src->neg = tgsi_src->Register.Negate; @@ -772,7 +772,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx, index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) - return 0; + return; } index = tgsi_src->Register.Index; r600_src->sel = V_SQ_ALU_SRC_LITERAL; @@ -783,7 +783,6 @@ static int tgsi_src(struct r600_shader_ctx *ctx, r600_src->sel = tgsi_src->Register.Index; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; } - return 0; } static int tgsi_dst(struct r600_shader_ctx *ctx, @@ -831,10 +830,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { nconst++; } - r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); - if (r) { - return r; - } + tgsi_src(ctx, &inst->Src[i], &r600_src[i]); } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { @@ -1223,9 +1219,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - r 
= tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[1]); alu.src[1].chan = tgsi_chan(&inst->Src[0], i); } if (i == 3) { @@ -1368,9 +1362,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[i], &alu.src[i]); alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); alu.src[i].abs = 1; } @@ -1417,9 +1409,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[i], &alu.src[i]); alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); } alu.dst.sel = ctx->temp_reg; @@ -1441,9 +1431,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) /* LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1454,9 +1442,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[1], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; @@ -1693,9 +1679,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* Add perspective divide */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - 
r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.dst.sel = ctx->temp_reg; @@ -1711,9 +1695,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[1]); alu.src[1].chan = tgsi_chan(&inst->Src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1767,13 +1749,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src2_chan = 0; break; } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[1]); alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1854,9 +1832,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2218,9 +2194,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); @@ -2251,9 +2225,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); alu.src[0] = r600_src[0]; - r = tgsi_src(ctx, &inst->Src[0], 
&alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; @@ -2274,9 +2246,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; @@ -2320,9 +2290,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); @@ -2353,9 +2321,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); @@ -2417,10 +2383,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.src[1].sel = ctx->temp_reg; @@ -2441,10 +2404,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; @@ -2497,9 +2457,7 @@ static int 
tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.last = 1; alu.dst.chan = 0; @@ -2510,9 +2468,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return r; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; @@ -2542,9 +2498,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.last = 1; @@ -2573,18 +2527,14 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], i); } if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[1], &alu.src[1]); alu.src[1].chan = tgsi_chan(&inst->Src[1], i); } if (i == 3) @@ -2610,9 +2560,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; -- cgit v1.2.3 From 80235d92e6f0b2ac7b23d5d41b3f1ad0f12f91f2 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: tgsi_dst() can't fail. 
--- src/gallium/drivers/r600/r600_shader.c | 101 +++++++++------------------------ 1 file changed, 26 insertions(+), 75 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ac10e19f098..43c2f285cde 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -785,10 +785,10 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } } -static int tgsi_dst(struct r600_shader_ctx *ctx, - const struct tgsi_full_dst_register *tgsi_dst, - unsigned swizzle, - struct r600_bc_alu_dst *r600_dst) +static void tgsi_dst(struct r600_shader_ctx *ctx, + const struct tgsi_full_dst_register *tgsi_dst, + unsigned swizzle, + struct r600_bc_alu_dst *r600_dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -801,7 +801,6 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, if (inst->Instruction.Saturate) { r600_dst->clamp = 1; } - return 0; } static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) @@ -926,9 +925,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.inst = ctx->inst_info->r600_opcode; if (!swap) { @@ -1098,9 +1095,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->temp_reg; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1130,9 +1125,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, 
&alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1146,9 +1139,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1164,9 +1155,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = 0; @@ -1184,9 +1173,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -1255,9 +1242,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1269,9 +1254,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.src[0] = r600_src[0]; alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1282,9 +1265,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1301,9 +1282,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); alu.src[0] = r600_src[0]; alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1336,9 +1315,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1387,9 +1364,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.dst.chan = i; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == 3) alu.last = 1; @@ -1506,9 +1481,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; @@ -1541,9 +1514,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru alu.dst.chan = i; } else { alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; } @@ -1582,10 +1553,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -1620,10 +1588,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ @@ -1920,10 +1885,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[1] = r600_src[2]; alu.src[1].chan = tgsi_chan(&inst->Src[2], i); alu.omod = 3; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; @@ -1995,10 +1957,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; @@ -2040,10 +1999,7 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) alu.src[2] = r600_src[1]; alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -2163,11 +2119,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (use_temp) alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + else + 
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -2520,9 +2473,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; -- cgit v1.2.3 From 3b1c1f02537544a11772b94a8f2e8c3d4c886ca8 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: Store literal values in the r600_bc_alu_src structure. This is much easier to work with, and allows us to get rid of some of the literal handling hacks. --- src/gallium/drivers/r600/r600_asm.h | 2 +- src/gallium/drivers/r600/r600_shader.c | 25 ++++++++++++------------- src/gallium/drivers/r600/r600_shader.h | 2 -- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 510529abc35..82456d986ce 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -34,7 +34,7 @@ struct r600_bc_alu_src { unsigned neg; unsigned abs; unsigned rel; - u32 *value; + uint32_t value[4]; }; struct r600_bc_alu_dst { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 43c2f285cde..2194c17961f 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -232,12 +232,12 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s return 0; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); + int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const
struct tgsi_token *tokens) { static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - u32 *literals; int r; /* Would like some magic "get_bool_option_once" routine. @@ -250,13 +250,12 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s tgsi_dump(tokens, 0); } shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader, &literals); + r = r600_shader_from_tgsi(tokens, &shader->shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; } r = r600_bc_build(&shader->shader.bc); - free(literals); if (r) { R600_ERR("building bytecode failed !\n"); return r; @@ -499,7 +498,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) return ctx->num_interp_gpr; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals) +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; @@ -736,7 +735,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (r) goto out_err; } - *literals = ctx.literals; + free(ctx.literals); tgsi_parse_free(&ctx.parse); return 0; out_err: @@ -776,7 +775,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } index = tgsi_src->Register.Index; r600_src->sel = V_SQ_ALU_SRC_LITERAL; - r600_src->value = ctx->literals + index * 4; + memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); } else { if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; @@ -877,7 +876,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; - alu.src[0].value = r600_src[i].value; + alu.src[0].value[k] = r600_src[i].value[k]; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write 
= 1; @@ -1007,7 +1006,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[1].value = (uint32_t *)&half_inv_pi; + alu.src[1].value[0] = *(uint32_t *)&half_inv_pi; alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 0; alu.last = 1; @@ -1046,8 +1045,8 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[2].chan = 0; if (ctx->bc->chiprev == CHIPREV_R600) { - alu.src[1].value = (uint32_t *)&double_pi; - alu.src[2].value = (uint32_t *)&neg_pi; + alu.src[1].value[0] = *(uint32_t *)&double_pi; + alu.src[2].value[0] = *(uint32_t *)&neg_pi; } else { alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[2].sel = V_SQ_ALU_SRC_0_5; @@ -1757,7 +1756,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value = (u32*)&one_point_five; + alu.src[2].value[0] = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1778,7 +1777,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value = (u32*)&one_point_five; + alu.src[2].value[0] = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index dfa9dd20de7..8f96ce5085c 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -48,6 +48,4 @@ struct r600_shader { boolean fs_write_all; }; -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); - #endif -- cgit v1.2.3 From a77e813de32643ae2dfffd7ad12abed596172cab Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: Split r600_bc_alu_src. The r600_bc_alu_src structure is used in two different ways, as a vector and for the individual channels of that same vector. 
This is somewhat fragile, and probably confusing. --- src/gallium/drivers/r600/r600_asm.c | 7 +- src/gallium/drivers/r600/r600_asm.h | 2 +- src/gallium/drivers/r600/r600_shader.c | 295 ++++++++++++++++----------------- 3 files changed, 147 insertions(+), 157 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 46d7fc391c6..49e48667fa7 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -848,7 +848,7 @@ static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, for (i = 0; i < num_src; ++i) { if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { - uint32_t value = alu->src[i].value[alu->src[i].chan]; + uint32_t value = alu->src[i].value; unsigned found = 0; for (j = 0; j < *nliteral; ++j) { if (literal[j] == value) { @@ -875,7 +875,7 @@ static void r600_bc_alu_adjust_literals(struct r600_bc *bc, for (i = 0; i < num_src; ++i) { if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { - uint32_t value = alu->src[i].value[alu->src[i].chan]; + uint32_t value = alu->src[i].value; for (j = 0; j < nliteral; ++j) { if (literal[j] == value) { alu->src[i].chan = j; @@ -1178,8 +1178,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->ngpr = nalu->src[i].sel + 1; } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) - r600_bc_special_constants( - nalu->src[i].value[nalu->src[i].chan], + r600_bc_special_constants(nalu->src[i].value, &nalu->src[i].sel, &nalu->src[i].neg); } if (nalu->dst.sel >= bc->ngpr) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 82456d986ce..921d0d98454 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -34,7 +34,7 @@ struct r600_bc_alu_src { unsigned neg; unsigned abs; unsigned rel; - uint32_t value[4]; + uint32_t value; }; struct r600_bc_alu_dst { diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c index 2194c17961f..abb9ee0e8c5 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -280,6 +280,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader */ struct r600_shader_tgsi_instruction; +struct r600_shader_src { + unsigned sel; + unsigned swizzle[4]; + unsigned neg; + unsigned abs; + unsigned rel; + uint32_t value[4]; +}; + struct r600_shader_ctx { struct tgsi_shader_info info; struct tgsi_parse_context parse; @@ -755,11 +764,27 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } +static void r600_bc_src(struct r600_bc_alu_src *bc_src, + const struct r600_shader_src *shader_src, + unsigned chan) +{ + bc_src->sel = shader_src->sel; + bc_src->chan = shader_src->swizzle[chan]; + bc_src->neg = shader_src->neg; + bc_src->abs = shader_src->abs; + bc_src->rel = shader_src->rel; + bc_src->value = shader_src->value[bc_src->chan]; +} + static void tgsi_src(struct r600_shader_ctx *ctx, const struct tgsi_full_src_register *tgsi_src, - struct r600_bc_alu_src *r600_src) + struct r600_shader_src *r600_src) { - memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); + memset(r600_src, 0, sizeof(*r600_src)); + r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; + r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; + r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; + r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; r600_src->neg = tgsi_src->Register.Negate; r600_src->abs = tgsi_src->Register.Absolute; if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { @@ -802,23 +827,7 @@ static void tgsi_dst(struct r600_shader_ctx *ctx, } } -static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) -{ - switch (swizzle) { - case 0: - return tgsi_src->Register.SwizzleX; - case 1: - return tgsi_src->Register.SwizzleY; - case 2: - return tgsi_src->Register.SwizzleZ; - case 3: - return tgsi_src->Register.SwizzleW; - default: - 
return 0; - } -} - -static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) +static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_shader_src r600_src[3]) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; @@ -857,7 +866,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s } /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ -static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_shader_src r600_src[3]) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; @@ -876,7 +885,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; - alu.src[0].value[k] = r600_src[i].value[k]; + alu.src[0].value = r600_src[i].value[k]; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -908,7 +917,7 @@ static int tgsi_last_instruction(unsigned writemask) static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -929,15 +938,11 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &r600_src[j], i); } } else { - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - - alu.src[1] = 
r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &r600_src[1], i); + r600_bc_src(&alu.src[1], &r600_src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { @@ -976,13 +981,12 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) * see fdo bug 27901 */ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, - struct r600_bc_alu_src r600_src[3]) + struct r600_shader_src r600_src[3]) { static float half_inv_pi = 1.0 /(3.1415926535 * 2); static float double_pi = 3.1415926535 * 2; static float neg_pi = -3.1415926535; - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int r; struct r600_bc_alu alu; @@ -1001,12 +1005,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[1].value[0] = *(uint32_t *)&half_inv_pi; + alu.src[1].value = *(uint32_t *)&half_inv_pi; alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 0; alu.last = 1; @@ -1045,8 +1048,8 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[2].chan = 0; if (ctx->bc->chiprev == CHIPREV_R600) { - alu.src[1].value[0] = *(uint32_t *)&double_pi; - alu.src[2].value[0] = *(uint32_t *)&neg_pi; + alu.src[1].value = *(uint32_t *)&double_pi; + alu.src[2].value = *(uint32_t *)&neg_pi; } else { alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[2].sel = V_SQ_ALU_SRC_0_5; @@ -1063,7 +1066,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1107,7 +1110,7 @@ static int 
tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int r; @@ -1205,8 +1208,9 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + struct r600_shader_src r600_src; + tgsi_src(ctx, &inst->Src[0], &r600_src); + r600_bc_src(&alu.src[1], &r600_src, i); } if (i == 3) { alu.last = 1; @@ -1225,8 +1229,8 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int r; r = tgsi_split_constant(ctx, r600_src); @@ -1250,7 +1254,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.y = max(src.x, 0.0) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - alu.src[0] = r600_src[0]; + r600_bc_src(&alu.src[0], &r600_src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); @@ -1279,8 +1283,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + r600_bc_src(&alu.src[0], &r600_src[0], 1); tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1293,13 +1296,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); + r600_bc_src(&alu.src[0], &r600_src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[2], &r600_src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -1338,8 +1339,10 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + struct r600_shader_src r600_src; + + tgsi_src(ctx, &inst->Src[i], &r600_src); + r600_bc_src(&alu.src[i], &r600_src, 0); alu.src[i].abs = 1; } alu.dst.sel = ctx->temp_reg; @@ -1377,14 +1380,15 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src; struct r600_bc_alu alu; int i, r; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + tgsi_src(ctx, &inst->Src[i], &r600_src); + r600_bc_src(&alu.src[i], &r600_src, 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1399,14 +1403,17 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) static int tgsi_pow(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src[2]; struct r600_bc_alu alu; int r; + tgsi_src(ctx, &inst->Src[0], &r600_src[0]); + tgsi_src(ctx, &inst->Src[1], &r600_src[1]); + /* LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; @@ -1416,8 +1423,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - tgsi_src(ctx, &inst->Src[1], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[0], &r600_src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1441,8 +1447,8 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int i, r; r = tgsi_split_constant(ctx, r600_src); @@ -1461,13 +1467,10 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - + r600_bc_src(&alu.src[0], &r600_src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; + r600_bc_src(&alu.src[2], &r600_src[0], i); - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], i); if (i == 3) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1530,7 +1533,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1548,8 +1551,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); 
alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &r600_src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1569,7 +1571,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; @@ -1583,8 +1585,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &r600_src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1640,12 +1641,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { + struct r600_shader_src r600_src; + /* Add perspective divide */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + tgsi_src(ctx, &inst->Src[0], &r600_src); + r600_bc_src(&alu.src[0], &r600_src, 3); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 3; alu.last = 1; @@ -1659,8 +1662,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; - tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &r600_src, i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; @@ -1684,8 +1686,11 @@ static int 
tgsi_tex(struct r600_shader_ctx *ctx) } if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { + struct r600_shader_src r600_src; int src_chan, src2_chan; + tgsi_src(ctx, &inst->Src[0], &r600_src); + /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1713,10 +1718,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src2_chan = 0; break; } - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); - tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); + r600_bc_src(&alu.src[0], &r600_src, src_chan); + r600_bc_src(&alu.src[1], &r600_src, src2_chan); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1756,7 +1759,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value[0] = *(uint32_t *)&one_point_five; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1777,7 +1780,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value[0] = *(uint32_t *)&one_point_five; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -1793,11 +1796,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } if (src_not_temp) { + struct r600_shader_src r600_src; + + tgsi_src(ctx, &inst->Src[0], &r600_src); for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &r600_src, i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1858,7 +1863,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; @@ -1879,10 +1884,8 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + r600_bc_src(&alu.src[0], &r600_src[1], i); + r600_bc_src(&alu.src[1], &r600_src[2], i); alu.omod = 3; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; @@ -1905,8 +1908,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &r600_src[0], i); alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1928,8 +1930,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + r600_bc_src(&alu.src[1], &r600_src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { @@ -1949,10 +1950,8 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - alu.src[1] = r600_src[1]; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[0], &r600_src[0], i); + r600_bc_src(&alu.src[1], &r600_src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; @@ -1971,7 +1970,7 @@ static int tgsi_lrp(struct 
r600_shader_ctx *ctx) static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); @@ -1989,15 +1988,9 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); - - alu.src[2] = r600_src[1]; - alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - + r600_bc_src(&alu.src[0], &r600_src[0], i); + r600_bc_src(&alu.src[1], &r600_src[2], i); + r600_bc_src(&alu.src[2], &r600_src[1], i); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; @@ -2014,7 +2007,7 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) static int tgsi_xpd(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; uint32_t use_temp = 0; int i, r; @@ -2033,32 +2026,30 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - alu.src[0] = r600_src[0]; switch (i) { case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); + r600_bc_src(&alu.src[0], &r600_src[0], 2); break; case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src[0], 0); break; case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + r600_bc_src(&alu.src[0], &r600_src[0], 1); break; case 3: alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; } - alu.src[1] = r600_src[1]; switch (i) { case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); + 
r600_bc_src(&alu.src[1], &r600_src[1], 1); break; case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); + r600_bc_src(&alu.src[1], &r600_src[1], 2); break; case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[1], &r600_src[1], 0); break; case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -2080,32 +2071,30 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); - alu.src[0] = r600_src[0]; switch (i) { case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + r600_bc_src(&alu.src[0], &r600_src[0], 1); break; case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); + r600_bc_src(&alu.src[0], &r600_src[0], 2); break; case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src[0], 0); break; case 3: alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; } - alu.src[1] = r600_src[1]; switch (i) { case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); + r600_bc_src(&alu.src[1], &r600_src[1], 2); break; case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[1], &r600_src[1], 0); break; case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); + r600_bc_src(&alu.src[1], &r600_src[1], 1); break; case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -2137,18 +2126,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3] = { { 0 } }; + struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; + tgsi_src(ctx, &inst->Src[0], &r600_src); + /* result.x = 2^floor(src); */ if (inst->Dst[0].Register.WriteMask & 1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], 
&r600_src, 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2176,9 +2165,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - alu.src[0] = r600_src[0]; - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.dst.sel = ctx->temp_reg; // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2198,8 +2185,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2234,17 +2220,18 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) static int tgsi_log(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; + tgsi_src(ctx, &inst->Src[0], &r600_src); + /* result.x = floor(log2(src)); */ if (inst->Dst[0].Register.WriteMask & 1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2273,9 +2260,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2335,8 +2320,7 @@ static int tgsi_log(struct 
r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2356,8 +2340,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2393,8 +2376,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx) static int tgsi_eg_arl(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; + + tgsi_src(ctx, &inst->Src[0], &r600_src); + memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { @@ -2409,8 +2396,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.last = 1; alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; @@ -2420,7 +2406,6 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return r; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.last = 1; @@ -2433,8 +2418,12 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; + + tgsi_src(ctx, &inst->Src[0], &r600_src); + memset(&alu, 0, sizeof(struct r600_bc_alu)); switch 
(inst->Instruction.Opcode) { @@ -2449,9 +2438,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) return -1; } - - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.last = 1; @@ -2465,9 +2452,13 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) static int tgsi_opdst(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src[2]; struct r600_bc_alu alu; int i, r = 0; + tgsi_src(ctx, &inst->Src[0], &r600_src[0]); + tgsi_src(ctx, &inst->Src[1], &r600_src[1]); + for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2477,15 +2468,13 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &r600_src[0], i); } - if (i == 0 || i == 2) { + if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - tgsi_src(ctx, &inst->Src[1], &alu.src[1]); - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[1], &r600_src[1], i); } if (i == 3) alu.last = 1; @@ -2499,9 +2488,12 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; + tgsi_src(ctx, &inst->Src[0], &r600_src); + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = opcode; alu.predicate = 1; @@ -2510,8 +2502,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &r600_src, 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; -- cgit 
v1.2.3 From 1fa95c7f9e7f1b63364b1f9c6289690418cf6313 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: Do the tgsi_full_src_register to r600_shader_src conversion in r600_shader_from_tgsi(). --- src/gallium/drivers/r600/r600_shader.c | 305 ++++++++++++++------------------- 1 file changed, 129 insertions(+), 176 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index abb9ee0e8c5..f9ca9a85285 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -299,6 +299,7 @@ struct r600_shader_ctx { struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; + struct r600_shader_src src[3]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -507,10 +508,44 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) return ctx->num_interp_gpr; } +static void tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + struct r600_shader_src *r600_src) +{ + memset(r600_src, 0, sizeof(*r600_src)); + r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; + r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; + r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; + r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; + r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + int index; + if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { + + index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; + r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) + return; + } + index = tgsi_src->Register.Index; + r600_src->sel = 
V_SQ_ALU_SRC_LITERAL; + memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); + } else { + if (tgsi_src->Register.Indirect) + r600_src->rel = V_SQ_REL_RELATIVE; + r600_src->sel = tgsi_src->Register.Index; + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + } +} + static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; + struct tgsi_full_instruction *inst; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned output_done, noutput; @@ -608,7 +643,12 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh ctx.max_driver_temp_used = 0; /* reserve first tmp for everyone */ r600_get_temp(&ctx); - opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + + inst = &ctx.parse.FullToken.FullInstruction; + opcode = inst->Instruction.Opcode; + for (i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + tgsi_src(&ctx, &inst->Src[i], &ctx.src[i]); + } if (ctx.bc->chiprev == CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else @@ -776,39 +816,6 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src, bc_src->value = shader_src->value[bc_src->chan]; } -static void tgsi_src(struct r600_shader_ctx *ctx, - const struct tgsi_full_src_register *tgsi_src, - struct r600_shader_src *r600_src) -{ - memset(r600_src, 0, sizeof(*r600_src)); - r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; - r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; - r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; - r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; - r600_src->neg = tgsi_src->Register.Negate; - r600_src->abs = tgsi_src->Register.Absolute; - if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { - int index; - if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && - (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && - 
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { - - index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); - if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) - return; - } - index = tgsi_src->Register.Index; - r600_src->sel = V_SQ_ALU_SRC_LITERAL; - memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); - } else { - if (tgsi_src->Register.Indirect) - r600_src->rel = V_SQ_REL_RELATIVE; - r600_src->sel = tgsi_src->Register.Index; - r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; - } -} - static void tgsi_dst(struct r600_shader_ctx *ctx, const struct tgsi_full_dst_register *tgsi_dst, unsigned swizzle, @@ -827,7 +834,7 @@ static void tgsi_dst(struct r600_shader_ctx *ctx, } } -static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_shader_src r600_src[3]) +static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; @@ -837,7 +844,6 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_shader_s if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { nconst++; } - tgsi_src(ctx, &inst->Src[i], &r600_src[i]); } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { @@ -845,9 +851,9 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_shader_s for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; + alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; - alu.src[0].rel = r600_src[i].rel; + alu.src[0].rel = ctx->src[i].rel; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -857,8 +863,8 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_shader_s if (r) return r; 
} - r600_src[i].sel = treg; - r600_src[i].rel =0; + ctx->src[i].sel = treg; + ctx->src[i].rel =0; j--; } } @@ -866,26 +872,26 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_shader_s } /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ -static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_shader_src r600_src[3]) +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int i, j, k, nliteral, r; for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) { + if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { nliteral++; } } for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) { + if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; + alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; - alu.src[0].value = r600_src[i].value[k]; + alu.src[0].value = ctx->src[i].value[k]; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -895,7 +901,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ if (r) return r; } - r600_src[i].sel = treg; + ctx->src[i].sel = treg; j--; } } @@ -917,15 +923,14 @@ static int tgsi_last_instruction(unsigned writemask) static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return 
r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; for (i = 0; i < lasti + 1; i++) { @@ -938,11 +943,11 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &r600_src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } } else { - r600_bc_src(&alu.src[0], &r600_src[1], i); - r600_bc_src(&alu.src[1], &r600_src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { @@ -980,8 +985,7 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) * r700 - normalize by dividing by 2PI * see fdo bug 27901 */ -static int tgsi_setup_trig(struct r600_shader_ctx *ctx, - struct r600_shader_src r600_src[3]) +static int tgsi_setup_trig(struct r600_shader_ctx *ctx) { static float half_inv_pi = 1.0 /(3.1415926535 * 2); static float double_pi = 3.1415926535 * 2; @@ -990,10 +994,10 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, int r; struct r600_bc_alu alu; - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; @@ -1005,7 +1009,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - r600_bc_src(&alu.src[0], &r600_src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; @@ -1066,12 +1070,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, r; int lasti = 
tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; @@ -1110,7 +1113,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int r; @@ -1118,7 +1120,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) * X or Y components of the destination vector. */ if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; } @@ -1192,7 +1194,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) static int tgsi_kill(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int i, r; @@ -1208,9 +1209,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - struct r600_shader_src r600_src; - tgsi_src(ctx, &inst->Src[0], &r600_src); - r600_bc_src(&alu.src[1], &r600_src, i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } if (i == 3) { alu.last = 1; @@ -1229,14 +1228,13 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int r; - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; @@ -1254,7 +1252,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.y = max(src.x, 0.0) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &r600_src[0], 0); + r600_bc_src(&alu.src[0], 
&ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); @@ -1283,7 +1281,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &r600_src[0], 1); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1296,11 +1294,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - r600_bc_src(&alu.src[0], &r600_src[0], 3); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - r600_bc_src(&alu.src[2], &r600_src[0], 0); + r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -1339,10 +1337,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - struct r600_shader_src r600_src; - - tgsi_src(ctx, &inst->Src[i], &r600_src); - r600_bc_src(&alu.src[i], &r600_src, 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); alu.src[i].abs = 1; } alu.dst.sel = ctx->temp_reg; @@ -1380,15 +1375,13 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src; struct r600_bc_alu alu; int i, r; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - tgsi_src(ctx, &inst->Src[i], &r600_src); - r600_bc_src(&alu.src[i], &r600_src, 0); + r600_bc_src(&alu.src[i], 
&ctx->src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1402,18 +1395,13 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) static int tgsi_pow(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[2]; struct r600_bc_alu alu; int r; - tgsi_src(ctx, &inst->Src[0], &r600_src[0]); - tgsi_src(ctx, &inst->Src[1], &r600_src[1]); - /* LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &r600_src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; @@ -1423,7 +1411,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &r600_src[1], 0); + r600_bc_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1447,14 +1435,13 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, r; - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; @@ -1467,9 +1454,9 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - r600_bc_src(&alu.src[0], &r600_src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; - r600_bc_src(&alu.src[2], &r600_src[0], i); + r600_bc_src(&alu.src[2], &ctx->src[0], i); if (i == 3) alu.last = 1; @@ -1533,15 +1520,14 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct 
tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; for (i = 0; i < lasti + 1; i++) { @@ -1551,7 +1537,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &r600_src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1571,21 +1557,20 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bc_src(&alu.src[j], &r600_src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1641,13 +1626,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { - struct r600_shader_src r600_src; - /* Add perspective divide */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - tgsi_src(ctx, &inst->Src[0], &r600_src); - r600_bc_src(&alu.src[0], &r600_src, 3); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 3; @@ -1662,7 +1644,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; - r600_bc_src(&alu.src[1], &r600_src, i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; @@ -1686,11 +1668,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { - struct r600_shader_src r600_src; int src_chan, src2_chan; - tgsi_src(ctx, &inst->Src[0], &r600_src); - /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1718,8 +1697,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src2_chan = 0; break; } - r600_bc_src(&alu.src[0], &r600_src, src_chan); - r600_bc_src(&alu.src[1], &r600_src, src2_chan); + r600_bc_src(&alu.src[0], &ctx->src[0], src_chan); + r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1796,13 +1775,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } if (src_not_temp) { - struct r600_shader_src r600_src; - - tgsi_src(ctx, &inst->Src[0], &r600_src); for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r600_bc_src(&alu.src[0], &r600_src, i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1863,29 +1839,28 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int lasti = 
tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; /* optimize if it's just an equal balance */ - if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) { + if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); - r600_bc_src(&alu.src[0], &r600_src[1], i); - r600_bc_src(&alu.src[1], &r600_src[2], i); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); alu.omod = 3; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; @@ -1908,7 +1883,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r600_bc_src(&alu.src[1], &r600_src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1930,7 +1905,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - r600_bc_src(&alu.src[1], &r600_src[2], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { @@ -1950,8 +1925,8 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - r600_bc_src(&alu.src[0], &r600_src[0], i); - r600_bc_src(&alu.src[1], &r600_src[1], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; @@ -1970,15 +1945,14 @@ static int 
tgsi_lrp(struct r600_shader_ctx *ctx) static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; @@ -1988,9 +1962,9 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - r600_bc_src(&alu.src[0], &r600_src[0], i); - r600_bc_src(&alu.src[1], &r600_src[2], i); - r600_bc_src(&alu.src[2], &r600_src[1], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bc_src(&alu.src[2], &ctx->src[1], i); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; @@ -2007,7 +1981,6 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) static int tgsi_xpd(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[3]; struct r600_bc_alu alu; uint32_t use_temp = 0; int i, r; @@ -2015,10 +1988,10 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_split_constant(ctx); if (r) return r; - r = tgsi_split_literal_constant(ctx, r600_src); + r = tgsi_split_literal_constant(ctx); if (r) return r; @@ -2028,13 +2001,13 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) switch (i) { case 0: - r600_bc_src(&alu.src[0], &r600_src[0], 2); + r600_bc_src(&alu.src[0], &ctx->src[0], 2); break; case 1: - r600_bc_src(&alu.src[0], &r600_src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); break; case 2: - r600_bc_src(&alu.src[0], &r600_src[0], 1); 
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1); break; case 3: alu.src[0].sel = V_SQ_ALU_SRC_0; @@ -2043,13 +2016,13 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) switch (i) { case 0: - r600_bc_src(&alu.src[1], &r600_src[1], 1); + r600_bc_src(&alu.src[1], &ctx->src[1], 1); break; case 1: - r600_bc_src(&alu.src[1], &r600_src[1], 2); + r600_bc_src(&alu.src[1], &ctx->src[1], 2); break; case 2: - r600_bc_src(&alu.src[1], &r600_src[1], 0); + r600_bc_src(&alu.src[1], &ctx->src[1], 0); break; case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -2073,13 +2046,13 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) switch (i) { case 0: - r600_bc_src(&alu.src[0], &r600_src[0], 1); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); break; case 1: - r600_bc_src(&alu.src[0], &r600_src[0], 2); + r600_bc_src(&alu.src[0], &ctx->src[0], 2); break; case 2: - r600_bc_src(&alu.src[0], &r600_src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); break; case 3: alu.src[0].sel = V_SQ_ALU_SRC_0; @@ -2088,13 +2061,13 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) switch (i) { case 0: - r600_bc_src(&alu.src[1], &r600_src[1], 2); + r600_bc_src(&alu.src[1], &ctx->src[1], 2); break; case 1: - r600_bc_src(&alu.src[1], &r600_src[1], 0); + r600_bc_src(&alu.src[1], &ctx->src[1], 0); break; case 2: - r600_bc_src(&alu.src[1], &r600_src[1], 1); + r600_bc_src(&alu.src[1], &ctx->src[1], 1); break; case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -2126,18 +2099,15 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; - tgsi_src(ctx, &inst->Src[0], &r600_src); - /* result.x = 2^floor(src); */ if (inst->Dst[0].Register.WriteMask & 1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); 
alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2165,7 +2135,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2185,7 +2155,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2220,18 +2190,15 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) static int tgsi_log(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; - tgsi_src(ctx, &inst->Src[0], &r600_src); - /* result.x = floor(log2(src)); */ if (inst->Dst[0].Register.WriteMask & 1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2260,7 +2227,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2320,7 +2287,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2340,7 +2307,7 @@ static int 
tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2376,12 +2343,9 @@ static int tgsi_log(struct r600_shader_ctx *ctx) static int tgsi_eg_arl(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; - tgsi_src(ctx, &inst->Src[0], &r600_src); - memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { @@ -2396,7 +2360,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; @@ -2418,12 +2382,9 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; - tgsi_src(ctx, &inst->Src[0], &r600_src); - memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { @@ -2438,7 +2399,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) return -1; } - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; @@ -2452,13 +2413,9 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) static int tgsi_opdst(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src[2]; struct r600_bc_alu alu; int i, r = 0; - tgsi_src(ctx, &inst->Src[0], &r600_src[0]); - tgsi_src(ctx, &inst->Src[1], &r600_src[1]); - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2468,13 +2425,13 @@ static int 
tgsi_opdst(struct r600_shader_ctx *ctx) if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r600_bc_src(&alu.src[0], &r600_src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); } if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r600_bc_src(&alu.src[1], &r600_src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); } if (i == 3) alu.last = 1; @@ -2487,13 +2444,9 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_shader_src r600_src; struct r600_bc_alu alu; int r; - tgsi_src(ctx, &inst->Src[0], &r600_src); - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = opcode; alu.predicate = 1; @@ -2502,7 +2455,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r600_bc_src(&alu.src[0], &r600_src, 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; -- cgit v1.2.3 From 7687eabaa0470261e059a2d6502628fffd209345 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: Split constants in r600_shader_from_tgsi(). 
--- src/gallium/drivers/r600/r600_shader.c | 220 ++++++++++++--------------------- 1 file changed, 80 insertions(+), 140 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f9ca9a85285..50f9ed6eda4 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -541,11 +541,85 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } } +static int tgsi_split_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nconst, r; + + for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + nconst++; + } + tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); + } + for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].rel = ctx->src[i].rel; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + ctx->src[i].rel =0; + j--; + } + } + return 0; +} + +/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nliteral, r; + + for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + nliteral++; + } + } + for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; 
i++) { + if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].value = ctx->src[i].value[k]; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + j--; + } + } + return 0; +} + static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; - struct tgsi_full_instruction *inst; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned output_done, noutput; @@ -644,11 +718,11 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh /* reserve first tmp for everyone */ r600_get_temp(&ctx); - inst = &ctx.parse.FullToken.FullInstruction; - opcode = inst->Instruction.Opcode; - for (i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - tgsi_src(&ctx, &inst->Src[i], &ctx.src[i]); - } + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; if (ctx.bc->chiprev == CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else @@ -834,80 +908,6 @@ static void tgsi_dst(struct r600_shader_ctx *ctx, } } -static int tgsi_split_constant(struct r600_shader_ctx *ctx) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nconst, r; - - for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - nconst++; - } - } - for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && 
inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = ctx->src[i].sel; - alu.src[0].chan = k; - alu.src[0].rel = ctx->src[i].rel; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - ctx->src[i].sel = treg; - ctx->src[i].rel =0; - j--; - } - } - return 0; -} - -/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ -static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nliteral, r; - - for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { - nliteral++; - } - } - for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = ctx->src[i].sel; - alu.src[0].chan = k; - alu.src[0].value = ctx->src[i].value[k]; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - ctx->src[i].sel = treg; - j--; - } - } - return 0; -} - static int tgsi_last_instruction(unsigned writemask) { int i, lasti = 0; @@ -927,12 +927,6 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; for (i = 0; i < lasti + 1; i++) { if 
(!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -994,13 +988,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) int r; struct r600_bc_alu alu; - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1231,13 +1218,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; - /* dst.x, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1438,13 +1418,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int i, r; - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; - /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1524,12 +1497,6 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -1560,12 +1527,6 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int i, j, r; - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; @@ -1844,13 +1805,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) unsigned i; int r; - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; - /* optimize if it's just an 
equal balance */ if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { for (i = 0; i < lasti + 1; i++) { @@ -1949,13 +1903,6 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; - for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -1988,13 +1935,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; - r = tgsi_split_constant(ctx); - if (r) - return r; - r = tgsi_split_literal_constant(ctx); - if (r) - return r; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); -- cgit v1.2.3 From b9fd1a1e4b2121225195056ea1b679d62c399ddb Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: Remove vs_resource and ps_resource from the pipe context. These are practically unused, only the vs_resource array is being abused for fetch shader resources. 
--- src/gallium/drivers/r600/r600_pipe.c | 14 -------------- src/gallium/drivers/r600/r600_pipe.h | 4 +--- src/gallium/drivers/r600/r600_state_common.c | 10 +++++----- 3 files changed, 6 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 04bbcf5f4d1..48ff95ba214 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -88,8 +88,6 @@ static void r600_destroy_context(struct pipe_context *context) u_upload_destroy(rctx->upload_const); u_vbuf_mgr_destroy(rctx->vbuf_mgr); - FREE(rctx->ps_resource); - FREE(rctx->vs_resource); FREE(rctx); } @@ -187,18 +185,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->vs_resource) { - FREE(rctx); - return NULL; - } - - rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->ps_resource) { - FREE(rctx); - return NULL; - } - class = r600_get_family_class(rctx->radeon); if (class == R600 || class == R700) rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index e9820a23911..6a0995321a8 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -128,6 +128,7 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; + struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; unsigned cb_target_mask; @@ -135,9 +136,6 @@ struct r600_pipe_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; - unsigned nvs_resource; - struct r600_pipe_state 
*vs_resource; - struct r600_pipe_state *ps_resource; struct r600_pipe_state config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 3a959465715..b17686d7752 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -365,18 +365,18 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) struct r600_pipe_state *rstate; struct r600_resource *rbuffer; struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; + unsigned i, count, offset; if (rctx->vertex_elements->vbuffer_need_offset) { /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; + count = rctx->vertex_elements->count; } else { /* bind vertex buffer once */ - rctx->nvs_resource = rctx->vbuf_mgr->nr_real_vertex_buffers; + count = rctx->vbuf_mgr->nr_real_vertex_buffers; } - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; + for (i = 0 ; i < count; i++) { + rstate = &rctx->fs_resource[i]; rstate->id = R600_PIPE_STATE_RESOURCE; rstate->nregs = 0; -- cgit v1.2.3 From 5c59eebfae55240a2308c02b0a6ad971c9b83304 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: Generalize the pipe_add_vertex_attrib() functions. This allows them to be used for VS or PS buffer resources as well. 
--- src/gallium/drivers/r600/evergreen_state.c | 10 ++++------ src/gallium/drivers/r600/r600_pipe.h | 18 ++++++++---------- src/gallium/drivers/r600/r600_state.c | 10 ++++------ src/gallium/drivers/r600/r600_state_common.c | 10 ++++------ 4 files changed, 20 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 05539aa61d7..bfa21997839 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1492,11 +1492,10 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) return rstate; } -void evergreen_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - unsigned index, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride) +void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) { r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); @@ -1519,5 +1518,4 @@ void evergreen_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); - evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, index); } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6a0995321a8..5f04fbf0992 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -168,11 +168,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); -void 
evergreen_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - unsigned index, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride); +void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); @@ -208,11 +207,10 @@ void r600_spi_update(struct r600_pipe_context *rctx); void r600_init_config(struct r600_pipe_context *rctx); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); -void r600_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - unsigned index, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride); +void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a51e7057eee..e4382baad07 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1218,11 +1218,10 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) return rstate; } -void r600_pipe_add_vertex_attrib(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - unsigned index, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride) +void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) { r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); @@ -1239,5 +1238,4 @@ void r600_pipe_add_vertex_attrib(struct r600_pipe_context 
*rctx, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, index); } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index b17686d7752..a2b2c17e2ed 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -398,13 +398,11 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_add_vertex_attrib(rctx, rstate, i, - rbuffer, offset, - vertex_buffer->stride); + evergreen_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { - r600_pipe_add_vertex_attrib(rctx, rstate, i, - rbuffer, offset, - vertex_buffer->stride); + r600_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } } } -- cgit v1.2.3 From 4c30a80e384d523803d70ead3403cf3ca30b8868 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:07 +0100 Subject: r600g: Handle the ADD_INT instruction in r600_bc_get_num_operands(). 
--- src/gallium/drivers/r600/r600_asm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 49e48667fa7..ad08aa533a8 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -47,6 +47,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: @@ -94,6 +95,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: -- cgit v1.2.3 From 871460eb149b9868e5750f13b8206e271743c4a2 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:08 +0100 Subject: r600g: Set the fetch type in r600_bc_vtx_build(). 
--- src/gallium/drivers/r600/r600_asm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index ad08aa533a8..d687c23f4f2 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1348,6 +1348,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign } } bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) | + S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); -- cgit v1.2.3 From 077c448d184799e0d9ec962013ec784c6a5c1807 Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Mon, 7 Feb 2011 15:22:08 +0100 Subject: r600g: Add support for relative addressing on constant buffers. Relative addressing of constant buffers can't work properly through the kcache, since you can only address within the currently locked kcache window. Instead, this patch binds the constant buffer as a shader resource, and then explicitly fetches the constant using a vertex fetch with fetch type VTX_FETCH_NO_INDEX_OFFSET from the shader. There's probably still some room for improvement, doing the fetch right before the instruction that needs the value may not be quite optimal for example. 
--- src/gallium/drivers/r600/evergreen_state.c | 12 ++- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_pipe.h | 4 + src/gallium/drivers/r600/r600_shader.c | 119 ++++++++++++++++++++++++--- src/gallium/drivers/r600/r600_state.c | 9 +- src/gallium/drivers/r600/r600_state_common.c | 28 ++++++- 6 files changed, 152 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index bfa21997839..83ab0df9c16 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -434,7 +434,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou for (int i = 0; i < count; i++) { if (resource[i]) { - evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); } } } @@ -449,9 +450,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -460,7 +463,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); 
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 48ff95ba214..0b20b207dc6 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -370,7 +370,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_CONSTS: return 256; //max native parameters case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; + return R600_MAX_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* FIXME */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 5f04fbf0992..b7ea6de3c7c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -36,6 +36,8 @@ #include "r600_shader.h" #include "r600_resource.h" +#define R600_MAX_CONST_BUFFERS 1 + enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, @@ -140,7 +142,9 @@ struct r600_pipe_context { struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; struct r600_pipe_state vs_const_buffer; + struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_state ps_const_buffer; + struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_rasterizer *rasterizer; /* shader information */ unsigned sprite_coord_enable; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 50f9ed6eda4..acb3ef2c4d6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -28,6 +28,7 @@ #include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" +#include "r600_formats.h" #include "r600_opcodes.h" #include "r600d.h" #include @@ -296,6 +297,7 @@ struct r600_shader_ctx { unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; 
unsigned temp_reg; + unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; @@ -541,6 +543,55 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } } +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) +{ + struct r600_bc_vtx vtx; + unsigned int ar_reg; + int r; + + if (offset) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.src[0].sel = ctx->ar_reg; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = offset; + + alu.dst.sel = dst_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + ar_reg = dst_reg; + } else { + ar_reg = ctx->ar_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = ar_reg; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.data_format = FMT_32_32_32_32_FLOAT; + vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + return r; + + return 0; +} + static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -554,7 +605,19 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { + continue; + } + + if (ctx->src[i].rel) { + int treg = r600_get_temp(ctx); + if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) + return r; + + 
ctx->src[i].sel = treg; + ctx->src[i].rel = 0; + j--; + } else if (j > 0) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -683,8 +746,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh ctx.file_offset[TGSI_FILE_CONSTANT] = 512; ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; - ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; ctx.literals = NULL; @@ -1760,7 +1824,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.resource_id = tex.sampler_id; + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; @@ -2302,15 +2366,21 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; - alu.dst.chan = 0; - alu.dst.sel = ctx->temp_reg; + alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; + + /* TODO: Note that the MOVA can be avoided if we never use AR for + * indexing non-CB registers in the current ALU clause. Similarly, we + * need to load AR from ar_reg again if we started a new clause + * between ARL and AR usage. The easy way to do that is to remove + * the MOVA here, and load it for the first AR access after ar_reg + * has been modified in each clause. 
*/ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - alu.src[0].sel = ctx->temp_reg; + alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -2325,22 +2395,47 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.src[0].sel = ctx->ar_reg; + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; default: assert(0); return -1; } - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + alu.src[0].sel = ctx->ar_reg; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index e4382baad07..74dad450729 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -495,9 +495,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) 
{ if (resource[i]) - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -507,7 +509,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a2b2c17e2ed..bcaf2b9e45e 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -317,6 +317,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource_buffer *rbuffer = r600_buffer(buffer); + struct r600_pipe_state *rstate; uint32_t offset; /* Note that the state tracker can unbind constant buffers by @@ -327,6 +328,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } r600_upload_const_buffer(rctx, &rbuffer, &offset); + offset += r600_bo_offset(rbuffer->r.bo); switch (shader) { case PIPE_SHADER_VERTEX: @@ -337,8 +339,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 
0xFFFFFFFF, rbuffer->r.bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + + rstate = &rctx->vs_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } break; case PIPE_SHADER_FRAGMENT: rctx->ps_const_buffer.nregs = 0; @@ -348,8 +361,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->r.bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->r.bo); + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + + rstate = &rctx->ps_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } break; default: R600_ERR("unsupported %d\n", shader); -- cgit v1.2.3 From d8d5c2660f581821f017fdcb7954c6f7bd099114 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Feb 2011 12:09:29 +0100 Subject: Revert "r600g: do not flush the uploader" (with comments) This reverts commit 1c2a4f0820ff2272f993e6da28dcf8bcbbc3252a. 
--- src/gallium/drivers/r600/r600_pipe.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 0b20b207dc6..de9c6a52e1b 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -68,6 +68,13 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, dc++; #endif r600_context_flush(&rctx->ctx); + + /* XXX These shouldn't be really necessary, but removing them breaks some tests. + * Needless buffer reallocations may significantly increase memory consumption, + * so getting rid of these 3 calls is important. */ + u_vbuf_mgr_flush_uploader(rctx->vbuf_mgr); + u_upload_flush(rctx->upload_ib); + u_upload_flush(rctx->upload_const); } static void r600_destroy_context(struct pipe_context *context) -- cgit v1.2.3 From f53cbf8bb0e542a114b76467fe715ceb977411ab Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Feb 2011 15:08:04 +0100 Subject: u_vbuf_mgr: make the uploader public --- src/gallium/auxiliary/util/u_vbuf_mgr.c | 21 ++++++--------------- src/gallium/auxiliary/util/u_vbuf_mgr.h | 11 +++++++++-- src/gallium/drivers/r600/r600_pipe.c | 2 +- 3 files changed, 16 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index 28c7f727ef8..55d8695dffa 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -73,8 +73,6 @@ struct u_vbuf_mgr_priv { struct translate_cache *translate_cache; unsigned translate_vb_slot; - struct u_upload_mgr *uploader; - struct u_vbuf_mgr_elements *ve; void *saved_ve, *fallback_ve; boolean ve_binding_lock; @@ -123,9 +121,9 @@ u_vbuf_mgr_create(struct pipe_context *pipe, mgr->pipe = pipe; mgr->translate_cache = translate_cache_create(); - mgr->uploader = u_upload_create(pipe, upload_buffer_size, - upload_buffer_alignment, - 
PIPE_BIND_VERTEX_BUFFER); + mgr->b.uploader = u_upload_create(pipe, upload_buffer_size, + upload_buffer_alignment, + PIPE_BIND_VERTEX_BUFFER); mgr->caps.fetch_dword_unaligned = fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED; @@ -146,7 +144,7 @@ void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgrb) } translate_cache_destroy(mgr->translate_cache); - u_upload_destroy(mgr->uploader); + u_upload_destroy(mgr->b.uploader); FREE(mgr); } @@ -240,7 +238,7 @@ static void u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr, /* Create and map the output buffer. */ num_verts = max_index + 1 - min_index; - u_upload_alloc(mgr->uploader, + u_upload_alloc(mgr->b.uploader, key.output_stride * min_index, key.output_stride * num_verts, &out_offset, &out_buffer, upload_flushed, @@ -537,7 +535,7 @@ static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr, size = mgr->ve->native_format_size[i]; } - u_upload_data(mgr->uploader, first, size, + u_upload_data(mgr->b.uploader, first, size, u_vbuf_resource(vb->buffer)->user_ptr + first, &vb->buffer_offset, &mgr->b.real_vertex_buffer[index], @@ -597,10 +595,3 @@ void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb) u_vbuf_translate_end(mgr); } } - -void u_vbuf_mgr_flush_uploader(struct u_vbuf_mgr *mgrb) -{ - struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; - - u_upload_flush(mgr->uploader); -} diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h index 5eb59385f9b..068459af4c3 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.h +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h @@ -54,6 +54,15 @@ struct u_vbuf_mgr { /* Precomputed max_index for hardware vertex buffers. */ int max_index; + + /* This uploader can optionally be used by the driver. 
+ * + * Allowed functions: + * - u_upload_alloc + * - u_upload_data + * - u_upload_buffer + * - u_upload_flush */ + struct u_upload_mgr *uploader; }; struct u_vbuf_resource { @@ -102,8 +111,6 @@ void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgr, void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgr); -void u_vbuf_mgr_flush_uploader(struct u_vbuf_mgr *mgr); - static INLINE struct u_vbuf_resource *u_vbuf_resource(struct pipe_resource *r) { diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index de9c6a52e1b..ad609fbdfa3 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -72,7 +72,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, /* XXX These shouldn't be really necessary, but removing them breaks some tests. * Needless buffer reallocations may significantly increase memory consumption, * so getting rid of these 3 calls is important. */ - u_vbuf_mgr_flush_uploader(rctx->vbuf_mgr); + u_upload_flush(rctx->vbuf_mgr->uploader); u_upload_flush(rctx->upload_ib); u_upload_flush(rctx->upload_const); } -- cgit v1.2.3 From 1ee71bdc8a8471357cee6a1cf2fbaac6a70bb86c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Feb 2011 15:20:11 +0100 Subject: u_vbuf_mgr: add a way to specify the BIND flag for the upload buffer --- src/gallium/auxiliary/util/u_vbuf_mgr.c | 3 ++- src/gallium/auxiliary/util/u_vbuf_mgr.h | 1 + src/gallium/drivers/r300/r300_context.c | 1 + src/gallium/drivers/r600/r600_pipe.c | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c index 7ebea7e99c4..ca3b4dc4c4e 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.c +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -114,6 +114,7 @@ struct u_vbuf_mgr * u_vbuf_mgr_create(struct pipe_context *pipe, unsigned upload_buffer_size, unsigned upload_buffer_alignment, + unsigned upload_buffer_bind, 
enum u_fetch_alignment fetch_alignment) { struct u_vbuf_mgr_priv *mgr = CALLOC_STRUCT(u_vbuf_mgr_priv); @@ -123,7 +124,7 @@ u_vbuf_mgr_create(struct pipe_context *pipe, mgr->b.uploader = u_upload_create(pipe, upload_buffer_size, upload_buffer_alignment, - PIPE_BIND_VERTEX_BUFFER); + upload_buffer_bind); mgr->caps.fetch_dword_unaligned = fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED; diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h index 068459af4c3..8b241854c83 100644 --- a/src/gallium/auxiliary/util/u_vbuf_mgr.h +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h @@ -83,6 +83,7 @@ struct u_vbuf_mgr * u_vbuf_mgr_create(struct pipe_context *pipe, unsigned upload_buffer_size, unsigned upload_buffer_alignment, + unsigned upload_buffer_bind, enum u_fetch_alignment fetch_alignment); void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgr); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index b8b7afa9c27..7e0c068ff3e 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -434,6 +434,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_resource_functions(r300); r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER, U_VERTEX_FETCH_DWORD_ALIGNED); if (!r300->vbuf_mgr) goto fail; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index ad609fbdfa3..9826bf42acc 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -166,6 +166,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void } rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER, U_VERTEX_FETCH_BYTE_ALIGNED); if (!rctx->vbuf_mgr) { r600_destroy_context(&rctx->context); -- cgit v1.2.3 From 467023e8080489abeff53e18ac83560eaf851827 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: 
Tue, 8 Feb 2011 15:21:35 +0100 Subject: r600g: use the same upload buffer for vertices, indices, and constants This should reduce memory consumption. --- src/gallium/drivers/r600/r600_buffer.c | 4 ++-- src/gallium/drivers/r600/r600_pipe.c | 28 ++++++---------------------- src/gallium/drivers/r600/r600_pipe.h | 4 +--- src/gallium/drivers/r600/r600_translate.c | 2 +- 4 files changed, 10 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 0a0e3db854a..7483a5292bc 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -207,7 +207,7 @@ void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); boolean flushed; - u_upload_data(rctx->upload_ib, 0, + u_upload_data(rctx->vbuf_mgr->uploader, 0, draw->info.count * draw->index_size, rbuffer->r.b.user_ptr, &draw->index_buffer_offset, @@ -224,7 +224,7 @@ void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resour *rbuffer = NULL; - u_upload_data(rctx->upload_const, 0, size, ptr, const_offset, + u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset, (struct pipe_resource**)rbuffer, &flushed); } else { *const_offset = 0; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9826bf42acc..a7c19b09275 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -69,12 +69,10 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, #endif r600_context_flush(&rctx->ctx); - /* XXX These shouldn't be really necessary, but removing them breaks some tests. + /* XXX This shouldn't be really necessary, but removing it breaks some tests. * Needless buffer reallocations may significantly increase memory consumption, - * so getting rid of these 3 calls is important. 
*/ + * so getting rid of this call is important. */ u_upload_flush(rctx->vbuf_mgr->uploader); - u_upload_flush(rctx->upload_ib); - u_upload_flush(rctx->upload_const); } static void r600_destroy_context(struct pipe_context *context) @@ -91,8 +89,6 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } - u_upload_destroy(rctx->upload_ib); - u_upload_destroy(rctx->upload_const); u_vbuf_mgr_destroy(rctx->vbuf_mgr); FREE(rctx); @@ -165,28 +161,16 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER, + rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256, + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER, U_VERTEX_FETCH_BYTE_ALIGNED); if (!rctx->vbuf_mgr) { r600_destroy_context(&rctx->context); return NULL; } - rctx->upload_ib = u_upload_create(&rctx->context, 128 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - if (rctx->upload_ib == NULL) { - r600_destroy_context(&rctx->context); - return NULL; - } - - rctx->upload_const = u_upload_create(&rctx->context, 1024 * 1024, 256, - PIPE_BIND_CONSTANT_BUFFER); - if (rctx->upload_const == NULL) { - r600_destroy_context(&rctx->context); - return NULL; - } - rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { FREE(rctx); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index b7ea6de3c7c..71d9647508b 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -151,9 +151,7 @@ struct r600_pipe_context { bool flatshade; struct r600_textures_info ps_samplers; - struct u_vbuf_mgr *vbuf_mgr; - struct u_upload_mgr *upload_ib; - struct u_upload_mgr *upload_const; + struct u_vbuf_mgr *vbuf_mgr; bool blit; }; diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 
a980eac95e0..7482d15e12f 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -41,7 +41,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600, switch (*index_size) { case 1: - u_upload_alloc(r600->upload_ib, 0, count * 2, + u_upload_alloc(r600->vbuf_mgr->uploader, 0, count * 2, &out_offset, &out_buffer, &flushed, &ptr); util_shorten_ubyte_elts_to_userptr( -- cgit v1.2.3 From b541a3c4c0a125087fa9e1e0d35db019c36fb0e9 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Feb 2011 16:34:22 +0100 Subject: r300g: use the same upload buffer for vertices and indices --- src/gallium/drivers/r300/r300_context.c | 14 +++----------- src/gallium/drivers/r300/r300_context.h | 3 --- src/gallium/drivers/r300/r300_render_translate.c | 6 +++--- src/gallium/drivers/r300/r300_screen_buffer.c | 2 +- 4 files changed, 7 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 7e0c068ff3e..c6e03050fc7 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -104,8 +104,6 @@ static void r300_destroy_context(struct pipe_context* context) if (r300->vbuf_mgr) u_vbuf_mgr_destroy(r300->vbuf_mgr); - if (r300->upload_ib) - u_upload_destroy(r300->upload_ib); /* XXX: This function assumes r300->query_list was initialized */ r300_release_referenced_objects(r300); @@ -434,8 +432,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_resource_functions(r300); r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER, - U_VERTEX_FETCH_DWORD_ALIGNED); + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER, + U_VERTEX_FETCH_DWORD_ALIGNED); if (!r300->vbuf_mgr) goto fail; @@ -453,13 +452,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300_hyperz_init_mm(r300)) goto fail; - r300->upload_ib = 
u_upload_create(&r300->context, - 64 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - - if (r300->upload_ib == NULL) - goto fail; - r300_init_states(&r300->context); /* The KIL opcode needs the first texture unit to be enabled diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 9d2a0b290ae..1737dafc6ac 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -579,9 +579,6 @@ struct r300_context { /* two mem block managers for hiz/zmask ram space */ struct mem_block *hiz_mm; - /* upload manager */ - struct u_upload_mgr *upload_ib; - struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 76d012d81e6..dbc64ebf827 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -37,7 +37,7 @@ void r300_translate_index_buffer(struct r300_context *r300, switch (*index_size) { case 1: - u_upload_alloc(r300->upload_ib, 0, count * 2, + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2, &out_offset, &out_buffer, &flushed, &ptr); util_shorten_ubyte_elts_to_userptr( @@ -52,7 +52,7 @@ void r300_translate_index_buffer(struct r300_context *r300, case 2: if (index_offset) { - u_upload_alloc(r300->upload_ib, 0, count * 2, + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2, &out_offset, &out_buffer, &flushed, &ptr); util_rebuild_ushort_elts_to_userptr(&r300->context, *index_buffer, @@ -67,7 +67,7 @@ void r300_translate_index_buffer(struct r300_context *r300, case 4: if (index_offset) { - u_upload_alloc(r300->upload_ib, 0, count * 4, + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 4, &out_offset, &out_buffer, &flushed, &ptr); util_rebuild_uint_elts_to_userptr(&r300->context, *index_buffer, diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c 
index d76524d261d..4a3cc7a3f41 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -67,7 +67,7 @@ void r300_upload_index_buffer(struct r300_context *r300, *index_buffer = NULL; - u_upload_data(r300->upload_ib, + u_upload_data(r300->vbuf_mgr->uploader, 0, count * index_size, ptr + (*start * index_size), &index_offset, -- cgit v1.2.3 From f0b202ec73855bd9e1b29909c8ac90393043cb8b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Feb 2011 17:30:39 +0100 Subject: r600g: slab-allocate buffer and transfer structures --- src/gallium/drivers/r600/r600_buffer.c | 48 +++++++++++++++++++++++++++------- src/gallium/drivers/r600/r600_pipe.c | 40 +++++++++++++++++++++++++++- src/gallium/drivers/r600/r600_pipe.h | 7 +++++ 3 files changed, 85 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 7483a5292bc..2a427839fdf 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -42,13 +42,14 @@ static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { + struct r600_screen *rscreen = (struct r600_screen*)screen; struct r600_resource_buffer *rbuffer = r600_buffer(buf); if (rbuffer->r.bo) { r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } rbuffer->r.bo = NULL; - FREE(rbuffer); + util_slab_free(&rscreen->pool_buffers, rbuffer); } static unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, @@ -59,6 +60,29 @@ static unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } +static struct pipe_transfer *r600_get_transfer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx; + struct 
pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers); + + transfer->resource = resource; + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + transfer->stride = 0; + transfer->layer_stride = 0; + transfer->data = NULL; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. + */ + return transfer; +} + static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { @@ -100,13 +124,21 @@ static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, { } +static void r600_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx; + util_slab_free(&rctx->pool_transfers, transfer); +} + + static const struct u_resource_vtbl r600_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ r600_buffer_destroy, /* resource_destroy */ r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + r600_get_transfer, /* get_transfer */ + r600_transfer_destroy, /* transfer_destroy */ r600_buffer_transfer_map, /* transfer_map */ r600_buffer_transfer_flush_region, /* transfer_flush_region */ r600_buffer_transfer_unmap, /* transfer_unmap */ @@ -116,14 +148,13 @@ static const struct u_resource_vtbl r600_buffer_vtbl = struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { + struct r600_screen *rscreen = (struct r600_screen*)screen; struct r600_resource_buffer *rbuffer; struct r600_bo *bo; /* XXX We probably want a different alignment for buffers and textures. 
*/ unsigned alignment = 4096; - rbuffer = CALLOC_STRUCT(r600_resource_buffer); - if (rbuffer == NULL) - return NULL; + rbuffer = util_slab_alloc(&rscreen->pool_buffers); rbuffer->magic = R600_BUFFER_MAGIC; rbuffer->r.b.b.b = *templ; @@ -151,11 +182,10 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, unsigned bind) { + struct r600_screen *rscreen = (struct r600_screen*)screen; struct r600_resource_buffer *rbuffer; - rbuffer = CALLOC_STRUCT(r600_resource_buffer); - if (rbuffer == NULL) - return NULL; + rbuffer = util_slab_alloc(&rscreen->pool_buffers); rbuffer->magic = R600_BUFFER_MAGIC; pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index a7c19b09275..f9e8e76d241 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -75,6 +75,26 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, u_upload_flush(rctx->vbuf_mgr->uploader); } +static void r600_update_num_contexts(struct r600_screen *rscreen, + int diff) +{ + pipe_mutex_lock(rscreen->mutex_num_contexts); + if (diff > 0) { + rscreen->num_contexts++; + + if (rscreen->num_contexts > 1) + util_slab_set_thread_safety(&rscreen->pool_buffers, + UTIL_SLAB_MULTITHREADED); + } else { + rscreen->num_contexts--; + + if (rscreen->num_contexts <= 1) + util_slab_set_thread_safety(&rscreen->pool_buffers, + UTIL_SLAB_SINGLETHREADED); + } + pipe_mutex_unlock(rscreen->mutex_num_contexts); +} + static void r600_destroy_context(struct pipe_context *context) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; @@ -90,6 +110,9 @@ static void r600_destroy_context(struct pipe_context *context) } u_vbuf_mgr_destroy(rctx->vbuf_mgr); + util_slab_destroy(&rctx->pool_transfers); + + r600_update_num_contexts(rctx->screen, -1); FREE(rctx); } @@ -102,6 +125,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen 
*screen, void if (rctx == NULL) return NULL; + + r600_update_num_contexts(rscreen, 1); + rctx->context.winsys = rscreen->screen.winsys; rctx->context.screen = screen; rctx->context.priv = priv; @@ -161,6 +187,10 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + util_slab_create(&rctx->pool_transfers, + sizeof(struct pipe_transfer), 64, + UTIL_SLAB_SINGLETHREADED); + rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256, PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | @@ -173,7 +203,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { - FREE(rctx); + r600_destroy_context(&rctx->context); return NULL; } @@ -444,6 +474,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) radeon_decref(rscreen->radeon); + util_slab_destroy(&rscreen->pool_buffers); + pipe_mutex_destroy(rscreen->mutex_num_contexts); FREE(rscreen); } @@ -471,5 +503,11 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) rscreen->tiling_info = r600_get_tiling_info(radeon); + util_slab_create(&rscreen->pool_buffers, + sizeof(struct r600_resource_buffer), 64, + UTIL_SLAB_SINGLETHREADED); + + pipe_mutex_init(rscreen->mutex_num_contexts); + return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 71d9647508b..8dc1f4ad5c3 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -30,6 +30,7 @@ #include #include #include +#include "util/u_slab.h" #include "util/u_vbuf_mgr.h" #include "r600.h" #include "r600_public.h" @@ -64,6 +65,11 @@ struct r600_screen { struct pipe_screen screen; struct radeon *radeon; struct r600_tiling_info *tiling_info; + struct util_slab_mempool pool_buffers; + unsigned num_contexts; + + /* for thread-safe write accessing to num_contexts */ + pipe_mutex mutex_num_contexts; }; struct 
r600_pipe_sampler_view { @@ -152,6 +158,7 @@ struct r600_pipe_context { struct r600_textures_info ps_samplers; struct u_vbuf_mgr *vbuf_mgr; + struct util_slab_mempool pool_transfers; bool blit; }; -- cgit v1.2.3 From 71df812146ae316d9d371be3436e90da79435be9 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Feb 2011 17:36:48 +0100 Subject: r600g: add a faster implementation of transfer_inline_write u_default_transfer_inline_write uses util_copy_rect, which is kinda slow. --- src/gallium/drivers/r600/r600_buffer.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 2a427839fdf..183e34720a9 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -131,6 +131,30 @@ static void r600_transfer_destroy(struct pipe_context *ctx, util_slab_free(&rctx->pool_transfers, transfer); } +static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct radeon *ws = (struct radeon*)pipe->winsys; + struct r600_resource_buffer *rbuffer = r600_buffer(resource); + uint8_t *map = NULL; + + assert(rbuffer->b.user_ptr == NULL); + + map = r600_bo_map(ws, rbuffer->r.bo, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, + pipe); + + memcpy(map + box->x, data, box->width); + + if (rbuffer->r.bo) + r600_bo_unmap(ws, rbuffer->r.bo); +} static const struct u_resource_vtbl r600_buffer_vtbl = { @@ -142,7 +166,7 @@ static const struct u_resource_vtbl r600_buffer_vtbl = r600_buffer_transfer_map, /* transfer_map */ r600_buffer_transfer_flush_region, /* transfer_flush_region */ r600_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + 
r600_buffer_transfer_inline_write /* transfer_inline_write */ }; struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, -- cgit v1.2.3 From 69e5516308095efe12e557fd3d60d0caae514a36 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Feb 2011 18:18:13 +0100 Subject: r600g: fixup assertion --- src/gallium/drivers/r600/r600_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 183e34720a9..0c5d7133c7a 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -144,7 +144,7 @@ static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, struct r600_resource_buffer *rbuffer = r600_buffer(resource); uint8_t *map = NULL; - assert(rbuffer->b.user_ptr == NULL); + assert(rbuffer->r.b.user_ptr == NULL); map = r600_bo_map(ws, rbuffer->r.bo, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, -- cgit v1.2.3 From f0d742962377948a9688f4fa3b92c2f8bbca03e9 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 1 Feb 2011 20:52:49 +0100 Subject: nvc0: detect no-op MIN/MAX, do CSE earlier to succeed more often --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 127 +++++++++++++++++----------- 1 file changed, 79 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index e0d4e2daf9b..b6d99724a10 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -607,25 +607,83 @@ constant_operand(struct nv_pc *pc, } } +static void +handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + + if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod)) + return; + if (src0->reg.file != NV_FILE_GPR) + return; + 
nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0); + nvc0_insn_delete(nvi); +} + +/* check if we can MUL + ADD -> MAD/FMA */ +static void +handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + struct nv_value *src; + int s; + uint8_t mod[4]; + + if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0; + else + if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1; + else + return; + + if ((src0->insn && src0->insn->bb != nvi->bb) || + (src1->insn && src1->insn->bb != nvi->bb)) + return; + + /* check for immediates from prior constant folding */ + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) + return; + src = nvi->src[s]->value; + + mod[0] = nvi->src[0]->mod; + mod[1] = nvi->src[1]->mod; + mod[2] = src->insn->src[0]->mod; + mod[3] = src->insn->src[1]->mod; + + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) + return; + + nvi->opcode = NV_OP_MAD_F32; + + nv_reference(ctx->pc, nvi, s, NULL); + nvi->src[2] = nvi->src[!s]; + nvi->src[!s] = NULL; + + nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value); + nvi->src[0]->mod = mod[2] ^ mod[s]; + nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value); + nvi->src[1]->mod = mod[3]; +} + static int -nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) +nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_instruction *nvi, *next; int j; for (nvi = b->entry; nvi; nvi = next) { - struct nv_value *src0, *src1, *src; - int s; - uint8_t mod[4]; + struct nv_value *src0, *src1; + uint baseop = NV_BASEOP(nvi->opcode); next = nvi->next; src0 = nvc0_pc_find_immediate(nvi->src[0]); src1 = nvc0_pc_find_immediate(nvi->src[1]); - if (src0 && src1) + if (src0 && src1) { constant_expression(ctx->pc, nvi, src0, src1); - else { + } else { if (src0) constant_operand(ctx->pc, nvi, src0, 0); else @@ -633,44 +691,13 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) 
constant_operand(ctx->pc, nvi, src1, 1); } - /* check if we can MUL + ADD -> MAD/FMA */ - if (nvi->opcode != NV_OP_ADD) - continue; - - src0 = nvi->src[0]->value; - src1 = nvi->src[1]->value; - - if (SRC_IS_MUL(src0) && src0->refc == 1) - src = src0; - else - if (SRC_IS_MUL(src1) && src1->refc == 1) - src = src1; + if (baseop == NV_OP_MIN || baseop == NV_OP_MAX) + handle_min_max(ctx, nvi); else - continue; - - /* could have an immediate from above constant_* */ - if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) - continue; - s = (src == src0) ? 0 : 1; - - mod[0] = nvi->src[0]->mod; - mod[1] = nvi->src[1]->mod; - mod[2] = src->insn->src[0]->mod; - mod[3] = src->insn->src[0]->mod; - - if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) - continue; - - nvi->opcode = NV_OP_MAD; - nv_reference(ctx->pc, nvi, s, NULL); - nvi->src[2] = nvi->src[!s]; - - nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); - nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); - nvi->src[0]->mod = mod[2] ^ mod[s]; - nvi->src[1]->mod = mod[3]; + if (nvi->opcode == NV_OP_ADD_F32) + handle_add_mul(ctx, nvi); } - DESCEND_ARBITRARY(j, nv_pass_lower_arith); + DESCEND_ARBITRARY(j, nv_pass_algebraic_opt); return 0; } @@ -1158,11 +1185,17 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) pass.n = 0; pass.pc = pc; + /* Do CSE so we can just compare values by pointer in subsequent passes. */ + pc->pass_seq++; + ret = nv_pass_cse(&pass, root); + if (ret) + return ret; + /* Do this first, so we don't have to pay attention * to whether sources are supported memory loads. */ pc->pass_seq++; - ret = nv_pass_lower_arith(&pass, root); + ret = nv_pass_algebraic_opt(&pass, root); if (ret) return ret; @@ -1190,11 +1223,9 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) reldelim->pc = pc; } - pc->pass_seq++; - ret = nv_pass_cse(&pass, root); - if (ret) - return ret; - + /* May run DCE before load-combining since that pass will clean up + * after itself. 
+ */ dce.pc = pc; do { dce.removed = 0; -- cgit v1.2.3 From d3ea15f5ca570b9d885781cd72232026e50046d0 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 2 Feb 2011 21:23:09 +0100 Subject: nvc0: don't combine memory loads across block boundaries --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index b6d99724a10..57bf4b77f3e 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -849,6 +849,11 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) } } + ctx->alloc = 0; + ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL; + for (s = 0; s < 16; ++s) + ctx->mem_c[s] = NULL; + DESCEND_ARBITRARY(s, nv_pass_mem_opt); return 0; } -- cgit v1.2.3 From c62fc50c884e2755c0731c395f200d23b975fbde Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 2 Feb 2011 21:38:07 +0100 Subject: nvc0: reset texture base address after read transfer --- src/gallium/drivers/nvc0/nvc0_transfer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 92e006cba49..b279bdc6e7d 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -318,6 +318,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[1].domain = NOUVEAU_BO_GART; if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; unsigned i; for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], @@ -328,9 +329,10 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].base += mt->layer_stride; tx->rect[1].base += size; } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; } - tx->rect[0].z = z; - 
tx->rect[1].base = 0; return &tx->base; } -- cgit v1.2.3 From 92d8af582d2584ed95bbb4c7965812f7bc47c9ff Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 18:59:46 +0100 Subject: nvc0: try to fix register conflicts for vector instructions Vector here means using multiple 32 bit regs which are forced to be consecutive in the register file. This still isn't quite nice. --- src/gallium/drivers/nvc0/nvc0_pc.h | 3 ++- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 23 +++++++++--------- src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 37 ++++++++++++++++++++--------- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 12 ++++------ 4 files changed, 43 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 01ca95b0741..0756288daf7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -53,7 +53,8 @@ /** * BIND forces source operand i into the same register as destination operand i, - * and the operands will be assigned consecutive registers (needed for TEX) + * and the operands will be assigned consecutive registers (needed for TEX). + * Beware conflicts ! * SELECT forces its multiple source operands and its destination operand into * one and the same register. */ diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 57bf4b77f3e..a6791529fa7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -1147,13 +1147,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy * neighbouring registers. CSE might have messed this up. + * Just generate a MOV for each source to avoid conflicts if they're used in + * multiple NV_OP_BIND at different positions. 
*/ static int nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_value *val; struct nv_instruction *bnd, *nvi, *next; - int s, t; + int s; for (bnd = b->entry; bnd; bnd = next) { next = bnd->next; @@ -1161,20 +1163,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) continue; for (s = 0; s < 4 && bnd->src[s]; ++s) { val = bnd->src[s]->value; - for (t = s + 1; t < 4 && bnd->src[t]; ++t) { - if (bnd->src[t]->value != val) - continue; - nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); - nvi->def[0] = new_value_like(ctx->pc, val); - nvi->def[0]->insn = nvi; - nv_reference(ctx->pc, nvi, 0, val); - nvc0_insn_insert_before(bnd, nvi); - nv_reference(ctx->pc, bnd, t, nvi->def[0]); - } + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); + nvi->def[0] = new_value_like(ctx->pc, val); + nvi->def[0]->insn = nvi; + nv_reference(ctx->pc, nvi, 0, val); + nv_reference(ctx->pc, bnd, s, nvi->def[0]); + + nvc0_insn_insert_before(bnd, nvi); } } - DESCEND_ARBITRARY(t, nv_pass_fix_bind); + DESCEND_ARBITRARY(s, nv_pass_fix_bind); return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index d24f09a1507..ee282680061 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -477,7 +477,7 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) break; case NV_OP_MOV: if ((iter == 2) && i->src[0]->value->insn && - !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) + !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: @@ -488,18 +488,16 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) do_join_values(ctx, i->def[0], i->src[c]->value); } break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: - /* on nvc0, TEX src and dst can differ */ - break; case NV_OP_BIND: if (iter) break; - for (c = 0; c < 6 && i->src[c]; ++c) + for (c = 0; c < 4 && 
i->src[c]; ++c) do_join_values(ctx, i->def[c], i->src[c]->value); break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */ default: break; } @@ -730,6 +728,21 @@ nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) set->pc = pc; } +/* We allocate registers for all defs of a vector instruction at once. + * Since we'll encounter all of them in the allocation loop, do the allocation + * when we're at the one with the live range that starts latest. + */ +static boolean +is_best_representative(struct nv_value *val) +{ + struct nv_instruction *nvi = val->insn; + int i; + for (i = 0; i < 4 && val->insn->def[i]; ++i) + if (nvi->def[i]->livei && nvi->def[i]->livei->bgn > val->livei->bgn) + return FALSE; + return TRUE; +} + static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { @@ -821,11 +834,13 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) boolean mem = FALSE; int v = nvi_vector_size(cur->insn); - if (v > 1) - mem = !reg_assign(&f, &cur->insn->def[0], v); - else + if (v > 1) { + if (is_best_representative(cur)) + mem = !reg_assign(&f, &cur->insn->def[0], v); + } else { if (iter) mem = !reg_assign(&f, &cur, 1); + } if (mem) { NOUVEAU_ERR("out of registers\n"); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 9b5d4290787..f53af6c49c3 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1333,10 +1333,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, if (array) arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); - /* ensure that all inputs reside in a GPR */ - for (c = 0; c < dim + array + cube + shadow; ++c) - (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; - /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ bnd = new_instruction(bld->pc, NV_OP_BIND); @@ -1878,10 +1874,10 @@ 
bld_instruction(struct bld_context *bld, } for (c = 0; c < 4; ++c) - if ((mask & (1 << c)) && - ((dst0[c]->reg.file == NV_FILE_IMM) || - (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) - dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + if (mask & (1 << c)) + if ((dst0[c]->reg.file == NV_FILE_IMM) || + (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63)) + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); c = 0; if ((mask & 0x3) == 0x3) { -- cgit v1.2.3 From 8f051345807494ae0aeaf75e698477f65f29322d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 19:01:54 +0100 Subject: nvc0: set basic block on manual instruction insertion --- src/gallium/drivers/nvc0/nvc0_pc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index 1d1b9e19b78..a2006321021 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -518,6 +518,8 @@ nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) ni->prev = at; ni->next->prev = ni; ni->prev->next = ni; + ni->bb = at->bb; + ni->bb->num_instructions++; } void -- cgit v1.2.3 From d5263e4093e7fefacbbe3bbbec717cdf64856cbe Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 19:03:09 +0100 Subject: nv50,nvc0: fix condition code change when commuting SET sources --- src/gallium/drivers/nv50/nv50_pc_optimize.c | 2 +- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 27eb3817bf1..679e5ea1485 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -299,7 +299,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) } if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0) - nvi->set_cond = 
cc_swapped[nvi->set_cond]; + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; } static int diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index a6791529fa7..2e554dbe4e4 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -258,8 +258,8 @@ check_swap_src_0_1(struct nv_instruction *nvi) } } - if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET) - nvi->set_cond = cc_swapped[nvi->set_cond]; + if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET) + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; } static void -- cgit v1.2.3 From 8e240e6153e089d23f646c7b3f2c5edff7ac223c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 19:05:55 +0100 Subject: nvc0: store only one value per basic block for TGSI regs --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index f53af6c49c3..8c0967dfa87 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -63,7 +63,13 @@ bld_register_access(struct bld_register *reg, unsigned i) static INLINE void bld_register_add_val(struct bld_register *reg, struct nv_value *val) { - util_dynarray_append(&reg->vals, struct nv_value *, val); + struct nv_basic_block *bb = val->insn->bb; + + if (reg->vals.size && + (util_dynarray_top(&reg->vals, struct nv_value *))->insn->bb == bb) + *(util_dynarray_top_ptr(&reg->vals, struct nv_value *)) = val; + else + util_dynarray_append(&reg->vals, struct nv_value *, val); } static INLINE boolean -- cgit v1.2.3 From c485368efea8527da68a476af4ed48541b5ed93e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 6 Feb 2011 13:09:24 +0100 Subject: nvc0: do not generate a backwards jump if a loop ends with
BRK --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 8c0967dfa87..8b6cb0e7c76 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1693,9 +1693,11 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */ + bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); - nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + } bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ -- cgit v1.2.3 From 7401590dedf6f2abb1f0f0db988be90acb1fb84f Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 14:54:17 +0100 Subject: nv50,nvc0: do not forget to apply sign mode to saved TGSI inputs --- src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 2 +- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index d6b80c3ea79..ce9300ad8fd 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -1130,7 +1130,7 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: res = bld_saved_input(bld, idx, swz); if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP)) - return res; + break; res = new_value(bld->pc, bld->ti->input_file, type); res->reg.id = bld->ti->input_map[idx][swz]; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 
8b6cb0e7c76..dd1c7f73f5c 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -983,14 +983,6 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b) bld->out_kind = CFG_EDGE_FORWARD; } -static struct nv_value * -bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c) -{ - if (bld->saved_inputs[i][c]) - return bld->saved_inputs[i][c]; - return NULL; -} - static struct nv_value * bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) { @@ -1058,9 +1050,9 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: assert(!src->Register.Dimension); if (!ptr) { - res = bld_get_saved_input(bld, idx, swz); + res = bld->saved_inputs[idx][swz]; if (res) - return res; + break; } res = new_value(bld->pc, bld->ti->input_file, 4); if (ptr) -- cgit v1.2.3 From 4124feabcbbcf9ebe1ba37cf64419edbeda9c519 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 21:17:37 +0100 Subject: nvc0: make sure phi-ops really have one source per in-block --- src/gallium/drivers/nvc0/nvc0_pc.h | 2 + src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 57 +++++++++++++++++++---------- 2 files changed, 40 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 0756288daf7..40d728aefc7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -345,6 +345,8 @@ struct nv_ref { uint8_t flags; }; +#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) + struct nv_basic_block; struct nv_instruction { diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index ee282680061..718943bdbdf 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -360,20 +360,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) return 
(b->num_in > 1) && (n == 2); } +/* Look for the @phi's operand whose definition reaches @b. */ static int phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, struct nv_basic_block *tb) { + struct nv_ref *srci, *srcj; int i, j; for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { - if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb)) + srci = phi->src[i]; + /* if already replaced, check with original source first */ + if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) + srci = srci->value->insn->src[0]; + if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL)) continue; /* NOTE: back-edges are ignored by the reachable-by check */ - if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb, - phi->src[i]->value->insn->bb, tb)) + if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb, + srci->value->insn->bb, NULL)) { j = i; + srcj = srci; + } } + if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL)) + if (!nvc0_bblock_reachable_by(srcj->value->insn->bb, + phi->def[0]->insn->bb, NULL)) + j = -1; return j; } @@ -420,21 +432,23 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ctx->pc->current_block = pn; for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { - if ((j = phi_opnd_for_bb(i, p, b)) < 0) - continue; - val = i->src[j]->value; - - if (i->src[j]->flags) { - /* value already encountered from a different in-block */ - val = val->insn->src[0]->value; - while (j < 6 && i->src[j]) - ++j; - assert(j < 6); + j = phi_opnd_for_bb(i, p, b); + + if (j < 0) { + val = i->def[0]; + } else { + val = i->src[j]->value; + if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { + j = -1; + /* use original value, we already encountered & replaced it */ + val = val->insn->src[0]->value; + } } + if (j < 0) /* need an additional source ? 
*/ + for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j); + assert(j < 6); /* XXX: really ugly shaders */ ni = new_instruction(ctx->pc, NV_OP_MOV); - - /* TODO: insert instruction at correct position in the first place */ if (ni->prev && ni->prev->target) nvc0_insns_permute(ni->prev, ni); @@ -442,7 +456,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ni->def[0]->insn = ni; nv_reference(ctx->pc, ni, 0, val); nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */ - i->src[j]->flags = 1; + i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; } if (pn != p && pn->exit) { @@ -619,15 +633,16 @@ static void collect_live_values(struct nv_basic_block *b, const int n) { int i; - if (b->out[0]) { - if (b->out[1]) { /* what to do about back-edges ? */ + /* XXX: what to do about back/fake-edges (used to include both here) ? */ + if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { for (i = 0; i < n; ++i) b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; } else { memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); } } else - if (b->out[1]) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); } else { memset(b->live_set, 0, n * sizeof(uint32_t)); @@ -877,6 +892,10 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) ret = pass_generate_phi_movs(ctx, root); assert(!ret); +#ifdef NVC0_RA_DEBUG_LIVEI + nvc0_print_function(root); +#endif + for (i = 0; i < pc->loop_nesting_bound; ++i) { pc->pass_seq++; ret = pass_build_live_sets(ctx, root); -- cgit v1.2.3 From 0f776fea432052c00972ae1c6a0fbf76ec5e0b6c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 7 Feb 2011 21:19:23 +0100 Subject: nvc0: implement local memory load and store ops --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 35 +++++++++++++++++++++++++----- src/gallium/drivers/nvc0/nvc0_pc_print.c | 4 ++-- 
src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 11 ++++++---- 3 files changed, 38 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index 644b9ef61a1..b2a80566a02 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -804,11 +804,8 @@ emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) } static void -emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +emit_ld_common(struct nv_pc *pc, struct nv_instruction *i) { - pc->emit[0] = 0x00000006; - pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); - emit_ldst_size(pc, i); set_pred(pc, i); @@ -818,6 +815,15 @@ emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) DID(pc, i->def[0], 14); } +static void +emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000006; + pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); + + emit_ld_common(pc, i); +} + static void emit_ld(struct nv_pc *pc, struct nv_instruction *i) { @@ -829,6 +835,12 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i) } else { emit_ld_const(pc, i); } + } else + if (SFILE(i, 0) == NV_FILE_MEM_L) { + pc->emit[0] = 0x00000005; + pc->emit[1] = 0xc0000000; + + emit_ld_common(pc, i); } else { NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); abort(); @@ -838,8 +850,19 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i) static void emit_st(struct nv_pc *pc, struct nv_instruction *i) { - NOUVEAU_ERR("emit_st: not handled yet\n"); - abort(); + if (SFILE(i, 0) != NV_FILE_MEM_L) + NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0)); + + pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */ + pc->emit[1] = 0xc8000000; + + emit_ldst_size(pc, i); + + set_pred(pc, i); + set_address_16(pc, i->src[0]); + + SID(pc, (i->indirect >= 0) ? 
i->src[i->indirect] : NULL, 20); + DID(pc, i->src[1]->value, 14); } void diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 76dd0f57500..9e0bffacd60 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -280,8 +280,8 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index dd1c7f73f5c..43c27fd8906 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -569,11 +569,12 @@ bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); - loc->reg.id = ofst * 4; + loc->reg.address = ofst * 4; nv_reference(bld->pc, insn, 0, loc); - nv_reference(bld->pc, insn, 1, ptr); - nv_reference(bld->pc, insn, 2, val); + nv_reference(bld->pc, insn, 1, val); + if (ptr) + bld_src_pointer(bld, insn, 2, ptr); } static struct nv_value * @@ -585,7 +586,9 @@ bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) loc->reg.address = ofst * 4; - val = bld_insn_2(bld, NV_OP_LD, loc, ptr); + val = bld_insn_1(bld, NV_OP_LD, loc); + if (ptr) + bld_src_pointer(bld, val->insn, 1, ptr); return val; } -- cgit v1.2.3 From 0691530b7f01f3106f7b4d697cd7a42f86fa23d5 Mon Sep 17 00:00:00 
2001 From: Christoph Bumiller Date: Tue, 8 Feb 2011 16:55:06 +0100 Subject: nvc0: replace branching with predicated insns where feasible --- src/gallium/drivers/nvc0/nvc0_pc.c | 13 ++--- src/gallium/drivers/nvc0/nvc0_pc.h | 12 ++-- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 86 ++++++++++++++++++++++++++--- src/gallium/drivers/nvc0/nvc0_pc_print.c | 28 +++++----- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 30 +++++++--- 5 files changed, 123 insertions(+), 46 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index a2006321021..e0cba05b976 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -55,15 +55,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s, boolean nvc0_insn_is_predicateable(struct nv_instruction *nvi) { - int s; - - if (!nv_op_predicateable(nvi->opcode)) + if (nvi->predicate >= 0) /* already predicated */ return FALSE; - if (nvi->predicate >= 0) + if (!nvc0_op_info_table[nvi->opcode].predicate && + !nvc0_op_info_table[nvi->opcode].pseudo) return FALSE; - for (s = 0; s < 4 && nvi->src[s]; ++s) - if (nvi->src[s]->value->reg.file == NV_FILE_IMM) - return FALSE; return TRUE; } @@ -505,6 +501,9 @@ nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) i->bb = b; b->num_instructions++; + + if (i->prev && i->prev->terminator) + nvc0_insns_permute(i->prev, i); } void diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 40d728aefc7..efa073a9201 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -205,6 +205,10 @@ #define NV_CC_C 0x11 #define NV_CC_A 0x12 #define NV_CC_S 0x13 +#define NV_CC_INVERSE(cc) ((cc) ^ 0x7) +/* for 1 bit predicates: */ +#define NV_CC_P 0 +#define NV_CC_NOT_P 1 #define NV_PC_MAX_INSTRUCTIONS 2048 #define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) @@ -260,12 +264,6 @@ nv_op_supported_src_mods(uint opcode) return 
nvc0_op_info_table[opcode].mods; } -static INLINE boolean -nv_op_predicateable(uint opcode) -{ - return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE; -} - static INLINE uint nv_type_order(ubyte type) { @@ -488,7 +486,7 @@ nv_alloc_instruction(struct nv_pc *pc, uint opcode) assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); insn->opcode = opcode; - insn->cc = 0; + insn->cc = NV_CC_P; insn->indirect = -1; insn->predicate = -1; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 2e554dbe4e4..6cfa03d5b16 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -99,6 +99,7 @@ inst_removable(struct nv_instruction *nvi) nvc0_insn_refcount(nvi))); } +/* Check if we do not actually have to emit this instruction. */ static INLINE boolean inst_is_noop(struct nv_instruction *nvi) { @@ -1043,7 +1044,6 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) return 0; } -#if 0 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with * BREAK and dummy ELSE block. @@ -1064,24 +1064,92 @@ bb_is_if_else_endif(struct nv_basic_block *bb) } } -/* predicate instructions and remove branch at the end */ +/* Predicate instructions and delete any branch at the end if it is + * not a break from a loop. 
+ */ static void predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, - struct nv_value *p, ubyte cc) + struct nv_value *pred, uint8_t cc) { + struct nv_instruction *nvi, *prev; + int s; + if (!b->entry) + return; + for (nvi = b->entry; nvi; nvi = nvi->next) { + prev = nvi; + if (inst_is_noop(nvi)) + continue; + for (s = 0; nvi->src[s]; ++s); + assert(s < 6); + nvi->predicate = s; + nvi->cc = cc; + nv_reference(pc, nvi, nvi->predicate, pred); + } + if (prev->opcode == NV_OP_BRA && + b->out_kind[0] != CFG_EDGE_LOOP_LEAVE && + b->out_kind[1] != CFG_EDGE_LOOP_LEAVE) + nvc0_insn_delete(prev); } -#endif -/* NOTE: Run this after register allocation, we can just cut out the cflow - * instructions and hook the predicates to the conditional OPs if they are - * not using immediates; better than inserting SELECT to join definitions. - * - * NOTE: Should adapt prior optimization to make this possible more often. +static INLINE boolean +may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred) +{ + if (nvi->def[0] && values_equal(nvi->def[0], pred)) + return FALSE; + return nvc0_insn_is_predicateable(nvi); +} + +/* Transform IF/ELSE/ENDIF constructs into predicated instructions + * where feasible. 
*/ static int nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) { + struct nv_instruction *nvi; + struct nv_value *pred; + int k; + int n0, n1; /* instruction counts of outgoing blocks */ + + if (bb_is_if_else_endif(b)) { + assert(b->exit && b->exit->opcode == NV_OP_BRA); + + assert(b->exit->predicate >= 0); + pred = b->exit->src[b->exit->predicate]->value; + + n1 = n0 = 0; + for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) + if (!may_predicate_insn(nvi, pred)) + break; + if (!nvi) { + /* we're after register allocation, so there always is an ELSE block */ + for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) + if (!may_predicate_insn(nvi, pred)) + break; + } + + /* 12 is an arbitrary limit */ + if (!nvi && n0 < 12 && n1 < 12) { + predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc); + predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc); + + nvc0_insn_delete(b->exit); /* delete the branch */ + + /* and a potential joinat before it */ + if (b->exit && b->exit->opcode == NV_OP_JOINAT) + nvc0_insn_delete(b->exit); + + /* remove join operations at the end of the conditional */ + k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 
1 : 0; + if ((nvi = b->out[0]->out[k]->entry)) { + nvi->join = 0; + if (nvi->opcode == NV_OP_JOIN) + nvc0_insn_delete(nvi); + } + } + } + DESCEND_ARBITRARY(k, nv_pass_flatten); + return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 9e0bffacd60..7840078614f 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -225,7 +225,7 @@ nvc0_print_instruction(struct nv_instruction *i) PRINT("%s", gree); if (NV_BASEOP(i->opcode) == NV_OP_SET) - PRINT("set %s", nv_cond_name(i->set_cond)); + PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond)); else if (i->saturate) PRINT("sat %s", nvc0_opcode_name(i->opcode)); @@ -278,7 +278,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, - { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, @@ -343,18 +343,18 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, - { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, - 
{ NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, @@ -369,7 +369,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 43c27fd8906..72bfcd0c95f 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -659,7 +659,7 @@ bld_kil(struct bld_context *bld, struct nv_value *src) static void bld_flow(struct bld_context *bld, uint opcode, - struct nv_value *src, struct nv_basic_block *target, + struct 
nv_value *pred, uint8_t cc, struct nv_basic_block *target, boolean reconverge) { struct nv_instruction *nvi; @@ -670,8 +670,10 @@ bld_flow(struct bld_context *bld, uint opcode, nvi = new_instruction(bld->pc, opcode); nvi->target = target; nvi->terminator = 1; - if (src) - bld_src_predicate(bld, nvi, 0, src); + if (pred) { + nvi->cc = cc; + bld_src_predicate(bld, nvi, 0, pred); + } } static ubyte @@ -1584,6 +1586,7 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_IF: { struct nv_basic_block *b = new_basic_block(bld->pc); + struct nv_value *pred = emit_fetch(bld, insn, 0, 0); assert(bld->cond_lvl < BLD_MAX_COND_NESTING); @@ -1592,10 +1595,19 @@ bld_instruction(struct bld_context *bld, bld->join_bb[bld->cond_lvl] = bld->pc->current_block; bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, - emit_fetch(bld, insn, 0, 0), bld->zero); + if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) { + pred = bld_clone(bld, pred->insn); + pred->reg.size = 1; + pred->reg.file = NV_FILE_PRED; + if (pred->insn->opcode == NV_OP_FSET_F32) + pred->insn->opcode = NV_OP_SET_F32; + } else { + pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U, + pred, bld->zero); + } + assert(!mask); - bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); + bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0)); ++bld->cond_lvl; bld_new_block(bld, b); @@ -1661,7 +1673,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); @@ -1673,7 +1685,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, 
NULL, NV_CC_P, bb, FALSE); nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); @@ -1689,7 +1701,7 @@ bld_instruction(struct bld_context *bld, struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */ - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); } -- cgit v1.2.3 From 0bd04cdd1245a9bfca67e87018125e7ab287d1c0 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 9 Feb 2011 14:26:14 +0100 Subject: nvc0: make CSE work for ops with multiple results --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 85 ++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 6cfa03d5b16..9a7094e5d3c 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -1153,40 +1153,98 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) return 0; } +/* Tests instructions for equality, but independently of sources. 
*/ +static boolean +is_operation_equal(struct nv_instruction *a, struct nv_instruction *b) +{ + if (a->opcode != b->opcode) + return FALSE; + if (nv_is_texture_op(a->opcode)) { + if (a->ext.tex.t != b->ext.tex.t || + a->ext.tex.s != b->ext.tex.s) + return FALSE; + if (a->tex_dim != b->tex_dim || + a->tex_array != b->tex_array || + a->tex_cube != b->tex_cube || + a->tex_shadow != b->tex_shadow || + a->tex_live != b->tex_live) + return FALSE; + } else + if (a->opcode == NV_OP_CVT) { + if (a->ext.cvt.s != b->ext.cvt.s || + a->ext.cvt.d != b->ext.cvt.d) + return FALSE; + } else + if (NV_BASEOP(a->opcode) == NV_OP_SET || + NV_BASEOP(a->opcode) == NV_OP_SLCT) { + if (a->set_cond != b->set_cond) + return FALSE; + } else + if (a->opcode == NV_OP_LINTERP || + a->opcode == NV_OP_PINTERP) { + if (a->centroid != b->centroid || + a->flat != b->flat) + return FALSE; + } + if (a->cc != b->cc) + return FALSE; + if (a->lanes != b->lanes || + a->patch != b->patch || + a->saturate != b->saturate) + return FALSE; + if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */ + return FALSE; + return TRUE; +} + /* local common subexpression elimination, stupid O(n^2) implementation */ static int nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_instruction *ir, *ik, *next; struct nv_instruction *entry = b->phi ? b->phi : b->entry; - int s; + int s, d; unsigned int reps; do { reps = 0; for (ir = entry; ir; ir = next) { next = ir->next; + if (ir->fixed) + continue; for (ik = entry; ik != ir; ik = ik->next) { - if (ir->opcode != ik->opcode || ir->fixed) + if (!is_operation_equal(ir, ik)) continue; - - if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) + if (!ir->def[0] || !ik->def[0]) continue; if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) continue; - if (!values_equal(ik->def[0], ir->def[0])) + for (d = 0; d < 4; ++d) { + if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 
1 : 0)) + break; + if (ir->def[d]) { + if (!values_equal(ik->def[0], ir->def[0])) + break; + } else { + d = 4; + break; + } + } + if (d != 4) continue; - for (s = 0; s < 3; ++s) { + for (s = 0; s < 5; ++s) { struct nv_value *a, *b; - if (!ik->src[s]) { - if (ir->src[s]) - break; - continue; + if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0)) + break; + if (!ir->src[s]) { + s = 5; + break; } + if (ik->src[s]->mod != ir->src[s]->mod) break; a = ik->src[s]->value; @@ -1194,14 +1252,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) if (a == b) continue; if (a->reg.file != b->reg.file || - a->reg.id < 0 || + a->reg.id < 0 || /* this excludes memory loads/stores */ a->reg.id != b->reg.id) break; } - if (s == 3) { + if (s == 5) { nvc0_insn_delete(ir); + for (d = 0; d < 4 && ir->def[d]; ++d) + nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]); ++reps; - nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); break; } } -- cgit v1.2.3 From 95f0aa0e52b694f496dcd32f640d1a29b97f4d0d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 2 Feb 2011 16:35:21 +0100 Subject: nvc0: correct storage type for 16 bit surface formats --- src/gallium/drivers/nvc0/nvc0_miptree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index 7c7e134146e..22f48c8a5fe 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -143,8 +143,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, switch (pt->format) { case PIPE_FORMAT_Z16_UNORM: tile_flags = 0x0700; /* COMPRESSED */ - tile_flags = 0x0200; /* NORMAL ? */ - tile_flags = 0x0100; /* NORMAL ? 
*/ + tile_flags = 0x0100; /* NORMAL */ break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ @@ -170,6 +169,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, break; case PIPE_FORMAT_R16G16B16A16_UNORM: tile_flags = 0xe900; /* COMPRESSED */ + tile_flags = 0xfe00; /* NORMAL */ break; default: tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ -- cgit v1.2.3 From fc798dc37dd8a39c253a436fa0c9dd2071f09270 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 2 Feb 2011 22:04:53 +0100 Subject: nvc0: fix stride of NVC0_3D_RT methods --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 1a34313912c..4b1325a3043 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -158,29 +158,29 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_RT__ESIZE 0x00000020 #define NVC0_3D_RT__LEN 0x00000008 -#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0)) +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0)) -#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0)) +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0)) -#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0)) +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0)) -#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0)) +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0)) -#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0)) +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0)) -#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0)) +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0)) #define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001 #define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 #define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 #define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 #define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 -#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0)) +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0)) #define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff #define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 #define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 -#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0)) +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0)) #define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) #define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 -- cgit v1.2.3 From b6e3130a3b6e40308ddb5d11638d509fe69eb912 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 9 Feb 2011 15:01:23 +0100 Subject: nvc0: serialize on PIPE_FLUSH_RENDER_CACHE as well Effects were easily visible in piglit/fbo-generatemipmap-formats. 
--- src/gallium/drivers/nvc0/nvc0_context.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 1ebf9e2bafb..20c1a31b5b3 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -41,6 +41,10 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags, OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); OUT_RING (chan, 0x00); + } else + if ((flags & PIPE_FLUSH_RENDER_CACHE) && !(flags & PIPE_FLUSH_FRAME)) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); } if (fence) { -- cgit v1.2.3 From 2d958853080d74da3abb4251fba75cd7df9cd879 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 1 Dec 2010 15:41:12 +0000 Subject: svga: Don't advertise pixel shader addr register support. It's not fully supported. --- src/gallium/drivers/svga/svga_screen.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index d0f42c614c9..9b699eadcc1 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -225,13 +225,18 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return svgascreen->use_ps30 ? 32 : 12; return result.u; case PIPE_SHADER_CAP_MAX_ADDRS: - return svgascreen->use_ps30 ? 1 : 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + /* + * Although PS 3.0 has some addressing abilities it can only represent + * loops that can be statically determined and unrolled. Given we can + * only handle a subset of the cases that the state tracker already + * does it is better to defer loop unrolling to the state tracker. + */ + return 0; case PIPE_SHADER_CAP_MAX_PREDS: return svgascreen->use_ps30 ? 
1 : 0; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - return svgascreen->use_ps30 ? 1 : 0; case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: -- cgit v1.2.3 From 2314a2f45ff29823413d12e93a325f361b46fdd7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 10 Feb 2011 01:36:23 +0100 Subject: Revert "r300g: support sRGB colorbuffers" This partially reverts commit 91eba2567eab9409d94efc3c1f07a4a3731d0047. Conflicts: src/gallium/drivers/r300/r300_blit.c --- src/gallium/drivers/r300/r300_texture.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 7dd2fe326f8..e96d340d569 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -375,7 +375,6 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: /*case PIPE_FORMAT_L8_SNORM:*/ - case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return R300_COLOR_FORMAT_I8; @@ -383,7 +382,6 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 16-bit buffers. */ case PIPE_FORMAT_L8A8_UNORM: /*case PIPE_FORMAT_L8A8_SNORM:*/ - case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: return R300_COLOR_FORMAT_UV88; @@ -402,28 +400,20 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 32-bit buffers. 
*/ case PIPE_FORMAT_B8G8R8A8_UNORM: /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ - case PIPE_FORMAT_B8G8R8A8_SRGB: case PIPE_FORMAT_B8G8R8X8_UNORM: /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ - case PIPE_FORMAT_B8G8R8X8_SRGB: case PIPE_FORMAT_A8R8G8B8_UNORM: /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ - case PIPE_FORMAT_A8R8G8B8_SRGB: case PIPE_FORMAT_X8R8G8B8_UNORM: /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ - case PIPE_FORMAT_X8R8G8B8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ - case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SRGB: case PIPE_FORMAT_X8B8G8R8_UNORM: /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ - case PIPE_FORMAT_X8B8G8R8_SRGB: case PIPE_FORMAT_R8G8B8X8_UNORM: /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ - /*case PIPE_FORMAT_R8G8B8X8_SRGB:*/ case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return R300_COLOR_FORMAT_ARGB8888; @@ -534,7 +524,6 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: /*case PIPE_FORMAT_L8_SNORM:*/ - case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; @@ -543,7 +532,6 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) * COLORFORMAT_UV88 stores C2 and C0. 
*/ case PIPE_FORMAT_L8A8_UNORM: /*case PIPE_FORMAT_L8A8_SNORM:*/ - case PIPE_FORMAT_L8A8_SRGB: return modifier | R300_C0_SEL_A | R300_C2_SEL_R; case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: @@ -557,10 +545,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_B4G4R4X4_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ - case PIPE_FORMAT_B8G8R8A8_SRGB: case PIPE_FORMAT_B8G8R8X8_UNORM: /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ - case PIPE_FORMAT_B8G8R8X8_SRGB: case PIPE_FORMAT_B10G10R10A2_UNORM: return modifier | R300_C0_SEL_B | R300_C1_SEL_G | @@ -569,10 +555,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* ARGB outputs. */ case PIPE_FORMAT_A8R8G8B8_UNORM: /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ - case PIPE_FORMAT_A8R8G8B8_SRGB: case PIPE_FORMAT_X8R8G8B8_UNORM: /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ - case PIPE_FORMAT_X8R8G8B8_SRGB: return modifier | R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; @@ -580,10 +564,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* ABGR outputs. */ case PIPE_FORMAT_A8B8G8R8_UNORM: /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ - case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_X8B8G8R8_UNORM: /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ - case PIPE_FORMAT_X8B8G8R8_SRGB: return modifier | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; @@ -591,10 +573,8 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* RGBA outputs. 
*/ case PIPE_FORMAT_R8G8B8X8_UNORM: /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ - /*case PIPE_FORMAT_R8G8B8X8_SRGB:*/ case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SRGB: case PIPE_FORMAT_R8SG8SB8UX8U_NORM: case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: -- cgit v1.2.3 From fc9170d0cf1c7a7dcd580e9da17742a6ab68bd1b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 10 Feb 2011 02:11:38 +0100 Subject: r300g: use format from pipe_surface instead of pipe_resource --- src/gallium/drivers/r300/r300_context.h | 7 ------- src/gallium/drivers/r300/r300_texture.c | 36 +++++++++++++-------------------- 2 files changed, 14 insertions(+), 29 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 1737dafc6ac..316af64e6de 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -188,11 +188,6 @@ struct r300_sampler_view { uint32_t texcache_region; }; -struct r300_texture_fb_state { - uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */ - uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */ -}; - struct r300_texture_sampler_state { struct r300_texture_format_state format; uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ @@ -405,8 +400,6 @@ struct r300_texture { /* Registers carrying texture format data. */ /* Only format-independent bits should be filled in. */ struct r300_texture_format_state tx_format; - /* All bits should be filled in. 
*/ - struct r300_texture_fb_state fb_state; /* hyper-z memory allocs */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index e96d340d569..059c194e53f 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -656,29 +656,25 @@ void r300_texture_setup_format_state(struct r300_screen *screen, R300_TXO_MICRO_TILE(desc->microtile); } -static void r300_texture_setup_fb_state(struct r300_screen* screen, - struct r300_texture* tex) +static void r300_texture_setup_fb_state(struct r300_surface *surf) { - unsigned i; + struct r300_texture *tex = r300_texture(surf->base.texture); + unsigned level = surf->base.u.tex.level; /* Set framebuffer state. */ - if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { - for (i = 0; i <= tex->desc.b.b.last_level; i++) { - tex->fb_state.pitch[i] = - tex->desc.stride_in_pixels[i] | - R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | + if (util_format_is_depth_or_stencil(surf->base.format)) { + surf->pitch = + tex->desc.stride_in_pixels[level] | + R300_DEPTHMACROTILE(tex->desc.macrotile[level]) | R300_DEPTHMICROTILE(tex->desc.microtile); - } - tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); + surf->format = r300_translate_zsformat(surf->base.format); } else { - for (i = 0; i <= tex->desc.b.b.last_level; i++) { - tex->fb_state.pitch[i] = - tex->desc.stride_in_pixels[i] | - r300_translate_colorformat(tex->desc.b.b.format) | - R300_COLOR_TILE(tex->desc.macrotile[i]) | + surf->pitch = + tex->desc.stride_in_pixels[level] | + r300_translate_colorformat(surf->base.format) | + R300_COLOR_TILE(tex->desc.macrotile[level]) | R300_COLOR_MICROTILE(tex->desc.microtile); - } - tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); + surf->format = r300_translate_out_fmt(surf->base.format); } } @@ -694,8 +690,6 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, 
util_format_short_name(new_format)); tex->format = new_format; - - r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); } static unsigned r300_texture_is_referenced(struct pipe_context *context, @@ -786,7 +780,6 @@ r300_texture_create_object(struct r300_screen *rscreen, } /* Initialize the hardware state. */ r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format); - r300_texture_setup_fb_state(rscreen, tex); tex->desc.b.vtbl = &r300_texture_vtbl; pipe_reference_init(&tex->desc.b.b.reference, 1); @@ -916,8 +909,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->offset = r300_texture_get_offset(&tex->desc, level, surf_tmpl->u.tex.first_layer); - surface->pitch = tex->fb_state.pitch[level]; - surface->format = tex->fb_state.format; + r300_texture_setup_fb_state(surface); /* Parameters for the CBZB clear. */ surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; -- cgit v1.2.3 From ce9c0d280104c8001a3ee360b07218ad3d260e46 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 10 Feb 2011 06:10:55 +0100 Subject: r300g: simplify WRITE_RELOC API and cleanup --- src/gallium/drivers/r300/r300_cb.h | 54 ++++++++++++-------- src/gallium/drivers/r300/r300_context.c | 4 +- src/gallium/drivers/r300/r300_context.h | 12 ++--- src/gallium/drivers/r300/r300_cs.h | 51 +++++++------------ src/gallium/drivers/r300/r300_emit.c | 84 ++++++++++++++++---------------- src/gallium/drivers/r300/r300_query.c | 10 ++-- src/gallium/drivers/r300/r300_render.c | 9 +--- src/gallium/drivers/r300/r300_state.c | 4 +- src/gallium/drivers/r300/r300_texture.c | 22 ++++----- src/gallium/drivers/r300/r300_transfer.c | 12 ++--- 10 files changed, 126 insertions(+), 136 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h index 9d3d4fc1b19..b373937a1f9 100644 --- a/src/gallium/drivers/r300/r300_cb.h +++ b/src/gallium/drivers/r300/r300_cb.h @@ -61,40 +61,52 @@ * that 
they neatly hide away, and don't have the cost of function setup, so * we're going to use them. */ -#ifdef DEBUG -#define CB_DEBUG(x) x -#else -#define CB_DEBUG(x) -#endif - - /** * Command buffer setup. */ +#ifdef DEBUG + #define CB_LOCALS \ - CB_DEBUG(int cs_count = 0;) \ + int cs_count = 0; \ uint32_t *cs_ptr = NULL; \ - CB_DEBUG((void) cs_count;) (void) cs_ptr; + (void) cs_count; (void) cs_ptr -#define NEW_CB(ptr, size) do { \ - assert(sizeof(*ptr) == sizeof(uint32_t)); \ - cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ - CB_DEBUG(cs_count = size;) \ +#define BEGIN_CB(ptr, size) do { \ + assert(sizeof(*(ptr)) == sizeof(uint32_t)); \ + cs_count = (size); \ + cs_ptr = (ptr); \ } while (0) -#define BEGIN_CB(ptr, size) do { \ - assert(sizeof(*ptr) == sizeof(uint32_t)); \ - cs_ptr = ptr; \ - CB_DEBUG(cs_count = size;) \ +#define NEW_CB(ptr, size) \ + do { \ + assert(sizeof(*(ptr)) == sizeof(uint32_t)); \ + cs_count = (size); \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ } while (0) #define END_CB do { \ - CB_DEBUG(if (cs_count != 0) \ + if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ - cs_count, __FUNCTION__, __FILE__, __LINE__);) \ + cs_count, __FUNCTION__, __FILE__, __LINE__); \ } while (0) +#define CB_USED_DW(x) cs_count -= x + +#else + +#define CB_LOCALS \ + uint32_t *cs_ptr = NULL; (void) cs_ptr + +#define NEW_CB(ptr, size) \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)) + +#define BEGIN_CB(ptr, size) cs_ptr = (ptr) +#define END_CB +#define CB_USED_DW(x) + +#endif + /** * Storing pure DWORDs. 
@@ -103,13 +115,13 @@ #define OUT_CB(value) do { \ *cs_ptr = (value); \ cs_ptr++; \ - CB_DEBUG(cs_count--;) \ + CB_USED_DW(1); \ } while (0) #define OUT_CB_TABLE(values, count) do { \ memcpy(cs_ptr, values, count * sizeof(uint32_t)); \ cs_ptr += count; \ - CB_DEBUG(cs_count -= count;) \ + CB_USED_DW(count); \ } while (0) #define OUT_CB_32F(value) \ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index c6e03050fc7..675877733cc 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -549,13 +549,13 @@ void r300_finish(struct r300_context *r300) for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]->texture) { r300->rws->buffer_wait(r300->rws, - r300_texture(fb->cbufs[i]->texture)->buffer); + r300_texture(fb->cbufs[i]->texture)->buf); return; } } if (fb->zsbuf && fb->zsbuf->texture) { r300->rws->buffer_wait(r300->rws, - r300_texture(fb->zsbuf->texture)->buffer); + r300_texture(fb->zsbuf->texture)->buf); } } } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 316af64e6de..a9ce7cca58b 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -273,8 +273,8 @@ struct r300_query { boolean begin_emitted; /* The buffer where query results are stored. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_buffer *buf; + struct r300_winsys_cs_buffer *cs_buf; /* The size of the buffer. */ unsigned buffer_size; /* The domain of the buffer. */ @@ -305,8 +305,8 @@ struct r300_surface { struct pipe_surface base; /* Winsys buffer backing the texture. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_buffer *buf; + struct r300_winsys_cs_buffer *cs_buf; enum r300_buffer_domain domain; @@ -394,8 +394,8 @@ struct r300_texture { enum r300_buffer_domain domain; /* Pipe buffer backing this texture. 
*/ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_buffer *buf; + struct r300_winsys_cs_buffer *cs_buf; /* Registers carrying texture format data. */ /* Only format-independent bits should be filled in. */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 6726f100e1b..2e52dfa43c6 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -35,12 +35,6 @@ * that they neatly hide away, and don't have the cost of function setup,so * we're going to use them. */ -#ifdef DEBUG -#define CS_DEBUG(x) x -#else -#define CS_DEBUG(x) -#endif - /** * Command submission setup. */ @@ -50,22 +44,29 @@ struct r300_winsys_screen *cs_winsys = (context)->rws; \ int cs_count = 0; (void) cs_count; (void) cs_winsys; +#ifdef DEBUG + #define BEGIN_CS(size) do { \ assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \ - CS_DEBUG(cs_count = size;) \ + cs_count = size; \ } while (0) -#ifdef DEBUG #define END_CS do { \ if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ cs_count, __FUNCTION__, __FILE__, __LINE__); \ cs_count = 0; \ } while (0) + +#define CS_USED_DW(x) cs_count -= (x) + #else + +#define BEGIN_CS(size) #define END_CS -#endif +#define CS_USED_DW(x) +#endif /** * Writing pure DWORDs. @@ -73,7 +74,7 @@ #define OUT_CS(value) do { \ cs_copy->buf[cs_copy->cdw++] = (value); \ - CS_DEBUG(cs_count--;) \ + CS_USED_DW(1); \ } while (0) #define OUT_CS_32F(value) \ @@ -98,7 +99,7 @@ #define OUT_CS_TABLE(values, count) do { \ memcpy(cs_copy->buf + cs_copy->cdw, values, count * 4); \ cs_copy->cdw += count; \ - CS_DEBUG(cs_count -= count;) \ + CS_USED_DW(count); \ } while (0) @@ -106,27 +107,11 @@ * Writing relocations. 
*/ -#define OUT_CS_RELOC(bo, offset) do { \ - assert(bo); \ - OUT_CS(offset); \ - cs_winsys->cs_write_reloc(cs_copy, bo); \ - CS_DEBUG(cs_count -= 2;) \ -} while (0) - -#define OUT_CS_BUF_RELOC(bo, offset) do { \ - assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset); \ -} while (0) - -#define OUT_CS_TEX_RELOC(tex, offset) do { \ - assert(tex); \ - OUT_CS_RELOC(tex->cs_buffer, offset); \ -} while (0) - -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo) do { \ - assert(bo); \ - cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf); \ - CS_DEBUG(cs_count -= 2;) \ +#define OUT_CS_RELOC(r) do { \ + assert((r)); \ + assert((r)->cs_buf); \ + cs_winsys->cs_write_reloc(cs_copy, (r)->cs_buf); \ + CS_USED_DW(2); \ } while (0) @@ -135,7 +120,7 @@ */ #define WRITE_CS_TABLE(values, count) do { \ - CS_DEBUG(assert(cs_count == 0);) \ + assert(cs_count == 0); \ memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \ cs_copy->cdw += (count); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 60234497c95..a32c171460c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -352,11 +352,11 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); if (aa->dest) { - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset); + OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset); + OUT_CS_RELOC(aa->dest); - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch); + OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch); + OUT_CS_RELOC(aa->dest); } OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); @@ -391,11 +391,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) for (i = 0; i < fb->nr_cbufs; i++) { surf = r300_surface(fb->cbufs[i]); - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 
1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset); + OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch); + OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch); + OUT_CS_RELOC(surf); } /* Set up the ZB part of the CBZB clear. */ @@ -404,11 +404,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset); + OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch); + OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch); + OUT_CS_RELOC(surf); DBG(r300, DBG_CBZB, "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, @@ -420,11 +420,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->format); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset); + OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch); + OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch); + OUT_CS_RELOC(surf); if (can_hyperz) { uint32_t surf_pitch; @@ -568,7 +568,6 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); assert(caps->num_frag_pipes); @@ -586,25 +585,25 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 3) * 4); + 
OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4); + OUT_CS_RELOC(r300->query_current); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 2) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4); + OUT_CS_RELOC(r300->query_current); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); + OUT_CS_RELOC(r300->query_current); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); + OUT_CS_RELOC(r300->query_current); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -620,13 +619,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, static void rv530_emit_query_end_single_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -634,16 +632,15 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, static void rv530_emit_query_end_double_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, 
RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -803,8 +800,8 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); - OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_TEX_RELOC(tex, texstate->format.tile_config); + OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config); + OUT_CS_RELOC(tex); } } END_CS; @@ -892,7 +889,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde for (i = 0; i < vertex_array_count; i++) { buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b.b); + OUT_CS_RELOC(buf); } END_CS; } @@ -917,7 +914,8 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0); + OUT_CS(0); + OUT_CS_RELOC(r300_buffer(r300->vbo)); END_CS; } @@ -1194,15 +1192,15 @@ validate: /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { tex = r300_texture(fb->cbufs[i]->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + assert(tex && tex->buf && "cbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, r300_surface(fb->cbufs[i])->domain); } /* ...depth buffer... 
*/ if (fb->zsbuf) { tex = r300_texture(fb->zsbuf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + assert(tex && tex->buf && "zsbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, r300_surface(fb->zsbuf)->domain); } } @@ -1214,12 +1212,12 @@ validate: } tex = r300_texture(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, tex->domain, 0); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, tex->domain, 0); } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buffer, + r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 6223e043210..62dee8db59e 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -57,10 +57,10 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. 
*/ - q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, + q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, q->domain); - q->cs_buffer = r300->rws->buffer_get_cs_handle(r300->rws, q->buffer); + q->cs_buf = r300->rws->buffer_get_cs_handle(r300->rws, q->buf); return (struct pipe_query*)q; } @@ -71,7 +71,7 @@ static void r300_destroy_query(struct pipe_context* pipe, struct r300_context *r300 = r300_context(pipe); struct r300_query* q = r300_query(query); - r300->rws->buffer_reference(r300->rws, &q->buffer, NULL); + r300->rws->buffer_reference(r300->rws, &q->buf, NULL); remove_from_list(q); FREE(query); } @@ -137,7 +137,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0); - map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags); + map = r300->rws->buffer_map(r300->rws, q->buf, r300->cs, flags); if (!map) return FALSE; @@ -148,7 +148,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, map++; } - r300->rws->buffer_unmap(r300->rws, q->buffer); + r300->rws->buffer_unmap(r300->rws, q->buf); *result = temp; return TRUE; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2b4aa9f438f..0df3f9a0ba3 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -531,17 +531,12 @@ static void r300_emit_draw_elements(struct r300_context *r300, (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } - /* INDX_BUFFER is a truly special packet3. - * Unlike most other packet3, where the offset is after the count, - * the order is reversed, so the relocation ends up carrying the - * size of the indexbuf instead of the offset. 
- */ OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_BUF_RELOC(indexBuffer, count_dwords); - + OUT_CS(count_dwords); + OUT_CS_RELOC(r300_buffer(indexBuffer)); END_CS; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index aa4e05d4be5..7a6c2f512dd 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -625,10 +625,10 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. */ if (r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_CS)) + tex->cs_buf, R300_REF_CS)) r300->context.flush(&r300->context, 0, NULL); - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buf, tex->desc.microtile, tex->desc.macrotile[level], tex->desc.stride_in_bytes[0]); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 059c194e53f..c6809756e21 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -700,7 +700,7 @@ static unsigned r300_texture_is_referenced(struct pipe_context *context, struct r300_texture *rtex = (struct r300_texture *)texture; if (r300->rws->cs_is_buffer_referenced(r300->cs, - rtex->cs_buffer, R300_REF_CS)) + rtex->cs_buf, R300_REF_CS)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return PIPE_UNREFERENCED; @@ -713,7 +713,7 @@ static void r300_texture_destroy(struct pipe_screen *screen, struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; int i; - rws->buffer_reference(rws, &tex->buffer, NULL); + rws->buffer_reference(rws, &tex->buf, NULL); for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { if (tex->hiz_mem[i]) 
u_mmFreeMem(tex->hiz_mem[i]); @@ -733,7 +733,7 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, return FALSE; } - return rws->buffer_get_handle(rws, tex->buffer, + return rws->buffer_get_handle(rws, tex->buf, tex->desc.stride_in_bytes[0], whandle); } @@ -786,22 +786,22 @@ r300_texture_create_object(struct r300_screen *rscreen, tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT : R300_DOMAIN_VRAM | R300_DOMAIN_GTT; - tex->buffer = buffer; + tex->buf = buffer; /* Create the backing buffer if needed. */ - if (!tex->buffer) { - tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + if (!tex->buf) { + tex->buf = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, base->bind, base->usage, tex->domain); - if (!tex->buffer) { + if (!tex->buf) { FREE(tex); return NULL; } } - tex->cs_buffer = rws->buffer_get_cs_handle(rws, tex->buffer); + tex->cs_buf = rws->buffer_get_cs_handle(rws, tex->buf); - rws->buffer_set_tiling(rws, tex->buffer, + rws->buffer_set_tiling(rws, tex->buf, tex->desc.microtile, tex->desc.macrotile[0], tex->desc.stride_in_bytes[0]); @@ -899,8 +899,8 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; - surface->buffer = tex->buffer; - surface->cs_buffer = tex->cs_buffer; + surface->buf = tex->buf; + surface->cs_buf = tex->cs_buf; /* Prefer VRAM if there are multiple domains to choose from. 
*/ surface->domain = tex->domain; diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index ae93fab554e..314513561ba 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -90,13 +90,13 @@ r300_texture_get_transfer(struct pipe_context *ctx, referenced_cs = r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_CS); + tex->cs_buf, R300_REF_CS); if (referenced_cs) { referenced_hw = TRUE; } else { referenced_hw = r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_HW); + tex->cs_buf, R300_REF_HW); } blittable = ctx->screen->is_format_supported( @@ -235,12 +235,12 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ return rws->buffer_map(rws, - r300transfer->linear_texture->buffer, + r300transfer->linear_texture->buf, r300->cs, transfer->usage); } else { /* Tiling is disabled. */ - map = rws->buffer_map(rws, tex->buffer, r300->cs, + map = rws->buffer_map(rws, tex->buf, r300->cs, transfer->usage); if (!map) { @@ -261,8 +261,8 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, struct r300_texture *tex = r300_texture(transfer->resource); if (r300transfer->linear_texture) { - rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); + rws->buffer_unmap(rws, r300transfer->linear_texture->buf); } else { - rws->buffer_unmap(rws, tex->buffer); + rws->buffer_unmap(rws, tex->buf); } } -- cgit v1.2.3 From 56ba7e913fef0ea2b1bead582108f9ab3ab8263d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 10 Feb 2011 05:36:44 +0100 Subject: r300g: consolidate buffers and textures to r300_resource Transfers and create/destroy are still handled separately. 
--- src/gallium/drivers/r300/r300_blit.c | 6 +- src/gallium/drivers/r300/r300_context.c | 4 +- src/gallium/drivers/r300/r300_context.h | 26 +-- src/gallium/drivers/r300/r300_emit.c | 62 +++--- src/gallium/drivers/r300/r300_fs.c | 8 +- src/gallium/drivers/r300/r300_hyperz.c | 14 +- src/gallium/drivers/r300/r300_render.c | 10 +- src/gallium/drivers/r300/r300_resource.c | 20 +- src/gallium/drivers/r300/r300_screen.c | 2 +- src/gallium/drivers/r300/r300_screen_buffer.c | 42 ++-- src/gallium/drivers/r300/r300_screen_buffer.h | 23 --- src/gallium/drivers/r300/r300_state.c | 38 ++-- src/gallium/drivers/r300/r300_state_derived.c | 24 +-- src/gallium/drivers/r300/r300_texture.c | 101 ++++----- src/gallium/drivers/r300/r300_texture.h | 8 +- src/gallium/drivers/r300/r300_texture_desc.c | 283 +++++++++++++------------- src/gallium/drivers/r300/r300_texture_desc.h | 6 +- src/gallium/drivers/r300/r300_transfer.c | 32 +-- 18 files changed, 328 insertions(+), 381 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index e29990d4b95..cadd090d029 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -114,7 +114,7 @@ static boolean r300_fast_zclear_allowed(struct r300_context *r300) struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - return r300_texture(fb->zsbuf->texture)->desc.zmask_dwords[fb->zsbuf->u.tex.level]; + return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level]; } static uint32_t r300_depth_clear_value(enum pipe_format format, @@ -190,8 +190,8 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_texture *zstex = - fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; + struct r300_resource *zstex = + fb->zsbuf ? 
r300_resource(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 675877733cc..960e3c346ea 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -549,13 +549,13 @@ void r300_finish(struct r300_context *r300) for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]->texture) { r300->rws->buffer_wait(r300->rws, - r300_texture(fb->cbufs[i]->texture)->buf); + r300_resource(fb->cbufs[i]->texture)->buf); return; } } if (fb->zsbuf && fb->zsbuf->texture) { r300->rws->buffer_wait(r300->rws, - r300_texture(fb->zsbuf->texture)->buf); + r300_resource(fb->zsbuf->texture)->buf); } } } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index a9ce7cca58b..e55f138d638 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -323,13 +323,9 @@ struct r300_surface { /* Whether the CBZB clear is allowed on the surface. */ boolean cbzb_allowed; - }; struct r300_texture_desc { - /* Parent class. */ - struct u_resource b; - /* Width, height, and depth. * Most of the time, these are equal to pipe_texture::width0, height0, * and depth0. However, NPOT 3D textures must have dimensions aligned @@ -388,20 +384,26 @@ struct r300_texture_desc { unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; }; -struct r300_texture { - struct r300_texture_desc desc; - - enum r300_buffer_domain domain; +struct r300_resource +{ + struct u_vbuf_resource b; - /* Pipe buffer backing this texture. */ + /* Winsys buffer backing this resource. */ struct r300_winsys_buffer *buf; struct r300_winsys_cs_buffer *cs_buf; + enum r300_buffer_domain domain; + + /* Constant buffers are in user memory. */ + uint8_t *constant_buffer; + + /* Texture description (addressing, layout, special features). 
*/ + struct r300_texture_desc tex; /* Registers carrying texture format data. */ /* Only format-independent bits should be filled in. */ struct r300_texture_format_state tx_format; - /* hyper-z memory allocs */ + /* HiZ memory allocations. */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; @@ -611,9 +613,9 @@ static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf) return (struct r300_surface*)surf; } -static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex) +static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex) { - return (struct r300_texture*)tex; + return (struct r300_resource*)tex; } static INLINE struct r300_context* r300_context(struct pipe_context* context) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index a32c171460c..13c1f2d8424 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -89,7 +89,7 @@ static void get_rc_constant_state( struct rc_constant * constant) { struct r300_textures_state* texstate = r300->textures_state.state; - struct r300_texture *tex; + struct r300_resource *tex; assert(constant->Type == RC_CONSTANT_STATE); @@ -101,19 +101,19 @@ static void get_rc_constant_state( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. 
*/ case RC_STATE_R300_TEXRECT_FACTOR: - tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); - vec[0] = 1.0 / tex->desc.width0; - vec[1] = 1.0 / tex->desc.height0; + tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); + vec[0] = 1.0 / tex->tex.width0; + vec[1] = 1.0 / tex->tex.height0; vec[2] = 0; vec[3] = 1; break; case RC_STATE_R300_TEXSCALE_FACTOR: - tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); /* Add a small number to the texture size to work around rounding errors in hw. */ - vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f); - vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f); - vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f); + vec[0] = tex->b.b.b.width0 / (tex->tex.width0 + 0.001f); + vec[1] = tex->b.b.b.height0 / (tex->tex.height0 + 0.001f); + vec[2] = tex->b.b.b.depth0 / (tex->tex.depth0 + 0.001f); vec[3] = 1; break; @@ -428,9 +428,9 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) if (can_hyperz) { uint32_t surf_pitch; - struct r300_texture *tex; + struct r300_resource *tex; int level = surf->base.u.tex.level; - tex = r300_texture(surf->base.texture); + tex = r300_resource(surf->base.texture); surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; @@ -779,7 +779,7 @@ void r300_emit_textures_state(struct r300_context *r300, { struct r300_textures_state *allstate = (struct r300_textures_state*)state; struct r300_texture_sampler_state *texstate; - struct r300_texture *tex; + struct r300_resource *tex; unsigned i; CS_LOCALS(r300); @@ -789,7 +789,7 @@ void r300_emit_textures_state(struct r300_context *r300, for (i = 0; i < allstate->count; i++) { if ((1 << i) & allstate->tx_enable) { texstate = &allstate->regs[i]; - tex = r300_texture(allstate->sampler_views[i]->base.texture); + tex = 
r300_resource(allstate->sampler_views[i]->base.texture); OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0); OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1); @@ -846,7 +846,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; - struct r300_buffer *buf; + struct r300_resource *buf; int i; unsigned vertex_array_count = r300->velems->count; unsigned packet_size = (vertex_array_count * 3 + 1) / 2; @@ -888,7 +888,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde } for (i = 0; i < vertex_array_count; i++) { - buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]); + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); OUT_CS_RELOC(buf); } END_CS; @@ -915,7 +915,7 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); OUT_CS(0); - OUT_CS_RELOC(r300_buffer(r300->vbo)); + OUT_CS_RELOC(r300_resource(r300->vbo)); END_CS; } @@ -1103,13 +1103,13 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) (struct r300_hyperz_state*)r300->hyperz_state.state; struct r300_screen* r300screen = r300->screen; uint32_t stride, offset = 0, height, offset_shift; - struct r300_texture* tex; + struct r300_resource* tex; int i; - tex = r300_texture(fb->zsbuf->texture); + tex = r300_resource(fb->zsbuf->texture); offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; + stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level]; /* convert from pixels to 4x4 blocks */ stride = ALIGN_DIVUP(stride, 4); @@ -1138,15 +1138,15 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state { struct 
pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_texture *tex; + struct r300_resource *tex; CS_LOCALS(r300); - tex = r300_texture(fb->zsbuf->texture); + tex = r300_resource(fb->zsbuf->texture); BEGIN_CS(size); OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); OUT_CS(0); - OUT_CS(tex->desc.zmask_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]); OUT_CS(0); END_CS; @@ -1183,7 +1183,7 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture *tex; + struct r300_resource *tex; unsigned i; boolean flushed = FALSE; @@ -1191,14 +1191,14 @@ validate: if (r300->fb_state.dirty) { /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { - tex = r300_texture(fb->cbufs[i]->texture); + tex = r300_resource(fb->cbufs[i]->texture); assert(tex && tex->buf && "cbuf is marked, but NULL!"); r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, r300_surface(fb->cbufs[i])->domain); } /* ...depth buffer... */ if (fb->zsbuf) { - tex = r300_texture(fb->zsbuf->texture); + tex = r300_resource(fb->zsbuf->texture); assert(tex && tex->buf && "zsbuf is marked, but NULL!"); r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, r300_surface(fb->zsbuf)->domain); @@ -1211,7 +1211,7 @@ validate: continue; } - tex = r300_texture(texstate->sampler_views[i]->base.texture); + tex = r300_resource(texstate->sampler_views[i]->base.texture); r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, tex->domain, 0); } } @@ -1221,8 +1221,8 @@ validate: 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... 
*/ if (r300->vbo) - r300->rws->cs_add_reloc(r300->cs, r300_buffer(r300->vbo)->cs_buf, - r300_buffer(r300->vbo)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf, + r300_resource(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers) { struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer; @@ -1232,14 +1232,14 @@ validate: if (!*buf) continue; - r300->rws->cs_add_reloc(r300->cs, r300_buffer(*buf)->cs_buf, - r300_buffer(*buf)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf, + r300_resource(*buf)->domain, 0); } } /* ...and index buffer for HWTCL path. */ if (index_buffer) - r300->rws->cs_add_reloc(r300->cs, r300_buffer(index_buffer)->cs_buf, - r300_buffer(index_buffer)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, + r300_resource(index_buffer)->domain, 0); /* Now do the validation. */ if (!r300->rws->cs_validate(r300->cs)) { diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index b2c02bec86c..cec7473009a 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -152,13 +152,13 @@ static void get_external_state( for (i = 0; i < texstate->sampler_state_count; i++) { struct r300_sampler_state *s = texstate->sampler_states[i]; struct r300_sampler_view *v = texstate->sampler_views[i]; - struct r300_texture *t; + struct r300_resource *t; if (!s || !v) { continue; } - t = r300_texture(texstate->sampler_views[i]->base.texture); + t = r300_resource(texstate->sampler_views[i]->base.texture); if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { state->unit[i].compare_mode_enabled = 1; @@ -181,7 +181,7 @@ static void get_external_state( state->unit[i].non_normalized_coords = !s->state.normalized_coords; /* XXX this should probably take into account STR, not just S. 
*/ - if (t->desc.is_npot) { + if (t->tex.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; @@ -201,7 +201,7 @@ static void get_external_state( state->unit[i].wrap_mode = RC_WRAP_NONE; } - if (t->desc.b.b.target == PIPE_TEXTURE_3D) + if (t->b.b.b.target == PIPE_TEXTURE_3D) state->unit[i].clamp_and_scale_before_fetch = TRUE; } } diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 7767275e67e..873e0209d42 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -137,8 +137,8 @@ static void r300_update_hyperz(struct r300_context* r300) (struct r300_hyperz_state*)r300->hyperz_state.state; struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_texture *zstex = - fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; + struct r300_resource *zstex = + fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; @@ -170,7 +170,7 @@ static void r300_update_hyperz(struct r300_context* r300) } } - if (zstex->desc.zcomp8x8[fb->zsbuf->u.tex.level]) { + if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) { z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; } @@ -309,17 +309,17 @@ void r300_update_hyperz_state(struct r300_context* r300) void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) { - struct r300_texture *tex; + struct r300_resource *tex; uint32_t zsize, ndw; int level = surf->base.u.tex.level; - tex = r300_texture(surf->base.texture); + tex = r300_resource(surf->base.texture); if (tex->hiz_mem[level]) return; - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); + zsize = tex->tex.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->b.b.b.format); ndw = ALIGN_DIVUP(zsize, 64); tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 
0); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 0df3f9a0ba3..eda5c48cfbb 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -250,7 +250,7 @@ static boolean r300_emit_states(struct r300_context *r300, if (user_buffers) r300->upload_vb_validated = TRUE; if (r300->index_buffer.buffer && - r300_buffer(r300->index_buffer.buffer)->b.user_ptr) { + r300_resource(r300->index_buffer.buffer)->b.user_ptr) { r300->upload_ib_validated = TRUE; } } @@ -330,7 +330,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, if (!checked[vbi]) { buf = r300->vbuf_mgr->real_vertex_buffer[vbi]; - if (!(r300_buffer(buf)->domain & R300_DOMAIN_GTT)) { + if (!(r300_resource(buf)->domain & R300_DOMAIN_GTT)) { return FALSE; } @@ -536,7 +536,7 @@ static void r300_emit_draw_elements(struct r300_context *r300, (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); OUT_CS(count_dwords); - OUT_CS_RELOC(r300_buffer(indexBuffer)); + OUT_CS_RELOC(r300_resource(indexBuffer)); END_CS; } @@ -570,7 +570,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, /* Fallback for misaligned ushort indices. 
*/ if (indexSize == 2 && (start & 1) && - !r300_buffer(indexBuffer)->b.user_ptr) { + !r300_resource(indexBuffer)->b.user_ptr) { struct pipe_transfer *transfer; struct pipe_resource *userbuf; @@ -592,7 +592,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, } pipe_buffer_unmap(pipe, transfer); } else { - if (r300_buffer(indexBuffer)->b.user_ptr) + if (r300_resource(indexBuffer)->b.user_ptr) r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count); } diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c index dd1df970594..d788cedb174 100644 --- a/src/gallium/drivers/r300/r300_resource.c +++ b/src/gallium/drivers/r300/r300_resource.c @@ -38,26 +38,22 @@ r300_resource_create(struct pipe_screen *screen, } -static struct pipe_resource * -r300_resource_from_handle(struct pipe_screen * screen, - const struct pipe_resource *templ, - struct winsys_handle *whandle) +static unsigned r300_resource_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned level, int layer) { - if (templ->target == PIPE_BUFFER) - return NULL; - else - return r300_texture_from_handle(screen, templ, whandle); + return r300_buffer_is_referenced(context, buf, R300_REF_CS); } void r300_init_resource_functions(struct r300_context *r300) { r300->context.get_transfer = u_get_transfer_vtbl; r300->context.transfer_map = u_transfer_map_vtbl; - r300->context.transfer_flush_region = u_transfer_flush_region_vtbl; + r300->context.transfer_flush_region = u_default_transfer_flush_region; r300->context.transfer_unmap = u_transfer_unmap_vtbl; r300->context.transfer_destroy = u_transfer_destroy_vtbl; r300->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r300->context.is_resource_referenced = u_is_resource_referenced_vtbl; + r300->context.is_resource_referenced = r300_resource_is_referenced_by_cs; r300->context.create_surface = r300_create_surface; r300->context.surface_destroy = 
r300_surface_destroy; } @@ -65,8 +61,8 @@ void r300_init_resource_functions(struct r300_context *r300) void r300_init_screen_resource_functions(struct r300_screen *r300screen) { r300screen->screen.resource_create = r300_resource_create; - r300screen->screen.resource_from_handle = r300_resource_from_handle; - r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl; + r300screen->screen.resource_from_handle = r300_texture_from_handle; + r300screen->screen.resource_get_handle = r300_resource_get_handle; r300screen->screen.resource_destroy = u_resource_destroy_vtbl; r300screen->screen.user_buffer_create = r300_user_buffer_create; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index fc8131f03be..f54ba4286d9 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -459,7 +459,7 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) rws->get_value(rws, R300_VID_DRM_2_3_0); util_slab_create(&r300screen->pool_buffers, - sizeof(struct r300_buffer), 64, + sizeof(struct r300_resource), 64, UTIL_SLAB_SINGLETHREADED); r300screen->rws = rws; diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4a3cc7a3f41..cf4a2e9dae5 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -38,9 +38,9 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, enum r300_reference_domain domain) { struct r300_context *r300 = r300_context(context); - struct r300_buffer *rbuf = r300_buffer(buf); + struct r300_resource *rbuf = r300_resource(buf); - if (rbuf->b.user_ptr) + if (rbuf->b.user_ptr || rbuf->constant_buffer) return PIPE_UNREFERENCED; if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain)) @@ -49,20 +49,13 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, return PIPE_UNREFERENCED; } -static unsigned 
r300_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) -{ - return r300_buffer_is_referenced(context, buf, R300_REF_CS); -} - void r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned *start, unsigned count) { unsigned index_offset; - uint8_t *ptr = r300_buffer(*index_buffer)->b.user_ptr; + uint8_t *ptr = r300_resource(*index_buffer)->b.user_ptr; boolean flushed; *index_buffer = NULL; @@ -85,7 +78,7 @@ static void r300_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf = r300_buffer(buf); + struct r300_resource *rbuf = r300_resource(buf); struct r300_winsys_screen *rws = r300screen->rws; if (rbuf->constant_buffer) @@ -136,7 +129,7 @@ r300_buffer_transfer_map( struct pipe_context *pipe, struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; - struct r300_buffer *rbuf = r300_buffer(transfer->resource); + struct r300_resource *rbuf = r300_resource(transfer->resource); uint8_t *map; if (rbuf->b.user_ptr) @@ -152,19 +145,12 @@ r300_buffer_transfer_map( struct pipe_context *pipe, return map + transfer->box.x; } -static void r300_buffer_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - /* no-op */ -} - static void r300_buffer_transfer_unmap( struct pipe_context *pipe, struct pipe_transfer *transfer ) { struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; - struct r300_buffer *rbuf = r300_buffer(transfer->resource); + struct r300_resource *rbuf = r300_resource(transfer->resource); if (rbuf->buf) { rws->buffer_unmap(rws, rbuf->buf); @@ -182,7 +168,7 @@ static void r300_buffer_transfer_inline_write(struct pipe_context 
*pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_winsys_screen *rws = r300->screen->rws; - struct r300_buffer *rbuf = r300_buffer(resource); + struct r300_resource *rbuf = r300_resource(resource); uint8_t *map = NULL; if (rbuf->constant_buffer) { @@ -201,13 +187,13 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe, static const struct u_resource_vtbl r300_buffer_vtbl = { - u_default_resource_get_handle, /* get_handle */ + NULL, /* get_handle */ r300_buffer_destroy, /* resource_destroy */ - r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ + NULL, /* is_buffer_referenced */ r300_buffer_get_transfer, /* get_transfer */ r300_buffer_transfer_destroy, /* transfer_destroy */ r300_buffer_transfer_map, /* transfer_map */ - r300_buffer_transfer_flush_region, /* transfer_flush_region */ + NULL, /* transfer_flush_region */ r300_buffer_transfer_unmap, /* transfer_unmap */ r300_buffer_transfer_inline_write /* transfer_inline_write */ }; @@ -216,13 +202,11 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf; + struct r300_resource *rbuf; unsigned alignment = 16; rbuf = util_slab_alloc(&r300screen->pool_buffers); - rbuf->magic = R300_BUFFER_MAGIC; - rbuf->b.b.b = *templ; rbuf->b.b.vtbl = &r300_buffer_vtbl; pipe_reference_init(&rbuf->b.b.b.reference, 1); @@ -259,12 +243,10 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, unsigned bind) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf; + struct r300_resource *rbuf; rbuf = util_slab_alloc(&r300screen->pool_buffers); - rbuf->magic = R300_BUFFER_MAGIC; - pipe_reference_init(&rbuf->b.b.b.reference, 1); rbuf->b.b.b.screen = screen; rbuf->b.b.b.target = PIPE_BUFFER; diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 
1dfbc1399ba..3276f843b0c 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -35,29 +35,6 @@ #include "r300_winsys.h" #include "r300_context.h" -#define R300_BUFFER_MAGIC 0xabcd1234 -#define R300_BUFFER_MAX_RANGES 32 - -struct r300_buffer_range { - uint32_t start; - uint32_t end; -}; - -/* Vertex buffer. */ -struct r300_buffer -{ - struct u_vbuf_resource b; - - uint32_t magic; - - struct r300_winsys_buffer *buf; - struct r300_winsys_cs_buffer *cs_buf; - - enum r300_buffer_domain domain; - - uint8_t *constant_buffer; -}; - /* Functions. */ void r300_upload_index_buffer(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 7a6c2f512dd..f0b4ad57bfc 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -616,12 +616,12 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } static void r300_tex_set_tiling_flags(struct r300_context *r300, - struct r300_texture *tex, unsigned level) + struct r300_resource *tex, unsigned level) { /* Check if the macrotile flag needs to be changed. * Skip changing the flags otherwise. */ - if (tex->desc.macrotile[tex->surface_level] != - tex->desc.macrotile[level]) { + if (tex->tex.macrotile[tex->surface_level] != + tex->tex.macrotile[level]) { /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. 
*/ if (r300->rws->cs_is_buffer_referenced(r300->cs, @@ -629,8 +629,8 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, r300->context.flush(&r300->context, 0, NULL); r300->rws->buffer_set_tiling(r300->rws, tex->buf, - tex->desc.microtile, tex->desc.macrotile[level], - tex->desc.stride_in_bytes[0]); + tex->tex.microtile, tex->tex.macrotile[level], + tex->tex.stride_in_bytes[0]); tex->surface_level = level; } @@ -645,12 +645,12 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300, /* Set tiling flags for new surfaces. */ for (i = 0; i < state->nr_cbufs; i++) { r300_tex_set_tiling_flags(r300, - r300_texture(state->cbufs[i]->texture), + r300_resource(state->cbufs[i]->texture), state->cbufs[i]->u.tex.level); } if (state->zsbuf) { r300_tex_set_tiling_flags(r300, - r300_texture(state->zsbuf->texture), + r300_resource(state->zsbuf->texture), state->zsbuf->u.tex.level); } } @@ -659,7 +659,7 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, const char *binding) { struct pipe_resource *tex = surf->texture; - struct r300_texture *rtex = r300_texture(tex); + struct r300_resource *rtex = r300_resource(tex); fprintf(stderr, "r300: %s[%i] Dim: %ix%i, Firstlayer: %i, " @@ -672,9 +672,9 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level, util_format_short_name(surf->format), - rtex->desc.macrotile[0] ? "YES" : " NO", - rtex->desc.microtile ? "YES" : " NO", - rtex->desc.stride_in_pixels[0], + rtex->tex.macrotile[0] ? "YES" : " NO", + rtex->tex.microtile ? "YES" : " NO", + rtex->tex.stride_in_pixels[0], tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } @@ -802,7 +802,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe, /* Setup Hyper-Z. 
*/ if (can_hyperz) { struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_texture *tex = r300_texture(zs_surf->base.texture); + struct r300_resource *tex = r300_resource(zs_surf->base.texture); int level = zs_surf->base.u.tex.level; /* work out whether we can support hiz on this buffer */ @@ -1313,7 +1313,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_textures_state* state = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture *texture; + struct r300_resource *texture; unsigned i, real_num_views = 0, view_index = 0; unsigned tex_units = r300->screen->caps.num_tex_units; boolean dirty_tex = FALSE; @@ -1342,8 +1342,8 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ - texture = r300_texture(views[i]->texture); - if (texture->desc.is_npot) { + texture = r300_resource(views[i]->texture); + if (texture->tex.is_npot) { r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); } @@ -1376,7 +1376,7 @@ r300_create_sampler_view(struct pipe_context *pipe, const struct pipe_sampler_view *templ) { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); - struct r300_texture *tex = r300_texture(texture); + struct r300_resource *tex = r300_resource(texture); boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle; @@ -1494,7 +1494,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, /* HW TCL. 
*/ for (i = 0; i < count; i++) { if (buffers[i].buffer && - !r300_buffer(buffers[i].buffer)->b.user_ptr) { + !r300_resource(buffers[i].buffer)->b.user_ptr) { r300->validate_buffers = TRUE; } } @@ -1515,7 +1515,7 @@ static void r300_set_index_buffer(struct pipe_context* pipe, memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); if (r300->screen->caps.has_tcl && - !r300_buffer(ib->buffer)->b.user_ptr) { + !r300_resource(ib->buffer)->b.user_ptr) { r300->validate_buffers = TRUE; r300->upload_ib_validated = FALSE; } @@ -1738,7 +1738,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; - struct r300_buffer *rbuf = r300_buffer(buf); + struct r300_resource *rbuf = r300_resource(buf); uint32_t *mapped; switch (shader) { diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index de4c2713281..41a02f37ce0 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -691,7 +691,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_texture_sampler_state *texstate; struct r300_sampler_state *sampler; struct r300_sampler_view *view; - struct r300_texture *tex; + struct r300_resource *tex; unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); @@ -709,7 +709,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) state->tx_enable |= 1 << i; view = state->sampler_views[i]; - tex = r300_texture(view->base.texture); + tex = r300_resource(view->base.texture); sampler = state->sampler_states[i]; texstate = &state->regs[i]; @@ -725,32 +725,32 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level, - tex->desc.b.b.last_level, 
view->base.u.tex.last_level); + tex->b.b.b.last_level, view->base.u.tex.last_level); min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level, max_level); - if (tex->desc.is_npot && min_level > 0) { + if (tex->tex.is_npot && min_level > 0) { /* Even though we do not implement mipmapping for NPOT * textures, we should at least honor the minimum level * which is allowed to be displayed. We do this by setting up * an i-th mipmap level as the zero level. */ - r300_texture_setup_format_state(r300->screen, &tex->desc, + r300_texture_setup_format_state(r300->screen, tex, min_level, &texstate->format); texstate->format.tile_config |= - tex->desc.offset_in_bytes[min_level] & 0xffffffe0; - assert((tex->desc.offset_in_bytes[min_level] & 0x1f) == 0); + tex->tex.offset_in_bytes[min_level] & 0xffffffe0; + assert((tex->tex.offset_in_bytes[min_level] & 0x1f) == 0); } /* Assign a texture cache region. */ texstate->format.format1 |= view->texcache_region; /* Depth textures are kinda special. */ - if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + if (util_format_is_depth_or_stencil(tex->b.b.b.format)) { unsigned char depth_swizzle[4]; if (!r300->screen->caps.is_r500 && - util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + util_format_get_blocksizebits(tex->b.b.b.format) == 32) { /* X24x8 is sampled as Y16X16 on r3xx-r4xx. * The depth here is at the Y component. 
*/ for (j = 0; j < 4; j++) @@ -775,17 +775,17 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } if (r300->screen->caps.dxtc_swizzle && - util_format_is_compressed(tex->desc.b.b.format)) { + util_format_is_compressed(tex->b.b.b.format)) { texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE; } /* to emulate 1D textures through 2D ones correctly */ - if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { + if (tex->b.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->desc.is_npot) { + if (tex->tex.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index c6809756e21..5b4d7b72af1 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -609,11 +609,12 @@ boolean r300_is_sampler_format_supported(enum pipe_format format) } void r300_texture_setup_format_state(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level, struct r300_texture_format_state *out) { - struct pipe_resource *pt = &desc->b.b; + struct pipe_resource *pt = &tex->b.b.b; + struct r300_texture_desc *desc = &tex->tex; boolean is_r500 = screen->caps.is_r500; /* Mask out all the fields we change. */ @@ -658,22 +659,22 @@ void r300_texture_setup_format_state(struct r300_screen *screen, static void r300_texture_setup_fb_state(struct r300_surface *surf) { - struct r300_texture *tex = r300_texture(surf->base.texture); + struct r300_resource *tex = r300_resource(surf->base.texture); unsigned level = surf->base.u.tex.level; /* Set framebuffer state. 
*/ if (util_format_is_depth_or_stencil(surf->base.format)) { surf->pitch = - tex->desc.stride_in_pixels[level] | - R300_DEPTHMACROTILE(tex->desc.macrotile[level]) | - R300_DEPTHMICROTILE(tex->desc.microtile); + tex->tex.stride_in_pixels[level] | + R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | + R300_DEPTHMICROTILE(tex->tex.microtile); surf->format = r300_translate_zsformat(surf->base.format); } else { surf->pitch = - tex->desc.stride_in_pixels[level] | + tex->tex.stride_in_pixels[level] | r300_translate_colorformat(surf->base.format) | - R300_COLOR_TILE(tex->desc.macrotile[level]) | - R300_COLOR_MICROTILE(tex->desc.microtile); + R300_COLOR_TILE(tex->tex.macrotile[level]) | + R300_COLOR_MICROTILE(tex->tex.microtile); surf->format = r300_translate_out_fmt(surf->base.format); } } @@ -692,24 +693,10 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, tex->format = new_format; } -static unsigned r300_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - struct r300_context *r300 = r300_context(context); - struct r300_texture *rtex = (struct r300_texture *)texture; - - if (r300->rws->cs_is_buffer_referenced(r300->cs, - rtex->cs_buf, R300_REF_CS)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; - - return PIPE_UNREFERENCED; -} - static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { - struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_resource* tex = (struct r300_resource*)texture; struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; int i; @@ -722,36 +709,36 @@ static void r300_texture_destroy(struct pipe_screen *screen, FREE(tex); } -static boolean r300_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *texture, - struct winsys_handle *whandle) +boolean r300_resource_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle 
*whandle) { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_resource* tex = (struct r300_resource*)texture; if (!tex) { return FALSE; } return rws->buffer_get_handle(rws, tex->buf, - tex->desc.stride_in_bytes[0], whandle); + tex->tex.stride_in_bytes[0], whandle); } static const struct u_resource_vtbl r300_texture_vtbl = { - r300_texture_get_handle, /* get_handle */ - r300_texture_destroy, /* resource_destroy */ - r300_texture_is_referenced, /* is_resource_referenced */ - r300_texture_get_transfer, /* get_transfer */ - r300_texture_transfer_destroy, /* transfer_destroy */ - r300_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region, /* transfer_flush_region */ - r300_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + NULL, /* get_handle */ + r300_texture_destroy, /* resource_destroy */ + NULL, /* is_resource_referenced */ + r300_texture_get_transfer, /* get_transfer */ + r300_texture_transfer_destroy, /* transfer_destroy */ + r300_texture_transfer_map, /* transfer_map */ + NULL, /* transfer_flush_region */ + r300_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ }; /* The common texture constructor. */ -static struct r300_texture* +static struct r300_resource* r300_texture_create_object(struct r300_screen *rscreen, const struct pipe_resource *base, enum r300_buffer_tiling microtile, @@ -761,7 +748,7 @@ r300_texture_create_object(struct r300_screen *rscreen, struct r300_winsys_buffer *buffer) { struct r300_winsys_screen *rws = rscreen->rws; - struct r300_texture *tex = CALLOC_STRUCT(r300_texture); + struct r300_resource *tex = CALLOC_STRUCT(r300_resource); if (!tex) { if (buffer) rws->buffer_reference(rws, &buffer, NULL); @@ -769,7 +756,7 @@ r300_texture_create_object(struct r300_screen *rscreen, } /* Initialize the descriptor. 
*/ - if (!r300_texture_desc_init(rscreen, &tex->desc, base, + if (!r300_texture_desc_init(rscreen, tex, base, microtile, macrotile, stride_in_bytes_override, max_buffer_size)) { @@ -779,10 +766,10 @@ r300_texture_create_object(struct r300_screen *rscreen, return NULL; } /* Initialize the hardware state. */ - r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format); + r300_texture_setup_format_state(rscreen, tex, 0, &tex->tx_format); - tex->desc.b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->desc.b.b.reference, 1); + tex->b.b.vtbl = &r300_texture_vtbl; + pipe_reference_init(&tex->b.b.b.reference, 1); tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT : R300_DOMAIN_VRAM | R300_DOMAIN_GTT; @@ -790,7 +777,7 @@ r300_texture_create_object(struct r300_screen *rscreen, /* Create the backing buffer if needed. */ if (!tex->buf) { - tex->buf = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, base->bind, base->usage, tex->domain); if (!tex->buf) { @@ -802,8 +789,8 @@ r300_texture_create_object(struct r300_screen *rscreen, tex->cs_buf = rws->buffer_get_cs_handle(rws, tex->buf); rws->buffer_set_tiling(rws, tex->buf, - tex->desc.microtile, tex->desc.macrotile[0], - tex->desc.stride_in_bytes[0]); + tex->tex.microtile, tex->tex.macrotile[0], + tex->tex.stride_in_bytes[0]); return tex; } @@ -879,7 +866,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, struct pipe_resource* texture, const struct pipe_surface *surf_tmpl) { - struct r300_texture* tex = r300_texture(texture); + struct r300_resource* tex = r300_resource(texture); struct r300_surface* surface = CALLOC_STRUCT(r300_surface); unsigned level = surf_tmpl->u.tex.level; @@ -907,19 +894,19 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, if (surface->domain & R300_DOMAIN_VRAM) surface->domain &= ~R300_DOMAIN_GTT; - surface->offset = 
r300_texture_get_offset(&tex->desc, level, + surface->offset = r300_texture_get_offset(tex, level, surf_tmpl->u.tex.first_layer); r300_texture_setup_fb_state(surface); /* Parameters for the CBZB clear. */ - surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; + surface->cbzb_allowed = tex->tex.cbzb_allowed[level]; surface->cbzb_width = align(surface->base.width, 64); /* Height must be aligned to the size of a tile. */ - tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, - tex->desc.b.b.nr_samples, - tex->desc.microtile, - tex->desc.macrotile[level], + tile_height = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], DIM_HEIGHT, 0); surface->cbzb_height = align((surface->base.height + 1) / 2, @@ -928,7 +915,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, /* Offset must be aligned to 2K and must point at the beginning * of a scanline. */ offset = surface->offset + - tex->desc.stride_in_bytes[level] * surface->cbzb_height; + tex->tex.stride_in_bytes[level] * surface->cbzb_height; surface->cbzb_midpoint_offset = offset & ~2047; surface->cbzb_pitch = surface->pitch & 0x1ffffc; @@ -943,8 +930,8 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->cbzb_allowed ? "YES" : " NO", surface->cbzb_width, surface->cbzb_height, offset & 2047, - tex->desc.microtile ? "YES" : " NO", - tex->desc.macrotile[level] ? "YES" : " NO"); + tex->tex.microtile ? "YES" : " NO", + tex->tex.macrotile[level] ? 
"YES" : " NO"); } return &surface->base; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 0ab22f747e4..a4838bea81e 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -32,7 +32,7 @@ struct pipe_resource; struct winsys_handle; struct r300_texture_format_state; struct r300_texture_desc; -struct r300_texture; +struct r300_resource; struct r300_screen; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, @@ -57,10 +57,14 @@ boolean r300_is_zs_format_supported(enum pipe_format format); boolean r300_is_sampler_format_supported(enum pipe_format format); void r300_texture_setup_format_state(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level, struct r300_texture_format_state *out); +boolean r300_resource_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle *whandle); + struct pipe_resource* r300_texture_from_handle(struct pipe_screen* screen, const struct pipe_resource* base, diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 4faa88f4c52..eb946ba7c28 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -90,19 +90,19 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, } /* Return true if macrotiling should be enabled on the miplevel. 
*/ -static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, +static boolean r300_texture_macro_switch(struct r300_resource *tex, unsigned level, boolean rv350_mode, enum r300_dim dim) { unsigned tile, texdim; - tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, - desc->microtile, R300_BUFFER_TILED, dim, 0); + tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples, + tex->tex.microtile, R300_BUFFER_TILED, dim, 0); if (dim == DIM_WIDTH) { - texdim = u_minify(desc->width0, level); + texdim = u_minify(tex->tex.width0, level); } else { - texdim = u_minify(desc->height0, level); + texdim = u_minify(tex->tex.height0, level); } /* See TX_FILTER1_n.MACRO_SWITCH. */ @@ -118,7 +118,7 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, * at the given level. */ static unsigned r300_texture_get_stride(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level) { unsigned tile_width, width, stride; @@ -126,62 +126,62 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, screen->caps.family == CHIP_FAMILY_RS690 || screen->caps.family == CHIP_FAMILY_RS740); - if (desc->stride_in_bytes_override) - return desc->stride_in_bytes_override; + if (tex->tex.stride_in_bytes_override) + return tex->tex.stride_in_bytes_override; /* Check the level. 
*/ - if (level > desc->b.b.last_level) { + if (level > tex->b.b.b.last_level) { SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, desc->b.b.last_level); + __FUNCTION__, level, tex->b.b.b.last_level); return 0; } - width = u_minify(desc->width0, level); + width = u_minify(tex->tex.width0, level); - if (util_format_is_plain(desc->b.b.format)) { - tile_width = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], + if (util_format_is_plain(tex->b.b.b.format)) { + tile_width = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], DIM_WIDTH, is_rs690); width = align(width, tile_width); - stride = util_format_get_stride(desc->b.b.format, width); + stride = util_format_get_stride(tex->b.b.b.format, width); /* The alignment to 32 bytes is sort of implied by the layout... */ return stride; } else { - return align(util_format_get_stride(desc->b.b.format, width), is_rs690 ? 64 : 32); + return align(util_format_get_stride(tex->b.b.b.format, width), is_rs690 ? 64 : 32); } } -static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, +static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, unsigned level, boolean *out_aligned_for_cbzb) { unsigned height, tile_height; - height = u_minify(desc->height0, level); + height = u_minify(tex->tex.height0, level); - if (util_format_is_plain(desc->b.b.format)) { - tile_height = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], + if (util_format_is_plain(tex->b.b.b.format)) { + tile_height = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], DIM_HEIGHT, 0); height = align(height, tile_height); /* This is needed for the kernel checker, unfortunately. 
*/ - if ((desc->b.b.target != PIPE_TEXTURE_1D && - desc->b.b.target != PIPE_TEXTURE_2D && - desc->b.b.target != PIPE_TEXTURE_RECT) || - desc->b.b.last_level != 0) { + if ((tex->b.b.b.target != PIPE_TEXTURE_1D && + tex->b.b.b.target != PIPE_TEXTURE_2D && + tex->b.b.b.target != PIPE_TEXTURE_RECT) || + tex->b.b.b.last_level != 0) { height = util_next_power_of_two(height); } /* See if the CBZB clear can be used on the buffer, * taking the texture size into account. */ if (out_aligned_for_cbzb) { - if (desc->macrotile[level]) { + if (tex->tex.macrotile[level]) { /* When clearing, the layer (width*height) is horizontally split * into two, and the upper and lower halves are cleared by the CB * and ZB units, respectively. Therefore, the number of macrotiles @@ -189,10 +189,10 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, /* Align the height so that there is an even number of macrotiles. * Do so for 3 or more macrotiles in the Y direction. */ - if (level == 0 && desc->b.b.last_level == 0 && - (desc->b.b.target == PIPE_TEXTURE_1D || - desc->b.b.target == PIPE_TEXTURE_2D || - desc->b.b.target == PIPE_TEXTURE_RECT) && + if (level == 0 && tex->b.b.b.last_level == 0 && + (tex->b.b.b.target == PIPE_TEXTURE_1D || + tex->b.b.b.target == PIPE_TEXTURE_2D || + tex->b.b.b.target == PIPE_TEXTURE_RECT) && height >= tile_height * 3) { height = align(height, tile_height * 2); } @@ -204,11 +204,11 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, } } - return util_format_get_nblocksy(desc->b.b.format, height); + return util_format_get_nblocksy(tex->b.b.b.format, height); } static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures * incorrectly. This is a workaround to prevent CS from being rejected. 
*/ @@ -216,17 +216,17 @@ static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, unsigned i, size; if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - desc->b.b.target == PIPE_TEXTURE_3D && - desc->b.b.last_level > 0) { + tex->b.b.b.target == PIPE_TEXTURE_3D && + tex->b.b.b.last_level > 0) { size = 0; - for (i = 0; i <= desc->b.b.last_level; i++) { - size += desc->stride_in_bytes[i] * - r300_texture_get_nblocksy(desc, i, FALSE); + for (i = 0; i <= tex->b.b.b.last_level; i++) { + size += tex->tex.stride_in_bytes[i] * + r300_texture_get_nblocksy(tex, i, FALSE); } - size *= desc->depth0; - desc->size_in_bytes = size; + size *= tex->tex.depth0; + tex->tex.size_in_bytes = size; } } @@ -239,15 +239,15 @@ static unsigned stride_to_width(enum pipe_format format, } static void r300_setup_miptree(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, boolean align_for_cbzb) { - struct pipe_resource *base = &desc->b.b; + struct pipe_resource *base = &tex->b.b.b; unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean aligned_for_cbzb; - desc->size_in_bytes = 0; + tex->tex.size_in_bytes = 0; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", @@ -255,21 +255,21 @@ static void r300_setup_miptree(struct r300_screen *screen, for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ - desc->macrotile[i] = - (desc->macrotile[0] == R300_BUFFER_TILED && - r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? + tex->tex.macrotile[i] = + (tex->tex.macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? 
R300_BUFFER_TILED : R300_BUFFER_LINEAR; - stride = r300_texture_get_stride(screen, desc, i); + stride = r300_texture_get_stride(screen, tex, i); /* Compute the number of blocks in Y, see if the CBZB clear can be * used on the texture. */ aligned_for_cbzb = FALSE; - if (align_for_cbzb && desc->cbzb_allowed[i]) - nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + if (align_for_cbzb && tex->tex.cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb); else - nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + nblocksy = r300_texture_get_nblocksy(tex, i, NULL); layer_size = stride * nblocksy; @@ -280,64 +280,64 @@ static void r300_setup_miptree(struct r300_screen *screen, if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * u_minify(desc->depth0, i); + size = layer_size * u_minify(tex->tex.depth0, i); - desc->offset_in_bytes[i] = desc->size_in_bytes; - desc->size_in_bytes = desc->offset_in_bytes[i] + size; - desc->layer_size_in_bytes[i] = layer_size; - desc->stride_in_bytes[i] = stride; - desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); - desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; + tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes; + tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size; + tex->tex.layer_size_in_bytes[i] = layer_size; + tex->tex.stride_in_bytes[i] = stride; + tex->tex.stride_in_pixels[i] = stride_to_width(tex->b.b.b.format, stride); + tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(desc->width0, i), u_minify(desc->height0, i), - u_minify(desc->depth0, i), stride, desc->size_in_bytes, - desc->macrotile[i] ? 
"TRUE" : "FALSE"); + i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i), + u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes, + tex->tex.macrotile[i] ? "TRUE" : "FALSE"); } } -static void r300_setup_flags(struct r300_texture_desc *desc) +static void r300_setup_flags(struct r300_resource *tex) { - desc->uses_stride_addressing = - !util_is_power_of_two(desc->b.b.width0) || - (desc->stride_in_bytes_override && - stride_to_width(desc->b.b.format, - desc->stride_in_bytes_override) != desc->b.b.width0); - - desc->is_npot = - desc->uses_stride_addressing || - !util_is_power_of_two(desc->b.b.height0) || - !util_is_power_of_two(desc->b.b.depth0); + tex->tex.uses_stride_addressing = + !util_is_power_of_two(tex->b.b.b.width0) || + (tex->tex.stride_in_bytes_override && + stride_to_width(tex->b.b.b.format, + tex->tex.stride_in_bytes_override) != tex->b.b.b.width0); + + tex->tex.is_npot = + tex->tex.uses_stride_addressing || + !util_is_power_of_two(tex->b.b.b.height0) || + !util_is_power_of_two(tex->b.b.b.depth0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { unsigned i, bpp; boolean first_level_valid; - bpp = util_format_get_blocksizebits(desc->b.b.format); + bpp = util_format_get_blocksizebits(tex->b.b.b.format); /* 1) The texture must be point-sampled, * 2) The depth must be 16 or 32 bits. * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage * with certain texture sizes. Macrotiling ensures the alignment. 
*/ - first_level_valid = desc->b.b.nr_samples <= 1 && + first_level_valid = tex->b.b.b.nr_samples <= 1 && (bpp == 16 || bpp == 32) && - desc->macrotile[0]; + tex->tex.macrotile[0]; if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB)) first_level_valid = FALSE; - for (i = 0; i <= desc->b.b.last_level; i++) - desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; + for (i = 0; i <= tex->b.b.b.last_level; i++) + tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; } #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) static void r300_setup_zmask_flags(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { /* The tile size of 1 DWORD is: * @@ -351,9 +351,9 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; - if (util_format_is_depth_or_stencil(desc->b.b.format) && - util_format_get_blocksizebits(desc->b.b.format) == 32 && - desc->microtile) { + if (util_format_is_depth_or_stencil(tex->b.b.b.format) && + util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + tex->tex.microtile) { unsigned i, pipes; if (screen->caps.family == CHIP_FAMILY_RV530) { @@ -362,18 +362,18 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, pipes = screen->caps.num_frag_pipes; } - for (i = 0; i <= desc->b.b.last_level; i++) { + for (i = 0; i <= tex->b.b.b.last_level; i++) { unsigned numdw, compsize; /* The 8x8 compression mode needs macrotiling. */ compsize = screen->caps.z_compress == R300_ZCOMP_8X8 && - desc->macrotile[i] && - desc->b.b.nr_samples <= 1 ? 8 : 4; + tex->tex.macrotile[i] && + tex->b.b.b.nr_samples <= 1 ? 8 : 4; /* Get the zbuffer size (with the aligned width and height). 
*/ - numdw = align(desc->stride_in_pixels[i], + numdw = align(tex->tex.stride_in_pixels[i], num_blocks_x_per_dw[pipes-1] * compsize) * - align(u_minify(desc->b.b.height0, i), + align(u_minify(tex->b.b.b.height0, i), num_blocks_y_per_dw[pipes-1] * compsize); /* Convert pixels -> dwords. */ @@ -382,21 +382,21 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, /* Check that we have enough ZMASK memory. */ if (numdw <= screen->caps.zmask_ram * pipes) { - desc->zmask_dwords[i] = numdw; - desc->zcomp8x8[i] = compsize == 8; + tex->tex.zmask_dwords[i] = numdw; + tex->tex.zcomp8x8[i] = compsize == 8; } else { - desc->zmask_dwords[i] = 0; - desc->zcomp8x8[i] = FALSE; + tex->tex.zmask_dwords[i] = 0; + tex->tex.zcomp8x8[i] = FALSE; } } } } static void r300_setup_tiling(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { struct r300_winsys_screen *rws = screen->rws; - enum pipe_format format = desc->b.b.format; + enum pipe_format format = tex->b.b.b.format; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); @@ -406,7 +406,7 @@ static void r300_setup_tiling(struct r300_screen *screen, } /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + if (!is_zb && (tex->b.b.b.height0 == 1 || dbg_no_tiling)) { return; } @@ -415,12 +415,12 @@ static void r300_setup_tiling(struct r300_screen *screen, case 1: case 4: case 8: - desc->microtile = R300_BUFFER_TILED; + tex->tex.microtile = R300_BUFFER_TILED; break; case 2: if (rws->get_value(rws, R300_VID_DRM_2_1_0)) { - desc->microtile = R300_BUFFER_SQUARETILED; + tex->tex.microtile = R300_BUFFER_SQUARETILED; } break; } @@ -430,105 +430,104 @@ static void r300_setup_tiling(struct r300_screen *screen, } /* Set macrotiling. 
*/ - if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { - desc->macrotile[0] = R300_BUFFER_TILED; + if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { + tex->tex.macrotile[0] = R300_BUFFER_TILED; } } -static void r300_tex_print_info(struct r300_screen *rscreen, - struct r300_texture_desc *desc, +static void r300_tex_print_info(struct r300_resource *tex, const char *func) { fprintf(stderr, "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " "LastLevel: %i, Size: %i, Format: %s\n", func, - desc->macrotile[0] ? "YES" : " NO", - desc->microtile ? "YES" : " NO", - desc->stride_in_pixels[0], - desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, - desc->b.b.last_level, desc->size_in_bytes, - util_format_short_name(desc->b.b.format)); + tex->tex.macrotile[0] ? "YES" : " NO", + tex->tex.microtile ? "YES" : " NO", + tex->tex.stride_in_pixels[0], + tex->b.b.b.width0, tex->b.b.b.height0, tex->b.b.b.depth0, + tex->b.b.b.last_level, tex->tex.size_in_bytes, + util_format_short_name(tex->b.b.b.format)); } boolean r300_texture_desc_init(struct r300_screen *rscreen, - struct r300_texture_desc *desc, + struct r300_resource *tex, const struct pipe_resource *base, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride_in_bytes_override, unsigned max_buffer_size) { - desc->b.b = *base; - desc->b.b.screen = &rscreen->screen; - desc->stride_in_bytes_override = stride_in_bytes_override; - desc->width0 = base->width0; - desc->height0 = base->height0; - desc->depth0 = base->depth0; + tex->b.b.b = *base; + tex->b.b.b.screen = &rscreen->screen; + tex->tex.stride_in_bytes_override = stride_in_bytes_override; + tex->tex.width0 = base->width0; + tex->tex.height0 = base->height0; + tex->tex.depth0 = base->depth0; - r300_setup_flags(desc); + r300_setup_flags(tex); /* Align a 3D NPOT texture to POT. 
*/ - if (base->target == PIPE_TEXTURE_3D && desc->is_npot) { - desc->width0 = util_next_power_of_two(desc->width0); - desc->height0 = util_next_power_of_two(desc->height0); - desc->depth0 = util_next_power_of_two(desc->depth0); + if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) { + tex->tex.width0 = util_next_power_of_two(tex->tex.width0); + tex->tex.height0 = util_next_power_of_two(tex->tex.height0); + tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0); } /* Setup tiling. */ if (microtile == R300_BUFFER_SELECT_LAYOUT || macrotile == R300_BUFFER_SELECT_LAYOUT) { - r300_setup_tiling(rscreen, desc); + r300_setup_tiling(rscreen, tex); } else { - desc->microtile = microtile; - desc->macrotile[0] = macrotile; - assert(desc->b.b.last_level == 0); + tex->tex.microtile = microtile; + tex->tex.macrotile[0] = macrotile; + assert(tex->b.b.b.last_level == 0); } - r300_setup_cbzb_flags(rscreen, desc); + r300_setup_cbzb_flags(rscreen, tex); /* Setup the miptree description. */ - r300_setup_miptree(rscreen, desc, TRUE); + r300_setup_miptree(rscreen, tex, TRUE); /* If the required buffer size is larger the given max size, * try again without the alignment for the CBZB clear. */ - if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { - r300_setup_miptree(rscreen, desc, FALSE); + if (max_buffer_size && tex->tex.size_in_bytes > max_buffer_size) { + r300_setup_miptree(rscreen, tex, FALSE); } - r300_texture_3d_fix_mipmapping(rscreen, desc); - r300_setup_zmask_flags(rscreen, desc); + r300_texture_3d_fix_mipmapping(rscreen, tex); + r300_setup_zmask_flags(rscreen, tex); if (max_buffer_size) { /* Make sure the buffer we got is large enough. */ - if (desc->size_in_bytes > max_buffer_size) { + if (tex->tex.size_in_bytes > max_buffer_size) { fprintf(stderr, "r300: texture_desc_init: The buffer is not " "large enough. 
Got: %i, Need: %i, Info:\n", - max_buffer_size, desc->size_in_bytes); - r300_tex_print_info(rscreen, desc, "texture_desc_init"); + max_buffer_size, tex->tex.size_in_bytes); + r300_tex_print_info(tex, "texture_desc_init"); return FALSE; } - desc->buffer_size_in_bytes = max_buffer_size; + tex->tex.buffer_size_in_bytes = max_buffer_size; } else { - desc->buffer_size_in_bytes = desc->size_in_bytes; + tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes; } if (SCREEN_DBG_ON(rscreen, DBG_TEX)) - r300_tex_print_info(rscreen, desc, "texture_desc_init"); + r300_tex_print_info(tex, "texture_desc_init"); return TRUE; } -unsigned r300_texture_get_offset(struct r300_texture_desc *desc, +unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer) { - unsigned offset = desc->offset_in_bytes[level]; + unsigned offset = tex->tex.offset_in_bytes[level]; - switch (desc->b.b.target) { + switch (tex->b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - return offset + layer * desc->layer_size_in_bytes[level]; + return offset + layer * tex->tex.layer_size_in_bytes[level]; default: assert(layer == 0); diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h index 121d215b4cb..24db5f5fc98 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.h +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -30,7 +30,7 @@ struct pipe_resource; struct r300_screen; struct r300_texture_desc; -struct r300_texture; +struct r300_resource; enum r300_dim { DIM_WIDTH = 0, @@ -44,14 +44,14 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, enum r300_dim dim, boolean is_rs690); boolean r300_texture_desc_init(struct r300_screen *rscreen, - struct r300_texture_desc *desc, + struct r300_resource *tex, const struct pipe_resource *base, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride_in_bytes_override, unsigned max_buffer_size); -unsigned r300_texture_get_offset(struct 
r300_texture_desc *desc, +unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer); #endif diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 314513561ba..7265fa733e9 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -37,7 +37,7 @@ struct r300_transfer { unsigned offset; /* Linear texture. */ - struct r300_texture *linear_texture; + struct r300_resource *linear_texture; }; /* Convenience cast wrapper. */ @@ -54,7 +54,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; struct pipe_resource *tex = transfer->resource; - ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, 0, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->b.b.b, 0, 0, 0, 0, tex, transfer->level, &transfer->box); } @@ -70,7 +70,7 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->level, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->linear_texture->desc.b.b, 0, &src_box); + &r300transfer->linear_texture->b.b.b, 0, &src_box); ctx->flush(ctx, 0, NULL); } @@ -83,7 +83,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, const struct pipe_box *box) { struct r300_context *r300 = r300_context(ctx); - struct r300_texture *tex = r300_texture(texture); + struct r300_resource *tex = r300_resource(texture); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; @@ -114,7 +114,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. 
*/ - if (tex->desc.microtile || tex->desc.macrotile[level] || + if (tex->tex.microtile || tex->tex.macrotile[level] || ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; @@ -140,7 +140,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->linear_texture = r300_texture( + trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, &base)); @@ -149,14 +149,14 @@ r300_texture_get_transfer(struct pipe_context *ctx, * Let's flush and try again. */ ctx->flush(ctx, 0, NULL); - trans->linear_texture = r300_texture( + trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, &base)); if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->desc.microtile && !tex->desc.macrotile[level]) { + if (!tex->tex.microtile && !tex->tex.macrotile[level]) { goto unpipelined; } @@ -168,8 +168,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->linear_texture->desc.microtile && - !trans->linear_texture->desc.macrotile[0]); + assert(!trans->linear_texture->tex.microtile && + !trans->linear_texture->tex.macrotile[0]); /* Set the stride. * @@ -179,7 +179,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * right thing internally. */ trans->transfer.stride = - trans->linear_texture->desc.stride_in_bytes[0]; + trans->linear_texture->tex.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -194,8 +194,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, unpipelined: /* Unpipelined transfer. 
*/ - trans->transfer.stride = tex->desc.stride_in_bytes[level]; - trans->offset = r300_texture_get_offset(&tex->desc, level, box->z); + trans->transfer.stride = tex->tex.stride_in_bytes[level]; + trans->offset = r300_texture_get_offset(tex, level, box->z); if (referenced_cs) ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); @@ -227,9 +227,9 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, struct r300_context *r300 = r300_context(ctx); struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); - struct r300_texture *tex = r300_texture(transfer->resource); + struct r300_resource *tex = r300_resource(transfer->resource); char *map; - enum pipe_format format = tex->desc.b.b.format; + enum pipe_format format = tex->b.b.b.format; if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped @@ -258,7 +258,7 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); - struct r300_texture *tex = r300_texture(transfer->resource); + struct r300_resource *tex = r300_resource(transfer->resource); if (r300transfer->linear_texture) { rws->buffer_unmap(rws, r300transfer->linear_texture->buf); -- cgit v1.2.3 From 7c24a4c6a86402be1f68d23f4d52d4d071957801 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 10 Feb 2011 10:16:21 +0100 Subject: r300g: add a way to change texture properties arbitrarily So that we can implement resource_copy on arbitrary data. 
--- src/gallium/drivers/r300/r300_blit.c | 51 +++++++++++++----------- src/gallium/drivers/r300/r300_context.h | 4 ++ src/gallium/drivers/r300/r300_screen_buffer.c | 2 + src/gallium/drivers/r300/r300_state_derived.c | 13 ++++-- src/gallium/drivers/r300/r300_texture.c | 57 ++++++++++++++++----------- src/gallium/drivers/r300/r300_texture.h | 5 ++- src/gallium/drivers/r300/r300_texture_desc.c | 37 +++++++++-------- src/gallium/drivers/r300/r300_texture_desc.h | 6 +-- src/gallium/drivers/r300/r300_transfer.c | 5 +++ 9 files changed, 104 insertions(+), 76 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index cadd090d029..3fa1504ecd9 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -412,9 +412,12 @@ static void r300_resource_copy_region(struct pipe_context *pipe, struct r300_context *r300 = r300_context(pipe); struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - enum pipe_format old_format = dst->format; - enum pipe_format new_format = old_format; - const struct util_format_description *desc = util_format_description(old_format); + struct pipe_resource old_src = *src; + struct pipe_resource old_dst = *dst; + struct pipe_resource new_src = old_src; + struct pipe_resource new_dst = old_dst; + const struct util_format_description *desc = + util_format_description(dst->format); if (r300->zmask_in_use && !r300->zmask_locked) { if (fb->zsbuf->texture == src || @@ -429,46 +432,46 @@ static void r300_resource_copy_region(struct pipe_context *pipe, if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || !pipe->screen->is_format_supported(pipe->screen, - old_format, src->target, + src->format, src->target, src->nr_samples, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW, 0))) { - switch (util_format_get_blocksize(old_format)) { + PIPE_BIND_SAMPLER_VIEW, 0) || 
+ !pipe->screen->is_format_supported(pipe->screen, + dst->format, dst->target, + dst->nr_samples, + PIPE_BIND_RENDER_TARGET, 0))) { + switch (util_format_get_blocksize(old_dst.format)) { case 1: - new_format = PIPE_FORMAT_I8_UNORM; + new_dst.format = PIPE_FORMAT_I8_UNORM; break; case 2: - new_format = PIPE_FORMAT_B4G4R4A4_UNORM; + new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM; break; case 4: - new_format = PIPE_FORMAT_B8G8R8A8_UNORM; + new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; break; case 8: - new_format = PIPE_FORMAT_R16G16B16A16_UNORM; + new_dst.format = PIPE_FORMAT_R16G16B16A16_UNORM; break; default: debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n" "r300: surface_copy: Software fallback doesn't work for tiled textures.\n", - util_format_short_name(old_format)); + util_format_short_name(dst->format)); } + new_src.format = new_dst.format; } - if (old_format != new_format) { - r300_texture_reinterpret_format(pipe->screen, - dst, new_format); - r300_texture_reinterpret_format(pipe->screen, - src, new_format); - } + if (old_src.format != new_src.format) + r300_resource_set_properties(pipe->screen, src, 0, &new_src); + if (old_dst.format != new_dst.format) + r300_resource_set_properties(pipe->screen, dst, 0, &new_dst); r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); - if (old_format != new_format) { - r300_texture_reinterpret_format(pipe->screen, - dst, old_format); - r300_texture_reinterpret_format(pipe->screen, - src, old_format); - } + if (old_src.format != new_src.format) + r300_resource_set_properties(pipe->screen, src, 0, &old_src); + if (old_dst.format != new_dst.format) + r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); if (r300->zmask_locked) { r300->zmask_locked = FALSE; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e55f138d638..480233bae5b 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ 
b/src/gallium/drivers/r300/r300_context.h @@ -392,6 +392,7 @@ struct r300_resource struct r300_winsys_buffer *buf; struct r300_winsys_cs_buffer *cs_buf; enum r300_buffer_domain domain; + unsigned buf_size; /* Constant buffers are in user memory. */ uint8_t *constant_buffer; @@ -403,6 +404,9 @@ struct r300_resource /* Only format-independent bits should be filled in. */ struct r300_texture_format_state tx_format; + /* Where the texture starts in the buffer. */ + unsigned tex_offset; + /* HiZ memory allocations. */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index cf4a2e9dae5..bc4762c108a 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -214,6 +214,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->b.user_ptr = NULL; rbuf->domain = R300_DOMAIN_GTT; rbuf->buf = NULL; + rbuf->buf_size = templ->width0; rbuf->constant_buffer = NULL; /* Alloc constant buffers in RAM. */ @@ -262,6 +263,7 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, rbuf->b.user_ptr = ptr; rbuf->domain = R300_DOMAIN_GTT; rbuf->buf = NULL; + rbuf->buf_size = size; rbuf->constant_buffer = NULL; return &rbuf->b.b.b; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 41a02f37ce0..41b57b502fc 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -733,13 +733,18 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* Even though we do not implement mipmapping for NPOT * textures, we should at least honor the minimum level * which is allowed to be displayed. We do this by setting up - * an i-th mipmap level as the zero level. */ + * the i-th mipmap level as the zero level. 
*/ + unsigned offset = tex->tex_offset + + tex->tex.offset_in_bytes[min_level]; + r300_texture_setup_format_state(r300->screen, tex, min_level, &texstate->format); - texstate->format.tile_config |= - tex->tex.offset_in_bytes[min_level] & 0xffffffe0; - assert((tex->tex.offset_in_bytes[min_level] & 0x1f) == 0); + texstate->format.tile_config |= offset & 0xffffffe0; + assert((offset & 0x1f) == 0); + } else { + texstate->format.tile_config |= tex->tex_offset & 0xffffffe0; + assert((tex->tex_offset & 0x1f) == 0); } /* Assign a texture cache region. */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 5b4d7b72af1..8ed03745134 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -679,18 +679,27 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf) } } -void r300_texture_reinterpret_format(struct pipe_screen *screen, +boolean r300_resource_set_properties(struct pipe_screen *screen, struct pipe_resource *tex, - enum pipe_format new_format) + unsigned offset, + const struct pipe_resource *new_properties) { - struct r300_screen *r300screen = r300_screen(screen); + struct r300_screen *rscreen = r300_screen(screen); + struct r300_resource *res = r300_resource(tex); - SCREEN_DBG(r300screen, DBG_TEX, - "r300: texture_reinterpret_format: %s -> %s\n", + SCREEN_DBG(rscreen, DBG_TEX, + "r300: texture_set_properties: %s -> %s\n", util_format_short_name(tex->format), - util_format_short_name(new_format)); + util_format_short_name(new_properties->format)); - tex->format = new_format; + if (!r300_texture_desc_init(rscreen, res, new_properties)) { + fprintf(stderr, "r300: ERROR: Cannot set texture properties.\n"); + return FALSE; + } + res->tex_offset = offset; + r300_texture_setup_format_state(rscreen, res, 0, &res->tx_format); + + return TRUE; } static void r300_texture_destroy(struct pipe_screen *screen, @@ -755,28 +764,30 @@ r300_texture_create_object(struct r300_screen 
*rscreen, return NULL; } - /* Initialize the descriptor. */ - if (!r300_texture_desc_init(rscreen, tex, base, - microtile, macrotile, - stride_in_bytes_override, - max_buffer_size)) { + pipe_reference_init(&tex->b.b.b.reference, 1); + tex->b.b.b.screen = &rscreen->screen; + tex->b.b.b.usage = base->usage; + tex->b.b.b.bind = base->bind; + tex->b.b.b.flags = base->flags; + tex->b.b.vtbl = &r300_texture_vtbl; + tex->tex.microtile = microtile; + tex->tex.macrotile[0] = macrotile; + tex->tex.stride_in_bytes_override = stride_in_bytes_override; + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? + R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buf_size = max_buffer_size; + + if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) { if (buffer) rws->buffer_reference(rws, &buffer, NULL); FREE(tex); return NULL; } - /* Initialize the hardware state. */ - r300_texture_setup_format_state(rscreen, tex, 0, &tex->tx_format); - - tex->b.b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.b.reference, 1); - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? - R300_DOMAIN_GTT : - R300_DOMAIN_VRAM | R300_DOMAIN_GTT; - tex->buf = buffer; /* Create the backing buffer if needed. 
*/ - if (!tex->buf) { + if (!buffer) { + tex->buf_size = tex->tex.size_in_bytes; tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, base->bind, base->usage, tex->domain); @@ -784,6 +795,8 @@ r300_texture_create_object(struct r300_screen *rscreen, FREE(tex); return NULL; } + } else { + tex->buf = buffer; } tex->cs_buf = rws->buffer_get_cs_handle(rws, tex->buf); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index a4838bea81e..158a387478f 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -46,9 +46,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, uint32_t r500_tx_format_msb_bit(enum pipe_format format); -void r300_texture_reinterpret_format(struct pipe_screen *screen, +boolean r300_resource_set_properties(struct pipe_screen *screen, struct pipe_resource *tex, - enum pipe_format new_format); + unsigned offset, + const struct pipe_resource *new_properties); boolean r300_is_colorbuffer_format_supported(enum pipe_format format); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index eb946ba7c28..221e5a314ac 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -401,6 +401,9 @@ static void r300_setup_tiling(struct r300_screen *screen, boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + tex->tex.microtile = R300_BUFFER_LINEAR; + tex->tex.macrotile[0] = R300_BUFFER_LINEAR; + if (!util_format_is_plain(format)) { return; } @@ -453,15 +456,16 @@ static void r300_tex_print_info(struct r300_resource *tex, boolean r300_texture_desc_init(struct r300_screen *rscreen, struct r300_resource *tex, - const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned stride_in_bytes_override, - unsigned max_buffer_size) + const 
struct pipe_resource *base) { - tex->b.b.b = *base; - tex->b.b.b.screen = &rscreen->screen; - tex->tex.stride_in_bytes_override = stride_in_bytes_override; + tex->b.b.b.target = base->target; + tex->b.b.b.format = base->format; + tex->b.b.b.width0 = base->width0; + tex->b.b.b.height0 = base->height0; + tex->b.b.b.depth0 = base->depth0; + tex->b.b.b.array_size = base->array_size; + tex->b.b.b.last_level = base->last_level; + tex->b.b.b.nr_samples = base->nr_samples; tex->tex.width0 = base->width0; tex->tex.height0 = base->height0; tex->tex.depth0 = base->depth0; @@ -476,13 +480,8 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, } /* Setup tiling. */ - if (microtile == R300_BUFFER_SELECT_LAYOUT || - macrotile == R300_BUFFER_SELECT_LAYOUT) { + if (tex->tex.microtile == R300_BUFFER_SELECT_LAYOUT) { r300_setup_tiling(rscreen, tex); - } else { - tex->tex.microtile = microtile; - tex->tex.macrotile[0] = macrotile; - assert(tex->b.b.b.last_level == 0); } r300_setup_cbzb_flags(rscreen, tex); @@ -491,24 +490,24 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, r300_setup_miptree(rscreen, tex, TRUE); /* If the required buffer size is larger the given max size, * try again without the alignment for the CBZB clear. */ - if (max_buffer_size && tex->tex.size_in_bytes > max_buffer_size) { + if (tex->buf_size && tex->tex.size_in_bytes > tex->buf_size) { r300_setup_miptree(rscreen, tex, FALSE); } r300_texture_3d_fix_mipmapping(rscreen, tex); r300_setup_zmask_flags(rscreen, tex); - if (max_buffer_size) { + if (tex->buf_size) { /* Make sure the buffer we got is large enough. */ - if (tex->tex.size_in_bytes > max_buffer_size) { + if (tex->tex.size_in_bytes > tex->buf_size) { fprintf(stderr, "r300: texture_desc_init: The buffer is not " "large enough. 
Got: %i, Need: %i, Info:\n", - max_buffer_size, tex->tex.size_in_bytes); + tex->buf_size, tex->tex.size_in_bytes); r300_tex_print_info(tex, "texture_desc_init"); return FALSE; } - tex->tex.buffer_size_in_bytes = max_buffer_size; + tex->tex.buffer_size_in_bytes = tex->buf_size; } else { tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes; } diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h index 24db5f5fc98..ce6e9643ec6 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.h +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -45,11 +45,7 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, boolean r300_texture_desc_init(struct r300_screen *rscreen, struct r300_resource *tex, - const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned stride_in_bytes_override, - unsigned max_buffer_size); + const struct pipe_resource *base); unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer); diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 7265fa733e9..f2b6b45ef1b 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -116,6 +116,11 @@ r300_texture_get_transfer(struct pipe_context *ctx, * Also make write transfers pipelined. 
*/ if (tex->tex.microtile || tex->tex.macrotile[level] || ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { + if (r300->blitter->running) { + fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n"); + os_break(); + } + base.target = PIPE_TEXTURE_2D; base.format = texture->format; base.width0 = box->width; -- cgit v1.2.3 From fea4ad8f6629cdc83fcffe0db455bba186fb1b1a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 10 Feb 2011 11:20:05 +0100 Subject: r300g: implement accelerated copy_region for compressed formats --- src/gallium/drivers/r300/r300_blit.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 3fa1504ecd9..14c9794888c 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -418,6 +418,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, struct pipe_resource new_dst = old_dst; const struct util_format_description *desc = util_format_description(dst->format); + struct pipe_box box; if (r300->zmask_in_use && !r300->zmask_locked) { if (fb->zsbuf->texture == src || @@ -429,6 +430,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } + /* Handle non-renderable plain formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || !pipe->screen->is_format_supported(pipe->screen, @@ -460,6 +462,33 @@ static void r300_resource_copy_region(struct pipe_context *pipe, new_src.format = new_dst.format; } + /* Handle compressed formats. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + switch (util_format_get_blocksize(old_dst.format)) { + case 8: + /* 1 pixel = 4 bits, + * we set 1 pixel = 2 bytes ===> 4 times larger pixels. 
*/ + new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM; + break; + case 16: + /* 1 pixel = 8 bits, + * we set 1 pixel = 4 bytes ===> 4 times larger pixels. */ + new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + } + + /* Since the pixels are 4 times larger, we must decrease + * the image size and the coordinates 4 times. */ + new_src.format = new_dst.format; + new_dst.height0 /= 4; + new_src.height0 /= 4; + dsty /= 4; + box = *src_box; + box.y /= 4; + box.height /= 4; + src_box = &box; + } + if (old_src.format != new_src.format) r300_resource_set_properties(pipe->screen, src, 0, &new_src); if (old_dst.format != new_dst.format) -- cgit v1.2.3 From 6ccab620a0e7364ab6c0d902b3ddf58ee988f7fa Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 23 Dec 2010 19:40:54 +1000 Subject: r300g: import the last bits of libdrm and cleanup the whole thing Based on Dave's branch. The majority of this commit is a cleanup, mainly renaming things. There wasn't much code to import, just ioctl calls. Also done: - implemented unsynchronized bo_map (important optimization!) - radeon_bo_is_referenced_by_cs is no longer a refcount hack - dropped the libdrm_radeon dependency I'm surprised that this has resulted in less code in the end. 
--- src/gallium/drivers/r300/r300_context.c | 6 +- src/gallium/drivers/r300/r300_context.h | 13 +- src/gallium/drivers/r300/r300_query.c | 9 +- src/gallium/drivers/r300/r300_render.c | 40 +- src/gallium/drivers/r300/r300_resource.c | 2 +- src/gallium/drivers/r300/r300_screen_buffer.c | 18 +- src/gallium/drivers/r300/r300_screen_buffer.h | 3 +- src/gallium/drivers/r300/r300_state.c | 8 +- src/gallium/drivers/r300/r300_texture.c | 19 +- src/gallium/drivers/r300/r300_transfer.c | 15 +- src/gallium/drivers/r300/r300_winsys.h | 78 ++-- src/gallium/targets/dri-r300/Makefile | 2 - src/gallium/targets/dri-r300/SConscript | 2 - src/gallium/targets/dri-r600/SConscript | 2 - src/gallium/targets/egl-static/SConscript | 1 - src/gallium/targets/egl/Makefile | 2 +- src/gallium/targets/xorg-radeon/Makefile | 2 +- src/gallium/winsys/radeon/drm/Makefile | 5 +- src/gallium/winsys/radeon/drm/SConscript | 3 +- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 543 ++++++++++++++++++++++ src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 78 ++++ src/gallium/winsys/radeon/drm/radeon_drm_buffer.c | 535 --------------------- src/gallium/winsys/radeon/drm/radeon_drm_buffer.h | 53 --- src/gallium/winsys/radeon/drm/radeon_drm_common.c | 58 ++- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 27 +- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 15 +- src/gallium/winsys/radeon/drm/radeon_r300.c | 181 -------- src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 - 28 files changed, 780 insertions(+), 943 deletions(-) create mode 100644 src/gallium/winsys/radeon/drm/radeon_drm_bo.c create mode 100644 src/gallium/winsys/radeon/drm/radeon_drm_bo.h delete mode 100644 src/gallium/winsys/radeon/drm/radeon_drm_buffer.c delete mode 100644 src/gallium/winsys/radeon/drm/radeon_drm_buffer.h delete mode 100644 src/gallium/winsys/radeon/drm/radeon_r300.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 960e3c346ea..3608c04dc93 
100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -548,14 +548,12 @@ void r300_finish(struct r300_context *r300) for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]->texture) { - r300->rws->buffer_wait(r300->rws, - r300_resource(fb->cbufs[i]->texture)->buf); + r300->rws->buffer_wait(r300_resource(fb->cbufs[i]->texture)->buf); return; } } if (fb->zsbuf && fb->zsbuf->texture) { - r300->rws->buffer_wait(r300->rws, - r300_resource(fb->zsbuf->texture)->buf); + r300->rws->buffer_wait(r300_resource(fb->zsbuf->texture)->buf); } } } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 480233bae5b..9335c680bf6 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -34,6 +34,7 @@ #include "r300_defines.h" #include "r300_screen.h" +#include "r300_winsys.h" struct u_upload_mgr; struct r300_context; @@ -273,8 +274,8 @@ struct r300_query { boolean begin_emitted; /* The buffer where query results are stored. */ - struct r300_winsys_buffer *buf; - struct r300_winsys_cs_buffer *cs_buf; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; /* The size of the buffer. */ unsigned buffer_size; /* The domain of the buffer. */ @@ -305,8 +306,8 @@ struct r300_surface { struct pipe_surface base; /* Winsys buffer backing the texture. */ - struct r300_winsys_buffer *buf; - struct r300_winsys_cs_buffer *cs_buf; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; enum r300_buffer_domain domain; @@ -389,8 +390,8 @@ struct r300_resource struct u_vbuf_resource b; /* Winsys buffer backing this resource. 
*/ - struct r300_winsys_buffer *buf; - struct r300_winsys_cs_buffer *cs_buf; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; enum r300_buffer_domain domain; unsigned buf_size; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 62dee8db59e..da871dc3a87 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -60,7 +60,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, q->domain); - q->cs_buf = r300->rws->buffer_get_cs_handle(r300->rws, q->buf); + q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); return (struct pipe_query*)q; } @@ -68,10 +68,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, static void r300_destroy_query(struct pipe_context* pipe, struct pipe_query* query) { - struct r300_context *r300 = r300_context(pipe); struct r300_query* q = r300_query(query); - r300->rws->buffer_reference(r300->rws, &q->buf, NULL); + r300_winsys_bo_reference(&q->buf, NULL); remove_from_list(q); FREE(query); } @@ -137,7 +136,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, flags = PIPE_TRANSFER_READ | (!wait ? 
PIPE_TRANSFER_DONTBLOCK : 0); - map = r300->rws->buffer_map(r300->rws, q->buf, r300->cs, flags); + map = r300->rws->buffer_map(q->buf, r300->cs, flags); if (!map) return FALSE; @@ -148,7 +147,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, map++; } - r300->rws->buffer_unmap(r300->rws, q->buf); + r300->rws->buffer_unmap(q->buf); *result = temp; return TRUE; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index eda5c48cfbb..ca55984ad9e 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -330,15 +330,10 @@ static boolean immd_is_good_idea(struct r300_context *r300, if (!checked[vbi]) { buf = r300->vbuf_mgr->real_vertex_buffer[vbi]; - if (!(r300_resource(buf)->domain & R300_DOMAIN_GTT)) { + if ((r300_resource(buf)->domain != R300_DOMAIN_GTT)) { return FALSE; } - if (r300_buffer_is_referenced(&r300->context, buf, - R300_REF_CS | R300_REF_HW)) { - /* It's a very bad idea to map it... 
*/ - return FALSE; - } checked[vbi] = TRUE; } } @@ -395,7 +390,8 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, if (!transfer[vbi]) { map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, r300->vbuf_mgr->real_vertex_buffer[vbi], - PIPE_TRANSFER_READ, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &transfer[vbi]); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; } @@ -575,7 +571,9 @@ static void r300_draw_range_elements(struct pipe_context* pipe, struct pipe_resource *userbuf; uint16_t *ptr = pipe_buffer_map(pipe, indexBuffer, - PIPE_TRANSFER_READ, &transfer); + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, + &transfer); if (mode == PIPE_PRIM_TRIANGLES) { memcpy(indices3, ptr + start, 6); @@ -771,7 +769,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, if (r300->vbuf_mgr->vertex_buffer[i].buffer) { void *buf = pipe_buffer_map(pipe, r300->vbuf_mgr->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } @@ -779,7 +778,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, if (indexed) { indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, - PIPE_TRANSFER_READ, &ib_transfer); + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &ib_transfer); } draw_set_mapped_index_buffer(r300->draw, indices); @@ -876,7 +876,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context, r300->vbo, - PIPE_TRANSFER_WRITE, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED, &r300render->vbo_transfer); assert(r300render->vbo_ptr); @@ -952,23 +953,6 @@ static void r300_render_draw_arrays(struct vbuf_render* render, return; } - /* Uncomment to dump all VBOs rendered through this interface. - * Slow and noisy! 
- ptr = pipe_buffer_map(&r300render->r300->context, - r300render->vbo, PIPE_TRANSFER_READ, - &r300render->vbo_transfer); - - for (i = 0; i < count; i++) { - printf("r300: Vertex %d\n", i); - draw_dump_emitted_vertex(&r300->vertex_info, ptr); - ptr += r300->vertex_info.size * 4; - printf("\n"); - } - - pipe_buffer_unmap(&r300render->r300->context, r300render->vbo, - r300render->vbo_transfer); - */ - BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, r300render->prim)); diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c index d788cedb174..f3d8c5b889f 100644 --- a/src/gallium/drivers/r300/r300_resource.c +++ b/src/gallium/drivers/r300/r300_resource.c @@ -42,7 +42,7 @@ static unsigned r300_resource_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *buf, unsigned level, int layer) { - return r300_buffer_is_referenced(context, buf, R300_REF_CS); + return r300_buffer_is_referenced(context, buf); } void r300_init_resource_functions(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index bc4762c108a..2e85e2d6ffb 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -34,8 +34,7 @@ #include "r300_winsys.h" unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf, - enum r300_reference_domain domain) + struct pipe_resource *buf) { struct r300_context *r300 = r300_context(context); struct r300_resource *rbuf = r300_resource(buf); @@ -43,7 +42,7 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, if (rbuf->b.user_ptr || rbuf->constant_buffer) return PIPE_UNREFERENCED; - if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain)) + if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return 
PIPE_UNREFERENCED; @@ -79,13 +78,12 @@ static void r300_buffer_destroy(struct pipe_screen *screen, { struct r300_screen *r300screen = r300_screen(screen); struct r300_resource *rbuf = r300_resource(buf); - struct r300_winsys_screen *rws = r300screen->rws; if (rbuf->constant_buffer) FREE(rbuf->constant_buffer); if (rbuf->buf) - rws->buffer_reference(rws, &rbuf->buf, NULL); + r300_winsys_bo_reference(&rbuf->buf, NULL); util_slab_free(&r300screen->pool_buffers, rbuf); } @@ -137,7 +135,7 @@ r300_buffer_transfer_map( struct pipe_context *pipe, if (rbuf->constant_buffer) return (uint8_t *) rbuf->constant_buffer + transfer->box.x; - map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage); + map = rws->buffer_map(rbuf->buf, r300->cs, transfer->usage); if (map == NULL) return NULL; @@ -153,7 +151,7 @@ static void r300_buffer_transfer_unmap( struct pipe_context *pipe, struct r300_resource *rbuf = r300_resource(transfer->resource); if (rbuf->buf) { - rws->buffer_unmap(rws, rbuf->buf); + rws->buffer_unmap(rbuf->buf); } } @@ -177,12 +175,12 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe, } assert(rbuf->b.user_ptr == NULL); - map = rws->buffer_map(rws, rbuf->buf, r300->cs, + map = rws->buffer_map(rbuf->buf, r300->cs, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); memcpy(map + box->x, data, box->width); - rws->buffer_unmap(rws, rbuf->buf); + rws->buffer_unmap(rbuf->buf); } static const struct u_resource_vtbl r300_buffer_vtbl = @@ -229,7 +227,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->b.b.b.bind, rbuf->b.b.b.usage, rbuf->domain); rbuf->cs_buf = - r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf); + r300screen->rws->buffer_get_cs_handle(rbuf->buf); if (!rbuf->buf) { util_slab_free(&r300screen->pool_buffers, rbuf); diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 3276f843b0c..ae87c4406a7 100644 --- 
a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -50,8 +50,7 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, unsigned bind); unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf, - enum r300_reference_domain domain); + struct pipe_resource *buf); /* Inline functions. */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index f0b4ad57bfc..2ec96003795 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -616,7 +616,8 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } static void r300_tex_set_tiling_flags(struct r300_context *r300, - struct r300_resource *tex, unsigned level) + struct r300_resource *tex, + unsigned level) { /* Check if the macrotile flag needs to be changed. * Skip changing the flags otherwise. */ @@ -624,11 +625,10 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, tex->tex.macrotile[level]) { /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. 
*/ - if (r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buf, R300_REF_CS)) + if (r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf)) r300->context.flush(&r300->context, 0, NULL); - r300->rws->buffer_set_tiling(r300->rws, tex->buf, + r300->rws->buffer_set_tiling(tex->buf, tex->tex.microtile, tex->tex.macrotile[level], tex->tex.stride_in_bytes[0]); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 8ed03745134..45a896d6109 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -706,10 +706,9 @@ static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { struct r300_resource* tex = (struct r300_resource*)texture; - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; int i; - rws->buffer_reference(rws, &tex->buf, NULL); + r300_winsys_bo_reference(&tex->buf, NULL); for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { if (tex->hiz_mem[i]) u_mmFreeMem(tex->hiz_mem[i]); @@ -729,7 +728,7 @@ boolean r300_resource_get_handle(struct pipe_screen* screen, return FALSE; } - return rws->buffer_get_handle(rws, tex->buf, + return rws->buffer_get_handle(tex->buf, tex->tex.stride_in_bytes[0], whandle); } @@ -754,13 +753,13 @@ r300_texture_create_object(struct r300_screen *rscreen, enum r300_buffer_tiling macrotile, unsigned stride_in_bytes_override, unsigned max_buffer_size, - struct r300_winsys_buffer *buffer) + struct r300_winsys_bo *buffer) { struct r300_winsys_screen *rws = rscreen->rws; struct r300_resource *tex = CALLOC_STRUCT(r300_resource); if (!tex) { if (buffer) - rws->buffer_reference(rws, &buffer, NULL); + r300_winsys_bo_reference(&buffer, NULL); return NULL; } @@ -780,7 +779,7 @@ r300_texture_create_object(struct r300_screen *rscreen, if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) { if (buffer) - rws->buffer_reference(rws, &buffer, NULL); + 
r300_winsys_bo_reference(&buffer, NULL); FREE(tex); return NULL; } @@ -799,9 +798,9 @@ r300_texture_create_object(struct r300_screen *rscreen, tex->buf = buffer; } - tex->cs_buf = rws->buffer_get_cs_handle(rws, tex->buf); + tex->cs_buf = rws->buffer_get_cs_handle(tex->buf); - rws->buffer_set_tiling(rws, tex->buf, + rws->buffer_set_tiling(tex->buf, tex->tex.microtile, tex->tex.macrotile[0], tex->tex.stride_in_bytes[0]); @@ -835,7 +834,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, { struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; struct r300_screen *rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; + struct r300_winsys_bo *buffer; enum r300_buffer_tiling microtile, macrotile; unsigned stride, size; @@ -851,7 +850,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, if (!buffer) return NULL; - rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile); + rws->buffer_get_tiling(buffer, &microtile, &macrotile); /* Enforce a microtiled zbuffer.
*/ if (util_format_is_depth_or_stencil(base->format) && diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index f2b6b45ef1b..b5572128874 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -89,14 +89,12 @@ r300_texture_get_transfer(struct pipe_context *ctx, boolean referenced_cs, referenced_hw, blittable; referenced_cs = - r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buf, R300_REF_CS); + r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf); if (referenced_cs) { referenced_hw = TRUE; } else { referenced_hw = - r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buf, R300_REF_HW); + r300->rws->buffer_is_busy(tex->buf); } blittable = ctx->screen->is_format_supported( @@ -239,13 +237,12 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ - return rws->buffer_map(rws, - r300transfer->linear_texture->buf, + return rws->buffer_map(r300transfer->linear_texture->buf, r300->cs, transfer->usage); } else { /* Tiling is disabled. 
*/ - map = rws->buffer_map(rws, tex->buf, r300->cs, + map = rws->buffer_map(tex->buf, r300->cs, transfer->usage); if (!map) { @@ -266,8 +263,8 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, struct r300_resource *tex = r300_resource(transfer->resource); if (r300transfer->linear_texture) { - rws->buffer_unmap(rws, r300transfer->linear_texture->buf); + rws->buffer_unmap(r300transfer->linear_texture->buf); } else { - rws->buffer_unmap(rws, tex->buf); + rws->buffer_unmap(tex->buf); } } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 2e8ccdde544..bf1dd5c980e 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -28,18 +28,21 @@ * Any winsys hosting this pipe needs to implement r300_winsys_screen and then * call r300_screen_create to start things. */ +#include "r300_defines.h" + +#include "pipebuffer/pb_bufmgr.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "r300_defines.h" - #define R300_MAX_CMDBUF_DWORDS (16 * 1024) struct winsys_handle; struct r300_winsys_screen; -struct r300_winsys_buffer; /* for map/unmap etc. */ -struct r300_winsys_cs_buffer; /* for write_reloc etc. */ +#define r300_winsys_bo pb_buffer +#define r300_winsys_bo_reference(pdst, src) pb_reference(pdst, src) + +struct r300_winsys_cs_handle; /* for write_reloc etc. */ struct r300_winsys_cs { unsigned cdw; /* Number of used dwords. */ @@ -66,11 +69,6 @@ enum r300_value_id { R300_CAN_AACOMPRESS, /* CMask */ }; -enum r300_reference_domain { /* bitfield */ - R300_REF_CS = 1, - R300_REF_HW = 2 -}; - struct r300_winsys_screen { /** * Destroy this winsys. @@ -107,82 +105,68 @@ struct r300_winsys_screen { * \param domain A bitmask of the R300_DOMAIN_* flags. * \return The created buffer object. 
*/ - struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, + struct r300_winsys_bo *(*buffer_create)(struct r300_winsys_screen *ws, unsigned size, unsigned alignment, unsigned bind, unsigned usage, enum r300_buffer_domain domain); - struct r300_winsys_cs_buffer *(*buffer_get_cs_handle)( - struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); - - /** - * Reference a buffer object (assign with reference counting). - * - * \param ws The winsys this function is called from. - * \param pdst A destination pointer to set the source buffer to. - * \param src A source buffer object. - */ - void (*buffer_reference)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer **pdst, - struct r300_winsys_buffer *src); + struct r300_winsys_cs_handle *(*buffer_get_cs_handle)( + struct r300_winsys_bo *buf); /** * Map the entire data store of a buffer object into the client's address * space. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to map. * \param cs A command stream to flush if the buffer is referenced by it. * \param usage A bitmask of the PIPE_TRANSFER_* flags. * \return The pointer at the beginning of the buffer. */ - void *(*buffer_map)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void *(*buffer_map)(struct r300_winsys_bo *buf, struct r300_winsys_cs *cs, enum pipe_transfer_usage usage); /** * Unmap a buffer object from the client's address space. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to unmap. */ - void (*buffer_unmap)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); + void (*buffer_unmap)(struct r300_winsys_bo *buf); + + /** + * Return TRUE if a buffer object is being used by the GPU. + * + * \param buf A winsys buffer object. + */ + boolean (*buffer_is_busy)(struct r300_winsys_bo *buf); /** * Wait for a buffer object until it is not used by a GPU. 
This is * equivalent to a fence placed after the last command using the buffer, * and synchronizing to the fence. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to wait for. */ - void (*buffer_wait)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); + void (*buffer_wait)(struct r300_winsys_bo *buf); /** * Return tiling flags describing a memory layout of a buffer object. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to get the flags from. * \param macrotile A pointer to the return value of the microtile flag. * \param microtile A pointer to the return value of the macrotile flag. * * \note microtile and macrotile are not bitmasks! */ - void (*buffer_get_tiling)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void (*buffer_get_tiling)(struct r300_winsys_bo *buf, enum r300_buffer_tiling *microtile, enum r300_buffer_tiling *macrotile); /** * Set tiling flags describing a memory layout of a buffer object. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to set the flags for. * \param macrotile A macrotile flag. * \param microtile A microtile flag. @@ -190,8 +174,7 @@ struct r300_winsys_screen { * * \note microtile and macrotile are not bitmasks! */ - void (*buffer_set_tiling)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void (*buffer_set_tiling)(struct r300_winsys_bo *buf, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride); @@ -206,7 +189,7 @@ struct r300_winsys_screen { * \param stride The returned buffer stride in bytes. * \param size The returned buffer size. 
*/ - struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, + struct r300_winsys_bo *(*buffer_from_handle)(struct r300_winsys_screen *ws, struct winsys_handle *whandle, unsigned *stride, unsigned *size); @@ -215,14 +198,12 @@ struct r300_winsys_screen { * Get a winsys handle from a winsys buffer. The internal structure * of the handle is platform-specific and only a winsys should access it. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to get the handle from. * \param whandle A winsys handle pointer. * \param stride A stride of the buffer in bytes, for texturing. * \return TRUE on success. */ - boolean (*buffer_get_handle)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + boolean (*buffer_get_handle)(struct r300_winsys_bo *buf, unsigned stride, struct winsys_handle *whandle); @@ -257,7 +238,7 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_add_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, + struct r300_winsys_cs_handle *buf, enum r300_buffer_domain rd, enum r300_buffer_domain wd); @@ -278,7 +259,7 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_write_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf); + struct r300_winsys_cs_handle *buf); /** * Flush a command stream. @@ -300,16 +281,13 @@ struct r300_winsys_screen { void *user); /** - * Return TRUE if a buffer is referenced by a command stream or by hardware - * (i.e. is busy), based on the domain parameter. + * Return TRUE if a buffer is referenced by a command stream. * * \param cs A command stream. * \param buf A winsys buffer. - * \param domain A bitmask of the R300_REF_* enums. 
*/ boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_reference_domain domain); + struct r300_winsys_cs_handle *buf); }; #endif /* R300_WINSYS_H */ diff --git a/src/gallium/targets/dri-r300/Makefile b/src/gallium/targets/dri-r300/Makefile index 9afbb13276d..cc77a4bc20d 100644 --- a/src/gallium/targets/dri-r300/Makefile +++ b/src/gallium/targets/dri-r300/Makefile @@ -22,6 +22,4 @@ DRIVER_DEFINES = \ include ../Makefile.dri -DRI_LIB_DEPS += -ldrm_radeon - symlinks: diff --git a/src/gallium/targets/dri-r300/SConscript b/src/gallium/targets/dri-r300/SConscript index 005b4bbf7f1..683b6c6972d 100644 --- a/src/gallium/targets/dri-r300/SConscript +++ b/src/gallium/targets/dri-r300/SConscript @@ -2,8 +2,6 @@ Import('*') env = drienv.Clone() -env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') - env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE', 'GALLIUM_GALAHAD']) env.Prepend(LIBS = [ diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript index aa771db2d1a..1df11a8747b 100644 --- a/src/gallium/targets/dri-r600/SConscript +++ b/src/gallium/targets/dri-r600/SConscript @@ -2,8 +2,6 @@ Import('*') env = drienv.Clone() -env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') - env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE']) env.Prepend(LIBS = [ diff --git a/src/gallium/targets/egl-static/SConscript b/src/gallium/targets/egl-static/SConscript index 974c35e46a7..cbd98cc416a 100644 --- a/src/gallium/targets/egl-static/SConscript +++ b/src/gallium/targets/egl-static/SConscript @@ -104,7 +104,6 @@ if env['drm']: ]) if env['drm_radeon']: - env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') env.Append(CPPDEFINES = ['_EGL_PIPE_R300', '_EGL_PIPE_R600']) env.Prepend(LIBS = [ radeonwinsys, diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile index 7e846438455..de01939e5f1 100644 --- a/src/gallium/targets/egl/Makefile +++ 
b/src/gallium/targets/egl/Makefile @@ -92,7 +92,7 @@ nouveau_LIBS := \ # r300 pipe driver r300_CPPFLAGS := -r300_SYS := -ldrm -ldrm_radeon +r300_SYS := -ldrm r300_LIBS := \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a diff --git a/src/gallium/targets/xorg-radeon/Makefile b/src/gallium/targets/xorg-radeon/Makefile index d3bc3569929..6d5f2c3d16e 100644 --- a/src/gallium/targets/xorg-radeon/Makefile +++ b/src/gallium/targets/xorg-radeon/Makefile @@ -19,6 +19,6 @@ DRIVER_PIPES = \ $(TOP)/src/gallium/drivers/rbug/librbug.a DRIVER_LINKS = \ - $(shell pkg-config --libs libdrm libdrm_radeon) + $(shell pkg-config --libs libdrm) include ../Makefile.xorg diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile index 7e339a2ecfe..e63ae6f5006 100644 --- a/src/gallium/winsys/radeon/drm/Makefile +++ b/src/gallium/winsys/radeon/drm/Makefile @@ -5,10 +5,9 @@ include $(TOP)/configs/current LIBNAME = radeonwinsys C_SOURCES = \ - radeon_drm_buffer.c \ + radeon_drm_bo.c \ radeon_drm_cs.c \ - radeon_drm_common.c \ - radeon_r300.c + radeon_drm_common.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r300 \ $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript index 4ddb0919f6a..b16e03556d3 100644 --- a/src/gallium/winsys/radeon/drm/SConscript +++ b/src/gallium/winsys/radeon/drm/SConscript @@ -3,10 +3,9 @@ Import('*') env = env.Clone() radeon_sources = [ - 'radeon_drm_buffer.c', + 'radeon_drm_bo.c', 'radeon_drm_cs.c', 'radeon_drm_common.c', - 'radeon_r300.c', ] try: diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c new file mode 100644 index 00000000000..1a7804fc6ac --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -0,0 +1,543 @@ +#define _FILE_OFFSET_BITS 64 +#include "radeon_drm_cs.h" + +#include "util/u_hash_table.h" +#include 
"util/u_memory.h" +#include "util/u_simple_list.h" +#include "os/os_thread.h" + +#include "state_tracker/drm_driver.h" + +#include +#include +#include +#include + +#define RADEON_BO_FLAGS_MACRO_TILE 1 +#define RADEON_BO_FLAGS_MICRO_TILE 2 +#define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 + +extern const struct pb_vtbl radeon_bo_vtbl; + + +static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo) +{ + assert(bo->vtbl == &radeon_bo_vtbl); + return (struct radeon_bo *)bo; +} + +struct radeon_bomgr { + /* Base class. */ + struct pb_manager base; + + /* Winsys. */ + struct radeon_drm_winsys *rws; + + /* List of buffer handles and its mutex. */ + struct util_hash_table *bo_handles; + pipe_mutex bo_handles_mutex; +}; + +static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr) +{ + return (struct radeon_bomgr *)mgr; +} + +static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf) +{ + struct radeon_bo *bo = NULL; + + if (_buf->vtbl == &radeon_bo_vtbl) { + bo = radeon_bo(_buf); + } else { + struct pb_buffer *base_buf; + pb_size offset; + pb_get_base_buffer(_buf, &base_buf, &offset); + + if (base_buf->vtbl == &radeon_bo_vtbl) + bo = radeon_bo(base_buf); + } + + return bo; +} + +void radeon_bo_unref(struct radeon_bo *bo) +{ + struct drm_gem_close args = {}; + + if (!p_atomic_dec_zero(&bo->cref)) + return; + + if (bo->name) { + pipe_mutex_lock(bo->mgr->bo_handles_mutex); + util_hash_table_remove(bo->mgr->bo_handles, + (void*)(uintptr_t)bo->name); + pipe_mutex_unlock(bo->mgr->bo_handles_mutex); + } + + if (bo->ptr) + munmap(bo->ptr, bo->size); + + /* Close object. 
*/ + args.handle = bo->handle; + drmIoctl(bo->mgr->rws->fd, DRM_IOCTL_GEM_CLOSE, &args); + FREE(bo); +} + +static void radeon_bo_wait(struct r300_winsys_bo *_buf) +{ + struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct drm_radeon_gem_wait_idle args = {}; + + args.handle = bo->handle; + while (drmCommandWriteRead(bo->mgr->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, + &args, sizeof(args)) == -EBUSY); +} + +static boolean radeon_bo_is_busy(struct r300_winsys_bo *_buf) +{ + struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct drm_radeon_gem_busy args = {}; + + args.handle = bo->handle; + return drmCommandWriteRead(bo->mgr->rws->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)) != 0; +} + +static void radeon_bo_destroy(struct pb_buffer *_buf) +{ + struct radeon_bo *bo = radeon_bo(_buf); + + radeon_bo_unref(bo); +} + +static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage) +{ + unsigned res = 0; + + if (usage & PIPE_TRANSFER_READ) + res |= PB_USAGE_CPU_READ; + + if (usage & PIPE_TRANSFER_WRITE) + res |= PB_USAGE_CPU_WRITE; + + if (usage & PIPE_TRANSFER_DONTBLOCK) + res |= PB_USAGE_DONTBLOCK; + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + res |= PB_USAGE_UNSYNCHRONIZED; + + return res; +} + +static void *radeon_bo_map_internal(struct pb_buffer *_buf, + unsigned flags, void *flush_ctx) +{ + struct radeon_bo *bo = radeon_bo(_buf); + struct radeon_drm_cs *cs = flush_ctx; + struct drm_radeon_gem_mmap args = {}; + + if (flags & PB_USAGE_DONTBLOCK) { + /* Note how we use radeon_bo_is_referenced_by_cs here. There are + * basically two places this map function can be called from: + * - pb_map + * - create_buffer (in the buffer reuse case) + * + * Since pb managers are per-winsys managers, not per-context managers, + * and we shouldn't reuse buffers if they are in-use in any context, + * we simply ask: is this buffer referenced by *any* CS? 
+ * + * The problem with buffer_create is that it comes from pipe_screen, + * so we have no CS to look at, though luckily the following code + * is sufficient to tell whether the buffer is in use. */ + if (_buf->base.usage & RADEON_PB_USAGE_CACHE) { + if (radeon_bo_is_referenced_by_any_cs(bo)) + return NULL; + } + + if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data); + return NULL; /* It's very unlikely that the buffer is not busy. */ + } + + if (radeon_bo_is_busy((struct r300_winsys_bo*)bo)) { + return NULL; + } + } + + /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */ + if (!(flags & PB_USAGE_UNSYNCHRONIZED)) { + if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data); + } + + radeon_bo_wait((struct r300_winsys_bo*)bo); + } + + /* Map buffer if it's not already mapped. */ + /* XXX We may get a race in bo->ptr. */ + if (!bo->ptr) { + void *ptr; + + args.handle = bo->handle; + args.offset = 0; + args.size = (uint64_t)bo->size; + if (drmCommandWriteRead(bo->mgr->rws->fd, + DRM_RADEON_GEM_MMAP, + &args, + sizeof(args))) { + fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n", + bo, bo->handle); + return NULL; + } + ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, + bo->mgr->rws->fd, args.addr_ptr); + if (ptr == MAP_FAILED) { + fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno); + return NULL; + } + bo->ptr = ptr; + } + + return bo->ptr; +} + +static void radeon_bo_unmap_internal(struct pb_buffer *_buf) +{ + /* NOP */ +} + +static void radeon_bo_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + *base_buf = buf; + *offset = 0; +} + +static enum pipe_error radeon_bo_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + /* Always pinned */ + return PIPE_OK; +} + +static void radeon_bo_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ +} + +const struct pb_vtbl radeon_bo_vtbl = { + 
radeon_bo_destroy, + radeon_bo_map_internal, + radeon_bo_unmap_internal, + radeon_bo_validate, + radeon_bo_fence, + radeon_bo_get_base_buffer, +}; + +static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct radeon_bomgr *mgr = radeon_bomgr(_mgr); + struct radeon_drm_winsys *rws = mgr->rws; + struct radeon_bo *bo; + struct drm_radeon_gem_create args = {}; + + args.size = size; + args.alignment = desc->alignment; + args.initial_domain = + (desc->usage & RADEON_PB_USAGE_DOMAIN_GTT ? + RADEON_GEM_DOMAIN_GTT : 0) | + (desc->usage & RADEON_PB_USAGE_DOMAIN_VRAM ? + RADEON_GEM_DOMAIN_VRAM : 0); + + if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE, + &args, sizeof(args))) { + fprintf(stderr, "Failed to allocate :\n"); + fprintf(stderr, " size : %d bytes\n", size); + fprintf(stderr, " alignment : %d bytes\n", desc->alignment); + fprintf(stderr, " domains : %d\n", args.initial_domain); + return NULL; + } + + bo = CALLOC_STRUCT(radeon_bo); + if (!bo) + return NULL; + + pipe_reference_init(&bo->base.base.reference, 1); + bo->base.base.alignment = desc->alignment; + bo->base.base.usage = desc->usage; + bo->base.base.size = size; + bo->base.vtbl = &radeon_bo_vtbl; + bo->mgr = mgr; + bo->handle = args.handle; + bo->size = size; + + radeon_bo_ref(bo); + return &bo->base; +} + +static void radeon_bomgr_flush(struct pb_manager *mgr) +{ + /* NOP */ +} + +static void radeon_bomgr_destroy(struct pb_manager *_mgr) +{ + struct radeon_bomgr *mgr = radeon_bomgr(_mgr); + util_hash_table_destroy(mgr->bo_handles); + pipe_mutex_destroy(mgr->bo_handles_mutex); + FREE(mgr); +} + +static unsigned handle_hash(void *key) +{ + return (unsigned)key; +} + +static int handle_compare(void *key1, void *key2) +{ + return !((int)key1 == (int)key2); +} + +struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws) +{ + struct radeon_bomgr *mgr; + + mgr = CALLOC_STRUCT(radeon_bomgr); + if (!mgr) + return NULL; + + 
mgr->base.destroy = radeon_bomgr_destroy; + mgr->base.create_buffer = radeon_bomgr_create_bo; + mgr->base.flush = radeon_bomgr_flush; + + mgr->rws = rws; + mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare); + pipe_mutex_init(mgr->bo_handles_mutex); + return &mgr->base; +} + +static void *radeon_bo_map(struct r300_winsys_bo *buf, + struct r300_winsys_cs *cs, + enum pipe_transfer_usage usage) +{ + struct pb_buffer *_buf = pb_buffer(buf); + + return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), cs); +} + +static void radeon_bo_get_tiling(struct r300_winsys_bo *_buf, + enum r300_buffer_tiling *microtiled, + enum r300_buffer_tiling *macrotiled) +{ + struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct drm_radeon_gem_set_tiling args = {}; + + args.handle = bo->handle; + + drmCommandWriteRead(bo->mgr->rws->fd, + DRM_RADEON_GEM_GET_TILING, + &args, + sizeof(args)); + + *microtiled = R300_BUFFER_LINEAR; + *macrotiled = R300_BUFFER_LINEAR; + if (args.tiling_flags & RADEON_BO_FLAGS_MICRO_TILE) + *microtiled = R300_BUFFER_TILED; + + if (args.tiling_flags & RADEON_BO_FLAGS_MACRO_TILE) + *macrotiled = R300_BUFFER_TILED; +} + +static void radeon_bo_set_tiling(struct r300_winsys_bo *_buf, + enum r300_buffer_tiling microtiled, + enum r300_buffer_tiling macrotiled, + uint32_t pitch) +{ + struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct drm_radeon_gem_set_tiling args = {}; + + if (microtiled == R300_BUFFER_TILED) + args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE; + else if (microtiled == R300_BUFFER_SQUARETILED) + args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE_SQUARE; + + if (macrotiled == R300_BUFFER_TILED) + args.tiling_flags |= RADEON_BO_FLAGS_MACRO_TILE; + + args.handle = bo->handle; + args.pitch = pitch; + + drmCommandWriteRead(bo->mgr->rws->fd, + DRM_RADEON_GEM_SET_TILING, + &args, + sizeof(args)); +} + +static struct r300_winsys_cs_handle *radeon_drm_get_cs_handle( + struct r300_winsys_bo *_buf) +{ + /* return radeon_bo. 
*/ + return (struct r300_winsys_cs_handle*) + get_radeon_bo(pb_buffer(_buf)); +} + +static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, + enum r300_buffer_domain domain) +{ + unsigned res = 0; + + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + res |= RADEON_PB_USAGE_CACHE; + + if (domain & R300_DOMAIN_GTT) + res |= RADEON_PB_USAGE_DOMAIN_GTT; + + if (domain & R300_DOMAIN_VRAM) + res |= RADEON_PB_USAGE_DOMAIN_VRAM; + + return res; +} + +static struct r300_winsys_bo * +radeon_winsys_bo_create(struct r300_winsys_screen *rws, + unsigned size, + unsigned alignment, + unsigned bind, + unsigned usage, + enum r300_buffer_domain domain) +{ + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct pb_desc desc; + struct pb_manager *provider; + struct pb_buffer *buffer; + + memset(&desc, 0, sizeof(desc)); + desc.alignment = alignment; + desc.usage = get_pb_usage_from_create_flags(bind, usage, domain); + + /* Assign a buffer manager. */ + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + provider = ws->cman; + else + provider = ws->kman; + + buffer = provider->create_buffer(provider, size, &desc); + if (!buffer) + return NULL; + + return (struct r300_winsys_bo*)buffer; +} + +static struct r300_winsys_bo *radeon_winsys_bo_from_handle(struct r300_winsys_screen *rws, + struct winsys_handle *whandle, + unsigned *stride, + unsigned *size) +{ + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct radeon_bo *bo; + struct radeon_bomgr *mgr = radeon_bomgr(ws->kman); + struct drm_gem_open open_arg = {}; + + /* We must maintain a list of pairs , so that we always return + * the same BO for one particular handle. If we didn't do that and created + * more than one BO for the same handle and then relocated them in a CS, + * we would hit a deadlock in the kernel. + * + * The list of pairs is guarded by a mutex, of course. 
*/ + pipe_mutex_lock(mgr->bo_handles_mutex); + + /* First check if there already is an existing bo for the handle. */ + bo = util_hash_table_get(mgr->bo_handles, (void*)(uintptr_t)whandle->handle); + if (bo) { + /* Increase the refcount. */ + struct pb_buffer *b = NULL; + pb_reference(&b, &bo->base); + goto done; + } + + /* There isn't, create a new one. */ + bo = CALLOC_STRUCT(radeon_bo); + if (!bo) { + goto fail; + } + + /* Open the BO. */ + open_arg.name = whandle->handle; + if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) { + goto fail; + } + bo->handle = open_arg.handle; + bo->size = open_arg.size; + bo->name = whandle->handle; + radeon_bo_ref(bo); + + /* Initialize it. */ + pipe_reference_init(&bo->base.base.reference, 1); + bo->base.base.alignment = 0; + bo->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; + bo->base.base.size = bo->size; + bo->base.vtbl = &radeon_bo_vtbl; + bo->mgr = mgr; + + util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo); + +done: + pipe_mutex_unlock(mgr->bo_handles_mutex); + + if (stride) + *stride = whandle->stride; + if (size) + *size = bo->base.base.size; + + return (struct r300_winsys_bo*)bo; + +fail: + pipe_mutex_unlock(mgr->bo_handles_mutex); + return NULL; +} + +static boolean radeon_winsys_bo_get_handle(struct r300_winsys_bo *buffer, + unsigned stride, + struct winsys_handle *whandle) +{ + struct drm_gem_flink flink = {}; + struct radeon_bo *bo = get_radeon_bo(pb_buffer(buffer)); + whandle->stride = stride; + + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + if (!bo->flinked) { + flink.handle = bo->handle; + + if (ioctl(bo->mgr->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { + return FALSE; + } + + bo->flinked = TRUE; + bo->flink = flink.name; + } + whandle->handle = bo->flink; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = bo->handle; + } + return TRUE; +} + +void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) +{ + ws->base.buffer_get_cs_handle = 
radeon_drm_get_cs_handle; + ws->base.buffer_set_tiling = radeon_bo_set_tiling; + ws->base.buffer_get_tiling = radeon_bo_get_tiling; + ws->base.buffer_map = radeon_bo_map; + ws->base.buffer_unmap = pb_unmap; + ws->base.buffer_wait = radeon_bo_wait; + ws->base.buffer_is_busy = radeon_bo_is_busy; + ws->base.buffer_create = radeon_winsys_bo_create; + ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; + ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; +} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h new file mode 100644 index 00000000000..092b5e693ee --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -0,0 +1,78 @@ +/* + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ +/* + * Authors: + * Jérôme Glisse + */ +#ifndef RADEON_DRM_BUFFER_H +#define RADEON_DRM_BUFFER_H + +#include "radeon_winsys.h" +#include "pipebuffer/pb_bufmgr.h" + +#define RADEON_PB_USAGE_CACHE (1 << 28) +#define RADEON_PB_USAGE_DOMAIN_GTT (1 << 29) +#define RADEON_PB_USAGE_DOMAIN_VRAM (1 << 30) + +struct radeon_bomgr; + +struct radeon_bo { + struct pb_buffer base; + struct radeon_bomgr *mgr; + + void *ptr; + uint32_t size; + uint32_t handle; + uint32_t name; + + int cref; + + boolean flinked; + uint32_t flink; +}; + +struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws); +struct pb_buffer *radeon_bomgr_create_bo_from_handle(struct pb_manager *_mgr, + uint32_t handle); +boolean radeon_bomgr_get_handle(struct pb_buffer *_buf, + struct winsys_handle *whandle); +void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws); + +void radeon_bo_unref(struct radeon_bo *buf); + + +static INLINE void radeon_bo_ref(struct radeon_bo *bo) +{ + p_atomic_inc(&bo->cref); +} + +static INLINE struct pb_buffer * +pb_buffer(struct r300_winsys_bo *buffer) +{ + return (struct pb_buffer *)buffer; +} + +#endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c deleted file mode 100644 index 4b0f688ce9a..00000000000 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ /dev/null @@ -1,535 +0,0 @@ -#include "radeon_drm_buffer.h" -#include "radeon_drm_cs.h" - -#include "util/u_hash_table.h" -#include "util/u_memory.h" -#include "util/u_simple_list.h" -#include "pipebuffer/pb_bufmgr.h" -#include "os/os_thread.h" - -#include "state_tracker/drm_driver.h" - -#include -#include -#include - -struct radeon_drm_bufmgr; - -struct radeon_drm_buffer { - struct pb_buffer base; - struct radeon_drm_bufmgr *mgr; - - struct radeon_bo *bo; - - boolean flinked; - uint32_t flink; - - struct radeon_drm_buffer *next, *prev; -}; - -extern const struct pb_vtbl radeon_drm_buffer_vtbl; - - -static INLINE struct 
radeon_drm_buffer * -radeon_drm_buffer(struct pb_buffer *buf) -{ - assert(buf); - assert(buf->vtbl == &radeon_drm_buffer_vtbl); - return (struct radeon_drm_buffer *)buf; -} - -struct radeon_drm_bufmgr { - /* Base class. */ - struct pb_manager base; - - /* Winsys. */ - struct radeon_drm_winsys *rws; - - /* List of mapped buffers and its mutex. */ - struct radeon_drm_buffer buffer_map_list; - pipe_mutex buffer_map_list_mutex; - - /* List of buffer handles and its mutex. */ - struct util_hash_table *buffer_handles; - pipe_mutex buffer_handles_mutex; -}; - -static INLINE struct radeon_drm_bufmgr * -radeon_drm_bufmgr(struct pb_manager *mgr) -{ - assert(mgr); - return (struct radeon_drm_bufmgr *)mgr; -} - -static void -radeon_drm_buffer_destroy(struct pb_buffer *_buf) -{ - struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); - int name; - - if (buf->bo->ptr != NULL) { - pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); - /* Now test it again inside the mutex. */ - if (buf->bo->ptr != NULL) { - remove_from_list(buf); - radeon_bo_unmap(buf->bo); - buf->bo->ptr = NULL; - } - pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); - } - name = radeon_gem_name_bo(buf->bo); - if (name) { - pipe_mutex_lock(buf->mgr->buffer_handles_mutex); - util_hash_table_remove(buf->mgr->buffer_handles, - (void*)(uintptr_t)name); - pipe_mutex_unlock(buf->mgr->buffer_handles_mutex); - } - radeon_bo_unref(buf->bo); - - FREE(buf); -} - -static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage) -{ - unsigned res = 0; - - if (usage & PIPE_TRANSFER_READ) - res |= PB_USAGE_CPU_READ; - - if (usage & PIPE_TRANSFER_WRITE) - res |= PB_USAGE_CPU_WRITE; - - if (usage & PIPE_TRANSFER_DONTBLOCK) - res |= PB_USAGE_DONTBLOCK; - - if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) - res |= PB_USAGE_UNSYNCHRONIZED; - - return res; -} - -static void * -radeon_drm_buffer_map_internal(struct pb_buffer *_buf, - unsigned flags, void *flush_ctx) -{ - struct radeon_drm_buffer *buf = 
radeon_drm_buffer(_buf); - struct radeon_drm_cs *cs = flush_ctx; - int write = 0; - - /* Note how we use radeon_bo_is_referenced_by_cs here. There are - * basically two places this map function can be called from: - * - pb_map - * - create_buffer (in the buffer reuse case) - * - * Since pb managers are per-winsys managers, not per-context managers, - * and we shouldn't reuse buffers if they are in-use in any context, - * we simply ask: is this buffer referenced by *any* CS? - * - * The problem with buffer_create is that it comes from pipe_screen, - * so we have no CS to look at, though luckily the following code - * is sufficient to tell whether the buffer is in use. */ - if (flags & PB_USAGE_DONTBLOCK) { - if (_buf->base.usage & RADEON_PB_USAGE_VERTEX) - if (radeon_bo_is_referenced_by_cs(buf->bo, NULL)) - return NULL; - } - - if (buf->bo->ptr != NULL) { - pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); - /* Now test ptr again inside the mutex. We might have gotten a race - * during the first test. */ - if (buf->bo->ptr != NULL) { - remove_from_list(buf); - } - pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); - return buf->bo->ptr; - } - - if (flags & PB_USAGE_DONTBLOCK) { - uint32_t domain; - if (radeon_bo_is_busy(buf->bo, &domain)) - return NULL; - } - - /* If we don't have any CS and the buffer is referenced, - * we cannot flush. 
*/ - assert(cs || !radeon_bo_is_referenced_by_cs(buf->bo, NULL)); - - if (cs && radeon_bo_is_referenced_by_cs(buf->bo, NULL)) { - cs->flush_cs(cs->flush_data); - } - - if (flags & PB_USAGE_CPU_WRITE) { - write = 1; - } - - if (radeon_bo_map(buf->bo, write)) { - return NULL; - } - - pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); - remove_from_list(buf); - pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); - return buf->bo->ptr; -} - -static void -radeon_drm_buffer_unmap_internal(struct pb_buffer *_buf) -{ - struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); - pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); - if (is_empty_list(buf)) { /* = is not inserted... */ - insert_at_tail(&buf->mgr->buffer_map_list, buf); - } - pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); -} - -static void -radeon_drm_buffer_get_base_buffer(struct pb_buffer *buf, - struct pb_buffer **base_buf, - unsigned *offset) -{ - *base_buf = buf; - *offset = 0; -} - - -static enum pipe_error -radeon_drm_buffer_validate(struct pb_buffer *_buf, - struct pb_validate *vl, - unsigned flags) -{ - /* Always pinned */ - return PIPE_OK; -} - -static void -radeon_drm_buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ -} - -const struct pb_vtbl radeon_drm_buffer_vtbl = { - radeon_drm_buffer_destroy, - radeon_drm_buffer_map_internal, - radeon_drm_buffer_unmap_internal, - radeon_drm_buffer_validate, - radeon_drm_buffer_fence, - radeon_drm_buffer_get_base_buffer, -}; - -static struct pb_buffer * -radeon_drm_bufmgr_create_buffer_from_handle_unsafe(struct pb_manager *_mgr, - uint32_t handle) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct radeon_drm_winsys *rws = mgr->rws; - struct radeon_drm_buffer *buf; - struct radeon_bo *bo; - - buf = util_hash_table_get(mgr->buffer_handles, (void*)(uintptr_t)handle); - - if (buf) { - struct pb_buffer *b = NULL; - pb_reference(&b, &buf->base); - return b; - } - - bo = radeon_bo_open(rws->bom, handle, 0, - 0, 0, 0); - if 
(bo == NULL) - return NULL; - - buf = CALLOC_STRUCT(radeon_drm_buffer); - if (!buf) { - radeon_bo_unref(bo); - return NULL; - } - - make_empty_list(buf); - - pipe_reference_init(&buf->base.base.reference, 1); - buf->base.base.alignment = 0; - buf->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; - buf->base.base.size = bo->size; - buf->base.vtbl = &radeon_drm_buffer_vtbl; - buf->mgr = mgr; - - buf->bo = bo; - - util_hash_table_set(mgr->buffer_handles, (void*)(uintptr_t)handle, buf); - - return &buf->base; -} - -struct pb_buffer * -radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct pb_buffer *pb; - - pipe_mutex_lock(mgr->buffer_handles_mutex); - pb = radeon_drm_bufmgr_create_buffer_from_handle_unsafe(_mgr, handle); - pipe_mutex_unlock(mgr->buffer_handles_mutex); - - return pb; -} - -static struct pb_buffer * -radeon_drm_bufmgr_create_buffer(struct pb_manager *_mgr, - pb_size size, - const struct pb_desc *desc) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct radeon_drm_winsys *rws = mgr->rws; - struct radeon_drm_buffer *buf; - uint32_t domain; - - buf = CALLOC_STRUCT(radeon_drm_buffer); - if (!buf) - goto error1; - - pipe_reference_init(&buf->base.base.reference, 1); - buf->base.base.alignment = desc->alignment; - buf->base.base.usage = desc->usage; - buf->base.base.size = size; - buf->base.vtbl = &radeon_drm_buffer_vtbl; - buf->mgr = mgr; - - make_empty_list(buf); - - domain = - (desc->usage & RADEON_PB_USAGE_DOMAIN_GTT ? RADEON_GEM_DOMAIN_GTT : 0) | - (desc->usage & RADEON_PB_USAGE_DOMAIN_VRAM ? 
RADEON_GEM_DOMAIN_VRAM : 0); - - buf->bo = radeon_bo_open(rws->bom, 0, size, - desc->alignment, domain, 0); - if (buf->bo == NULL) - goto error2; - - return &buf->base; - - error2: - FREE(buf); - error1: - return NULL; -} - -static void -radeon_drm_bufmgr_flush(struct pb_manager *mgr) -{ - /* NOP */ -} - -static void -radeon_drm_bufmgr_destroy(struct pb_manager *_mgr) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - util_hash_table_destroy(mgr->buffer_handles); - pipe_mutex_destroy(mgr->buffer_map_list_mutex); - pipe_mutex_destroy(mgr->buffer_handles_mutex); - FREE(mgr); -} - -static unsigned handle_hash(void *key) -{ - return (unsigned)key; -} - -static int handle_compare(void *key1, void *key2) -{ - return !((int)key1 == (int)key2); -} - -struct pb_manager * -radeon_drm_bufmgr_create(struct radeon_drm_winsys *rws) -{ - struct radeon_drm_bufmgr *mgr; - - mgr = CALLOC_STRUCT(radeon_drm_bufmgr); - if (!mgr) - return NULL; - - mgr->base.destroy = radeon_drm_bufmgr_destroy; - mgr->base.create_buffer = radeon_drm_bufmgr_create_buffer; - mgr->base.flush = radeon_drm_bufmgr_flush; - - mgr->rws = rws; - make_empty_list(&mgr->buffer_map_list); - mgr->buffer_handles = util_hash_table_create(handle_hash, handle_compare); - pipe_mutex_init(mgr->buffer_map_list_mutex); - pipe_mutex_init(mgr->buffer_handles_mutex); - return &mgr->base; -} - -static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf) -{ - struct radeon_drm_buffer *buf = NULL; - - if (_buf->vtbl == &radeon_drm_buffer_vtbl) { - buf = radeon_drm_buffer(_buf); - } else { - struct pb_buffer *base_buf; - pb_size offset; - pb_get_base_buffer(_buf, &base_buf, &offset); - - if (base_buf->vtbl == &radeon_drm_buffer_vtbl) - buf = radeon_drm_buffer(base_buf); - } - - return buf; -} - -static void *radeon_drm_buffer_map(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - struct r300_winsys_cs *cs, - enum pipe_transfer_usage usage) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); 
- - return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), radeon_drm_cs(cs)); -} - -static void radeon_drm_buffer_unmap(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - - pb_unmap(_buf); -} - -boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, - struct winsys_handle *whandle) -{ - struct drm_gem_flink flink; - struct radeon_drm_buffer *buf = get_drm_buffer(_buf); - - if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { - if (!buf->flinked) { - flink.handle = buf->bo->handle; - - if (ioctl(buf->mgr->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { - return FALSE; - } - - buf->flinked = TRUE; - buf->flink = flink.name; - } - whandle->handle = buf->flink; - } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { - whandle->handle = buf->bo->handle; - } - return TRUE; -} - -static void radeon_drm_buffer_get_tiling(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf, - enum r300_buffer_tiling *microtiled, - enum r300_buffer_tiling *macrotiled) -{ - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); - uint32_t flags = 0, pitch; - - radeon_bo_get_tiling(buf->bo, &flags, &pitch); - - *microtiled = R300_BUFFER_LINEAR; - *macrotiled = R300_BUFFER_LINEAR; - if (flags & RADEON_BO_FLAGS_MICRO_TILE) - *microtiled = R300_BUFFER_TILED; - - if (flags & RADEON_BO_FLAGS_MACRO_TILE) - *macrotiled = R300_BUFFER_TILED; -} - -static void radeon_drm_buffer_set_tiling(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf, - enum r300_buffer_tiling microtiled, - enum r300_buffer_tiling macrotiled, - uint32_t pitch) -{ -#ifndef RADEON_BO_FLAGS_MICRO_TILE_SQUARE -#define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 -#endif - - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); - uint32_t flags = 0; - - if (microtiled == R300_BUFFER_TILED) - flags |= RADEON_BO_FLAGS_MICRO_TILE; - else if (microtiled == R300_BUFFER_SQUARETILED) - flags |= 
RADEON_BO_FLAGS_MICRO_TILE_SQUARE; - - if (macrotiled == R300_BUFFER_TILED) - flags |= RADEON_BO_FLAGS_MACRO_TILE; - - radeon_bo_set_tiling(buf->bo, flags, pitch); -} - -static struct r300_winsys_cs_buffer *radeon_drm_get_cs_handle( - struct r300_winsys_screen *rws, - struct r300_winsys_buffer *_buf) -{ - /* return pure radeon_bo. */ - return (struct r300_winsys_cs_buffer*) - get_drm_buffer(radeon_pb_buffer(_buf))->bo; -} - -static boolean radeon_drm_is_buffer_referenced(struct r300_winsys_cs *rcs, - struct r300_winsys_cs_buffer *_buf, - enum r300_reference_domain domain) -{ - struct radeon_bo *bo = (struct radeon_bo*)_buf; - uint32_t tmp; - - if (domain & R300_REF_CS) { - if (radeon_bo_is_referenced_by_cs(bo, NULL)) { - return TRUE; - } - } - - if (domain & R300_REF_HW) { - if (radeon_bo_is_busy(bo, &tmp)) { - return TRUE; - } - } - - return FALSE; -} - -void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct radeon_drm_buffer *rpb, *t_rpb; - - pipe_mutex_lock(mgr->buffer_map_list_mutex); - - foreach_s(rpb, t_rpb, &mgr->buffer_map_list) { - radeon_bo_unmap(rpb->bo); - rpb->bo->ptr = NULL; - remove_from_list(rpb); - } - - make_empty_list(&mgr->buffer_map_list); - - pipe_mutex_unlock(mgr->buffer_map_list_mutex); -} - -static void radeon_drm_buffer_wait(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf) -{ - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); - - radeon_bo_wait(buf->bo); -} - -void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws) -{ - ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; - ws->base.buffer_set_tiling = radeon_drm_buffer_set_tiling; - ws->base.buffer_get_tiling = radeon_drm_buffer_get_tiling; - ws->base.buffer_map = radeon_drm_buffer_map; - ws->base.buffer_unmap = radeon_drm_buffer_unmap; - ws->base.buffer_wait = radeon_drm_buffer_wait; - ws->base.cs_is_buffer_referenced = radeon_drm_is_buffer_referenced; -} 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h deleted file mode 100644 index 494abdc0b48..00000000000 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright © 2008 Jérôme Glisse - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- */ -/* - * Authors: - * Jérôme Glisse - */ -#ifndef RADEON_DRM_BUFFER_H -#define RADEON_DRM_BUFFER_H - -#include "radeon_winsys.h" - -#define RADEON_PB_USAGE_VERTEX (1 << 28) -#define RADEON_PB_USAGE_DOMAIN_GTT (1 << 29) -#define RADEON_PB_USAGE_DOMAIN_VRAM (1 << 30) - -static INLINE struct pb_buffer * -radeon_pb_buffer(struct r300_winsys_buffer *buffer) -{ - return (struct pb_buffer *)buffer; -} - -struct pb_manager *radeon_drm_bufmgr_create(struct radeon_drm_winsys *rws); -struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle); -void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr); -boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, - struct winsys_handle *whandle); -void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws); - -#endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c index 2ecf1bb014c..f9e595017d1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c @@ -30,16 +30,13 @@ */ #include "radeon_winsys.h" -#include "radeon_drm_buffer.h" +#include "radeon_drm_bo.h" #include "radeon_drm_cs.h" #include "radeon_drm_public.h" #include "pipebuffer/pb_bufmgr.h" #include "util/u_memory.h" -#include -#include -#include #include #include @@ -166,11 +163,47 @@ static void radeon_winsys_destroy(struct r300_winsys_screen *rws) ws->cman->destroy(ws->cman); ws->kman->destroy(ws->kman); - - radeon_bo_manager_gem_dtor(ws->bom); FREE(rws); } +static uint32_t radeon_get_value(struct r300_winsys_screen *rws, + enum r300_value_id id) +{ + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws; + + switch(id) { + case R300_VID_PCI_ID: + return ws->pci_id; + case R300_VID_GB_PIPES: + return ws->gb_pipes; + case R300_VID_Z_PIPES: + return ws->z_pipes; + case R300_VID_GART_SIZE: + return ws->gart_size; + case R300_VID_VRAM_SIZE: + return ws->vram_size; + case 
R300_VID_DRM_MAJOR: + return ws->drm_major; + case R300_VID_DRM_MINOR: + return ws->drm_minor; + case R300_VID_DRM_PATCHLEVEL: + return ws->drm_patchlevel; + case R300_VID_DRM_2_1_0: + return ws->drm_major*100 + ws->drm_minor >= 201; + case R300_VID_DRM_2_3_0: + return ws->drm_major*100 + ws->drm_minor >= 203; + case R300_VID_DRM_2_6_0: + return ws->drm_major*100 + ws->drm_minor >= 206; + case R300_VID_DRM_2_8_0: + return ws->drm_major*100 + ws->drm_minor >= 208; + case R300_CAN_HYPERZ: + return ws->hyperz; + case R300_CAN_AACOMPRESS: + return ws->aacompress; + } + return 0; +} + struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) { struct radeon_drm_winsys *ws = CALLOC_STRUCT(radeon_drm_winsys); @@ -186,10 +219,7 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) } /* Create managers. */ - ws->bom = radeon_bo_manager_gem_ctor(fd); - if (!ws->bom) - goto fail; - ws->kman = radeon_drm_bufmgr_create(ws); + ws->kman = radeon_bomgr_create(ws); if (!ws->kman) goto fail; ws->cman = pb_cache_manager_create(ws->kman, 1000000); @@ -198,22 +228,18 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) /* Set functions. 
*/ ws->base.destroy = radeon_winsys_destroy; + ws->base.get_value = radeon_get_value; - radeon_drm_bufmgr_init_functions(ws); + radeon_bomgr_init_functions(ws); radeon_drm_cs_init_functions(ws); - radeon_winsys_init_functions(ws); return &ws->base; fail: - if (ws->bom) - radeon_bo_manager_gem_dtor(ws->bom); - if (ws->cman) ws->cman->destroy(ws->cman); if (ws->kman) ws->kman->destroy(ws->kman); - FREE(ws); return NULL; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 60bc36b0929..6aa3f2ecce1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -63,13 +63,12 @@ */ #include "radeon_drm_cs.h" -#include "radeon_drm_buffer.h" #include "util/u_memory.h" +#include #include #include -#include #include #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) @@ -131,8 +130,7 @@ static inline void update_domains(struct drm_radeon_cs_reloc *reloc, } } -static int radeon_get_reloc(struct radeon_drm_cs *cs, - struct radeon_bo *bo) +int radeon_get_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo) { struct drm_radeon_cs_reloc *reloc; unsigned i; @@ -235,7 +233,7 @@ static void radeon_add_reloc(struct radeon_drm_cs *cs, } static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs, - struct r300_winsys_cs_buffer *buf, + struct r300_winsys_cs_handle *buf, enum r300_buffer_domain rd, enum r300_buffer_domain wd) { @@ -263,7 +261,7 @@ static boolean radeon_drm_cs_validate(struct r300_winsys_cs *rcs) } static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, - struct r300_winsys_cs_buffer *buf) + struct r300_winsys_cs_handle *buf) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; @@ -287,9 +285,6 @@ static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) int r; if (cs->base.cdw) { - /* Unmap buffers. */ - radeon_drm_bufmgr_flush_maps(cs->ws->kman); - /* Prepare the arguments. 
*/ cs->chunks[0].length_dw = cs->base.cdw; @@ -319,7 +314,7 @@ static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) /* Unreference buffers, cleanup. */ for (i = 0; i < cs->crelocs; i++) { - radeon_bo_unref((struct radeon_bo*)cs->relocs_bo[i]); + radeon_bo_unref(cs->relocs_bo[i]); cs->relocs_bo[i] = NULL; } @@ -341,13 +336,22 @@ static void radeon_drm_cs_destroy(struct r300_winsys_cs *rcs) } static void radeon_drm_cs_set_flush(struct r300_winsys_cs *rcs, - void (*flush)(void *), void *user) + void (*flush)(void *), void *user) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); cs->flush_cs = flush; cs->flush_data = user; } +static boolean radeon_bo_is_referenced(struct r300_winsys_cs *rcs, + struct r300_winsys_cs_handle *_buf) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)_buf; + + return radeon_bo_is_referenced_by_cs(cs, bo); +} + void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) { ws->base.cs_create = radeon_drm_cs_create; @@ -357,4 +361,5 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; ws->base.cs_flush = radeon_drm_cs_emit; ws->base.cs_set_flush = radeon_drm_cs_set_flush; + ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 76046534b65..3913c4e79a3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -1,7 +1,7 @@ #ifndef RADEON_DRM_CS_H #define RADEON_DRM_CS_H -#include "radeon_winsys.h" +#include "radeon_drm_bo.h" #include struct radeon_drm_cs { @@ -31,12 +31,25 @@ struct radeon_drm_cs { unsigned reloc_indices_hashlist[256]; }; +int radeon_get_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo); + static INLINE struct radeon_drm_cs * radeon_drm_cs(struct r300_winsys_cs *base) { return (struct radeon_drm_cs*)base; } +static INLINE int 
radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, + struct radeon_bo *bo) +{ + return radeon_get_reloc(cs, bo) != -1; +} + +static INLINE int radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) +{ + return bo->cref > 1; +} + void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws); #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c deleted file mode 100644 index aebe6a60791..00000000000 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "radeon_drm_buffer.h" - -#include "util/u_memory.h" -#include "pipebuffer/pb_bufmgr.h" - -#include "state_tracker/drm_driver.h" - -static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, - enum r300_buffer_domain domain) -{ - unsigned res = 0; - - if (bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT)) - res |= PB_USAGE_GPU_WRITE; - - if (bind & PIPE_BIND_SAMPLER_VIEW) - res |= PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE; - - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - res |= PB_USAGE_GPU_READ; - - if (bind & PIPE_BIND_TRANSFER_WRITE) - res |= PB_USAGE_CPU_WRITE; - - if (bind & PIPE_BIND_TRANSFER_READ) - res |= PB_USAGE_CPU_READ; - - /* Is usage of any use for us? Probably not. */ - - /* Now add driver-specific usage flags. */ - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - res |= RADEON_PB_USAGE_VERTEX; - - if (domain & R300_DOMAIN_GTT) - res |= RADEON_PB_USAGE_DOMAIN_GTT; - - if (domain & R300_DOMAIN_VRAM) - res |= RADEON_PB_USAGE_DOMAIN_VRAM; - - return res; -} - -static struct r300_winsys_buffer * -radeon_r300_winsys_buffer_create(struct r300_winsys_screen *rws, - unsigned size, - unsigned alignment, - unsigned bind, - unsigned usage, - enum r300_buffer_domain domain) -{ - struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); - struct pb_desc desc; - struct pb_manager *provider; - struct pb_buffer *buffer; - - memset(&desc, 0, sizeof(desc)); - desc.alignment = alignment; - desc.usage = get_pb_usage_from_create_flags(bind, usage, domain); - - /* Assign a buffer manager. 
*/ - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - provider = ws->cman; - else - provider = ws->kman; - - buffer = provider->create_buffer(provider, size, &desc); - if (!buffer) - return NULL; - - return (struct r300_winsys_buffer*)buffer; -} - -static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, - struct r300_winsys_buffer **pdst, - struct r300_winsys_buffer *src) -{ - struct pb_buffer *_src = radeon_pb_buffer(src); - struct pb_buffer *_dst = radeon_pb_buffer(*pdst); - - pb_reference(&_dst, _src); - - *pdst = (struct r300_winsys_buffer*)_dst; -} - -static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, - struct winsys_handle *whandle, - unsigned *stride, - unsigned *size) -{ - struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); - struct pb_buffer *_buf; - - _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); - - if (stride) - *stride = whandle->stride; - if (size) - *size = _buf->base.size; - - return (struct r300_winsys_buffer*)_buf; -} - -static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buffer, - unsigned stride, - struct winsys_handle *whandle) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buffer); - whandle->stride = stride; - return radeon_drm_bufmgr_get_handle(_buf, whandle); -} - -static uint32_t radeon_get_value(struct r300_winsys_screen *rws, - enum r300_value_id id) -{ - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws; - - switch(id) { - case R300_VID_PCI_ID: - return ws->pci_id; - case R300_VID_GB_PIPES: - return ws->gb_pipes; - case R300_VID_Z_PIPES: - return ws->z_pipes; - case R300_VID_GART_SIZE: - return ws->gart_size; - case R300_VID_VRAM_SIZE: - return ws->vram_size; - case R300_VID_DRM_MAJOR: - return ws->drm_major; - case R300_VID_DRM_MINOR: - return ws->drm_minor; - case R300_VID_DRM_PATCHLEVEL: - return ws->drm_patchlevel; - case 
R300_VID_DRM_2_1_0: - return ws->drm_major*100 + ws->drm_minor >= 201; - case R300_VID_DRM_2_3_0: - return ws->drm_major*100 + ws->drm_minor >= 203; - case R300_VID_DRM_2_6_0: - return ws->drm_major*100 + ws->drm_minor >= 206; - case R300_VID_DRM_2_8_0: - return ws->drm_major*100 + ws->drm_minor >= 208; - case R300_CAN_HYPERZ: - return ws->hyperz; - case R300_CAN_AACOMPRESS: - return ws->aacompress; - } - return 0; -} - -void radeon_winsys_init_functions(struct radeon_drm_winsys *ws) -{ - ws->base.get_value = radeon_get_value; - ws->base.buffer_create = radeon_r300_winsys_buffer_create; - ws->base.buffer_reference = radeon_r300_winsys_buffer_reference; - ws->base.buffer_from_handle = radeon_r300_winsys_buffer_from_handle; - ws->base.buffer_get_handle = radeon_r300_winsys_buffer_get_handle; -} diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 743c4fbc4a9..be5614ce3ec 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -37,7 +37,6 @@ struct radeon_drm_winsys { int fd; /* DRM file descriptor */ - struct radeon_bo_manager *bom; /* Radeon BO manager. 
*/ struct pb_manager *kman; struct pb_manager *cman; @@ -63,6 +62,4 @@ radeon_drm_winsys(struct r300_winsys_screen *base) return (struct radeon_drm_winsys*)base; } -void radeon_winsys_init_functions(struct radeon_drm_winsys *ws); - #endif -- cgit v1.2.3 From 98f344c5043c570ac1ae395ba0a75f3e3ad06dc8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 11 Feb 2011 01:18:53 +0100 Subject: r300g: fix warning --- src/gallium/drivers/r300/r300_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index ca55984ad9e..6767a5521b2 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -944,7 +944,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 6, 0, 0, FALSE)) + NULL, dwords, 0, 0, FALSE)) return; } else { if (!r300_emit_states(r300, -- cgit v1.2.3 From 9a1fe76a20c6eca67a8b933aa9e84f7ef0ad9ca2 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 11 Feb 2011 03:17:04 +0100 Subject: r300g: prevent NULL pointer dereference in r300_buffer_create Should fix: https://bugs.freedesktop.org/show_bug.cgi?id=33185 --- src/gallium/drivers/r300/r300_screen_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 2e85e2d6ffb..04afae609f9 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -226,14 +226,14 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->b.b.b.width0, alignment, rbuf->b.b.b.bind, rbuf->b.b.b.usage, rbuf->domain); - rbuf->cs_buf = - r300screen->rws->buffer_get_cs_handle(rbuf->buf); - if (!rbuf->buf) { 
util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; } + rbuf->cs_buf = + r300screen->rws->buffer_get_cs_handle(rbuf->buf); + return &rbuf->b.b.b; } -- cgit v1.2.3 From 0d851f6e9c6046052ddce3860e625537832530a0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 10 Feb 2011 14:07:06 +1000 Subject: r600g: handle 16/32 u/s norm formats properly add support for the 32-bit types, also fixup the export setting to handle types with channels > 11 bits properly Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/eg_state_inlines.h | 10 ++++++---- src/gallium/drivers/r600/evergreen_state.c | 16 ++++++++++++++-- src/gallium/drivers/r600/evergreend.h | 3 +++ src/gallium/drivers/r600/r600_state.c | 15 +++++++++++++-- src/gallium/drivers/r600/r600_state_inlines.h | 10 ++++++---- src/gallium/drivers/r600/r600_texture.c | 13 +++++++++++++ src/gallium/drivers/r600/r600d.h | 2 ++ 7 files changed, 57 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index ca00e61f52b..46369cc1e26 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -362,14 +362,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_028C70_COLOR_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_028C70_COLOR_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_028C70_COLOR_32_32_32_32_FLOAT; - return 0; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_SWAP_STD; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; @@ -471,6 +470,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_32_32; /* 128-bit buffers. 
*/ + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_COLOR_32_32_32_32; case PIPE_FORMAT_R32G32B32_FLOAT: return V_028C70_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 83ab0df9c16..4821259e4e5 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -648,6 +648,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned tile_type; const struct util_format_description *desc; struct r600_bo *bo[3]; + int i; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; @@ -679,8 +680,19 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state S_028C70_ARRAY_MODE(rtex->array_mode[level]) | S_028C70_BLEND_CLAMP(1) | S_028C70_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_028C70_SOURCE_FORMAT(1); + + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + /* we can only set the export size if any thing is snorm/unorm component is > 11 bits, + if we aren't a float, sint or uint */ + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && + desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && + ntype != 4 && ntype != 5) + color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC); if (rtex->tiled) { tile_type = rtex->tile_type; diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index dec32b504ee..f0a1ee0cd02 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -327,6 +327,9 @@ #define S_028C70_SOURCE_FORMAT(x) (((x) & 0x3) << 24) #define G_028C70_SOURCE_FORMAT(x) (((x) >> 24) & 0x3) #define C_028C70_SOURCE_FORMAT 0xFCFFFFFF +#define 
V_028C70_EXPORT_4C_32BPC 0x0 +#define V_028C70_EXPORT_4C_16BPC 0x1 +#define V_028C70_EXPORT_2C_32BPC 0x2 /* Do not use */ #define S_028C70_RAT(x) (((x) & 0x1) << 26) #define G_028C70_RAT(x) (((x) >> 26) & 0x1) #define C_028C70_RAT 0xFBFFFFFF diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 74dad450729..0834bf404a0 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -692,6 +692,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; + int i; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; @@ -716,6 +717,12 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_0280A0_NUMBER_SRGB; + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + format = r600_translate_colorformat(surf->base.format); swap = r600_translate_colorswap(surf->base.format); color_info = S_0280A0_FORMAT(format) | @@ -723,8 +730,12 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_0280A0_SOURCE_FORMAT(1); + + /* on R600 this can't be set if BLEND_CLAMP isn't set, + if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */ + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && + desc->channel[i].size < 12) + color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM); r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index f68bc849e2e..81805158966 100644 --- 
a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -355,14 +355,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return FMT_32_32_32_32_FLOAT; - return 0; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_SWAP_STD; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; @@ -469,6 +468,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_COLOR_32_32_32_32; /* YUV buffers. 
*/ case PIPE_FORMAT_UYVY: diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index b7bfdd8c166..df8072fc13c 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -990,6 +990,19 @@ uint32_t r600_translate_texformat(enum pipe_format format, result = FMT_16_16_16_16; goto out_word4; } + goto out_unknown; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + goto out_word4; + case 2: + result = FMT_32_32; + goto out_word4; + case 4: + result = FMT_32_32_32_32; + goto out_word4; + } } goto out_unknown; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 1814f504ed9..e8558c49a7c 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -248,6 +248,8 @@ #define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27) #define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1) #define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF +#define V_0280A0_EXPORT_FULL 0 +#define V_0280A0_EXPORT_NORM 1 #define R_028060_CB_COLOR0_SIZE 0x028060 #define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) #define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) -- cgit v1.2.3 From 9d85aba0e392250ecea0377f9aa9691bd02622e0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Feb 2011 10:45:59 +1000 Subject: r600g: drop two unused | 0 that are actually in word4 anyways. these were NOPs anyways. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 2 +- src/gallium/drivers/r600/r600_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4821259e4e5..88dcc9ba544 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -411,7 +411,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | + word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) | S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0834bf404a0..01c59072a26 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -458,7 +458,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | + word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); -- cgit v1.2.3 From 596684eb93067f5281ce7c32123a7f009c8a98c6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Feb 2011 11:47:43 +1000 Subject: r600g: get correct height alignment useful for s3tc --- src/gallium/drivers/r600/r600_texture.c | 5 ++--- 1 file changed, 2 insertions(+), 3 
deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index df8072fc13c..b8bc365bcbd 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -430,7 +430,6 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct r600_surface *surface = CALLOC_STRUCT(r600_surface); - unsigned tile_height; unsigned level = surf_tmpl->u.tex.level; assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); @@ -450,8 +449,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; surface->base.u.tex.level = level; - tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]); - surface->aligned_height = align(surface->base.height, tile_height); + surface->aligned_height = r600_texture_get_nblocksy(pipe->screen, + rtex, level); return &surface->base; } -- cgit v1.2.3 From f0ca9f71341f494be961aacc08bddd9cea28fca2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Feb 2011 07:47:34 +0000 Subject: svga: Stippled lines can also be drawn with triangles. --- src/gallium/drivers/svga/svga_pipe_rasterizer.c | 3 ++- src/gallium/drivers/svga/svga_swtnl_state.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index c6657e79ef3..d391fde7b2f 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -108,7 +108,8 @@ svga_create_rasterizer_state(struct pipe_context *pipe, } if (templ->line_stipple_enable) { - /* LinePattern not implemented on all backends. 
+ /* XXX: LinePattern not implemented on all backends, and there is no + * mechanism to query it. */ if (!svga->debug.force_hw_line_stipple) { SVGA3dLinePattern lp; diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index a7592382936..efda2f605b9 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -61,7 +61,7 @@ static void set_draw_viewport( struct svga_context *svga ) * going to be drawn with triangles, but we're not catching all * cases where that will happen. */ - if (svga->curr.rast->templ.line_width > 1.0) + if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES) { adjx = SVGA_LINE_ADJ_X + 0.175; adjy = SVGA_LINE_ADJ_Y - 0.175; -- cgit v1.2.3 From 7ac2db893a87684641f298b40e25a3b2d9991b53 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Feb 2011 11:14:44 +0000 Subject: llvmpipe: Use u_math's round. --- src/gallium/drivers/llvmpipe/lp_test_main.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 149ee6f1256..d229c620310 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -35,24 +35,13 @@ #include "util/u_cpu_detect.h" +#include "util/u_math.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_init.h" #include "lp_test.h" -#ifdef PIPE_CC_MSVC -static INLINE double -round(double x) -{ - if (x >= 0.0) - return floor(x + 0.5); - else - return ceil(x - 0.5); -} -#endif - - void dump_type(FILE *fp, struct lp_type type) -- cgit v1.2.3 From 57a3d36a685b4d6e39988d6c10a3201be789befc Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Feb 2011 11:20:54 +0000 Subject: svga: Don't use the draw pipeline for non-AA lines with a fractional width. Spotted by Jakob Bornecrantz. 
--- src/gallium/drivers/svga/svga_pipe_rasterizer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index d391fde7b2f..2c4292e1f38 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -100,8 +100,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->need_pipeline_tris_str = "poly stipple"; } - if (templ->line_width != 1.0 && - templ->line_width != 0.0 && + if (templ->line_width >= 1.5f && !svga->debug.no_line_width) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; rast->need_pipeline_lines_str = "line width"; -- cgit v1.2.3 From 6ed0f2ac112d22278cf051c2cee9c2199a9025ea Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Feb 2011 11:22:02 +0000 Subject: svga: Enable the draw pipeline for smooth lines. Spotted by Brian Paul. --- src/gallium/drivers/svga/svga_pipe_rasterizer.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 2c4292e1f38..f2c1a3c578d 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -127,6 +127,11 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->need_pipeline_points_str = "smooth points"; } + if (templ->line_smooth) { + rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "smooth lines"; + } + { int fill_front = templ->fill_front; int fill_back = templ->fill_back; -- cgit v1.2.3 From 8c617990519a113886fe743b5932525b0cb37544 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Feb 2011 09:03:03 -0700 Subject: svga: remove old comment, remove extra whitespace --- src/gallium/drivers/svga/svga_pipe_rasterizer.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 
'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index f2c1a3c578d..4a1a37f1765 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -77,7 +77,6 @@ svga_create_rasterizer_state(struct pipe_context *pipe, /* point_quad_rasterization - ? */ /* point_size_per_vertex - ? */ /* sprite_coord_mode - ??? */ - /* bypass_vs_viewport_and_clip - handled by viewport setup */ /* flatshade_first - handled by index translation */ /* gl_rasterization_rules - XXX - viewport code */ /* line_width - draw module */ @@ -220,9 +219,6 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->hw_unfilled = fill; } - - - if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) { /* Turn off stuff which will get done in the draw module: */ -- cgit v1.2.3 From da2e541218d65c8931f0061fc4badd8fdedfcb83 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Feb 2011 11:56:44 -0700 Subject: svga: add max DMA size check in svga_winsys_buffer_create() This fixes a problem when trying to use large (2K x 2K) texture images. We'll DMA the image in chunks. Patch written by Jose. --- src/gallium/drivers/svga/svga_resource_buffer_upload.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 765d2f34082..fdc0329f6c9 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -40,6 +40,9 @@ #include "svga_debug.h" +#define MAX_DMA_SIZE (8 * 1024 * 1024) + + /** * Allocate a winsys_buffer (ie. DMA, aka GMR memory). 
* @@ -57,6 +60,13 @@ svga_winsys_buffer_create( struct svga_context *svga, struct svga_winsys_screen *sws = svgascreen->sws; struct svga_winsys_buffer *buf; + /* XXX this shouldn't be a hard-coded number; it should be queried + * somehow. + */ + if (size > MAX_DMA_SIZE) { + return NULL; + } + /* Just try */ buf = sws->buffer_create(sws, alignment, usage, size); if(!buf) { -- cgit v1.2.3 From f7d84c177fe58e767a58748b33794ecea507ac8c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Feb 2011 13:54:15 -0700 Subject: svga: more comments for need_pipeline field --- src/gallium/drivers/svga/svga_context.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 0550ddd79b9..37ca417d2ff 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -147,8 +147,11 @@ struct svga_rasterizer_state { float pointsize; unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */ - unsigned need_pipeline:16; /* which prims do we need help for? */ + /** Which prims do we need help for? 
Bitmask of (1 << PIPE_PRIM_x) flags */ + unsigned need_pipeline:16; + + /** For debugging: */ const char* need_pipeline_tris_str; const char* need_pipeline_lines_str; const char* need_pipeline_points_str; -- cgit v1.2.3 From 396da5df0e50c90a74bbf57c980e484cf1a527b7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Feb 2011 13:56:06 -0700 Subject: svga: comments and debug code --- src/gallium/drivers/svga/svga_state_need_swtnl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index e06e1f8e5f9..bfd77f6b860 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -35,6 +35,11 @@ /*********************************************************************** */ + +/** + * Given a gallium vertex element format, return the corresponding SVGA3D + * format. Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats. 
+ */ static INLINE SVGA3dDeclType svga_translate_vertex_format(enum pipe_format format) { @@ -80,6 +85,7 @@ static int update_need_swvfetch( struct svga_context *svga, for (i = 0; i < svga->curr.velems->count; i++) { svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format); if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) { + /* Unsupported format - use software fetch */ need_swvfetch = TRUE; break; } @@ -145,6 +151,10 @@ static int update_need_pipeline( struct svga_context *svga, svga->dirty |= SVGA_NEW_NEED_PIPELINE; } + /* DEBUG */ + if (0 && svga->state.sw.need_pipeline) + debug_printf("sw.need_pipeline = %d\n", svga->state.sw.need_pipeline); + return 0; } -- cgit v1.2.3 From 6c3a82a1a3244880cfe9b72dcfb29d29d0baafc3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Feb 2011 14:07:43 -0700 Subject: svga: disable a debug_printf() call --- src/gallium/drivers/svga/svga_resource_texture.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 213547115a4..b63c84eee71 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -390,11 +390,15 @@ svga_texture_get_transfer(struct pipe_context *pipe, if(st->hw_nblocksy < nblocksy) { /* We couldn't allocate a hardware buffer big enough for the transfer, * so allocate regular malloc memory instead */ - debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n", - __FUNCTION__, - (nblocksy*st->base.stride + 1023)/1024, - (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, - (st->hw_nblocksy*st->base.stride + 1023)/1024); + if (0) { + debug_printf("%s: failed to allocate %u KB of DMA, " + "splitting into %u x %u KB DMA transfers\n", + __FUNCTION__, + (nblocksy*st->base.stride + 1023)/1024, + (nblocksy + 
st->hw_nblocksy - 1)/st->hw_nblocksy, + (st->hw_nblocksy*st->base.stride + 1023)/1024); + } + st->swbuf = MALLOC(nblocksy*st->base.stride); if(!st->swbuf) goto no_swbuf; -- cgit v1.2.3 From 9ad9a6861a01997fa88fe35089d63348f7b076af Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 12 Feb 2011 23:37:14 +0100 Subject: r300g: add debug options nozmask and nohiz which disable some hyper-z features --- src/gallium/drivers/r300/r300_debug.c | 2 ++ src/gallium/drivers/r300/r300_screen.c | 5 +++++ src/gallium/drivers/r300/r300_screen.h | 2 ++ 3 files changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index c6b4804cd8d..b60cfd1f248 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -50,6 +50,8 @@ static const struct debug_named_value debug_options[] = { { "noimmd", DBG_NO_IMMD, "Disable immediate mode" }, { "noopt", DBG_NO_OPT, "Disable shader optimizations" }, { "nocbzb", DBG_NO_CBZB, "Disable fast color clear" }, + { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" }, + { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index f54ba4286d9..0b0220654a7 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -454,6 +454,11 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) r300_init_debug(r300screen); r300_parse_chipset(&r300screen->caps); + if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) + r300screen->caps.zmask_ram = 0; + if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) + r300screen->caps.hiz_ram = 0; + r300screen->caps.index_bias_supported = r300screen->caps.is_r500 && rws->get_value(rws, R300_VID_DRM_2_3_0); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 
973b7926fd1..c935f55ccbf 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -102,6 +102,8 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FAKE_OCC (1 << 19) #define DBG_NO_OPT (1 << 20) #define DBG_NO_CBZB (1 << 21) +#define DBG_NO_ZMASK (1 << 22) +#define DBG_NO_HIZ (1 << 23) /* Statistics. */ #define DBG_P_STAT (1 << 25) /*@}*/ -- cgit v1.2.3 From 1fd6bbc88103cd49623f8235bb20f5f04dfd78c6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 12 Feb 2011 23:38:00 +0100 Subject: r300g: when printing shader linker errors to stderr, report it's not a bug --- src/gallium/drivers/r300/r300_state_derived.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 41b57b502fc..d715dd82f86 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -489,7 +489,8 @@ static void r300_update_rs_block(struct r300_context *r300) for (; i < ATTR_GENERIC_COUNT; i++) { if (fs_inputs->generic[i] != ATTR_UNUSED) { fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, " - "not enough hardware slots.\n", i); + "not enough hardware slots (it's not a bug, do not " + "report it).\n", i); } } @@ -524,7 +525,8 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); } else { fprintf(stderr, "r300: ERROR: FS input fog unassigned, " - "not enough hardware slots.\n"); + "not enough hardware slots. (it's not a bug, " + "do not report it)\n"); } } } @@ -551,7 +553,8 @@ static void r300_update_rs_block(struct r300_context *r300) } else { if (fs_inputs->wpos != ATTR_UNUSED && tex_count >= 8) { fprintf(stderr, "r300: ERROR: FS input WPOS unassigned, " - "not enough hardware slots.\n"); + "not enough hardware slots. 
(it's not a bug, do not " + "report it)\n"); } } -- cgit v1.2.3 From e6e4860555068af8aba79d620acafc5fd5214d1f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 13 Feb 2011 07:06:22 +0100 Subject: r300g: correctly determine if a texture is blittable in texture_get_transfer --- src/gallium/drivers/r300/r300_transfer.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index b5572128874..30de9ec1e32 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -87,6 +87,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; + const struct util_format_description *desc = + util_format_description(texture->format); referenced_cs = r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf); @@ -97,9 +99,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, r300->rws->buffer_is_busy(tex->buf); } - blittable = ctx->screen->is_format_supported( - ctx->screen, texture->format, texture->target, 0, - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0); + blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || + desc->layout == UTIL_FORMAT_LAYOUT_S3TC; trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -113,7 +114,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * for this transfer. * Also make write transfers pipelined. 
*/ if (tex->tex.microtile || tex->tex.macrotile[level] || - ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { + (referenced_hw && blittable && !(usage & PIPE_TRANSFER_READ))) { if (r300->blitter->running) { fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n"); os_break(); -- cgit v1.2.3 From 8fe5da89e33a2408c21dd536d0b2e2178aeaef1e Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 13 Feb 2011 22:05:28 +0100 Subject: nv50: fix query assertion --- src/gallium/drivers/nv50/nv50_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 53f94820ce0..f3418df8381 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -45,7 +45,7 @@ nv50_query_create(struct pipe_context *pipe, unsigned type) struct nv50_query *q = CALLOC_STRUCT(nv50_query); int ret; - assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); + assert (type == PIPE_QUERY_OCCLUSION_COUNTER); q->type = type; ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 256, -- cgit v1.2.3 From 5197b09beeb729637b915bc7b5d599227387d81e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 00:55:18 +0100 Subject: r300g: remove the relocation after AARESOLVE_PITCH --- src/gallium/drivers/r300/r300_emit.c | 2 -- src/gallium/drivers/r300/r300_render.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 13c1f2d8424..be5768a3e5d 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -354,9 +354,7 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) if (aa->dest) { OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset); OUT_CS_RELOC(aa->dest); - OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, 
aa->dest->pitch); - OUT_CS_RELOC(aa->dest); } OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 6767a5521b2..0d50de5e7f9 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -1215,7 +1215,7 @@ static void r300_resource_resolve(struct pipe_context* pipe, aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; - r300->aa_state.size = 12; + r300->aa_state.size = 10; r300_mark_atom_dirty(r300, &r300->aa_state); /* Resolve the surface. */ -- cgit v1.2.3 From 004dd015839dfb77b9d66fb2df6514feefb87d9e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 06:26:56 +0100 Subject: r300g: fix reference counting when translating indices --- src/gallium/drivers/r300/r300_render_translate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index dbc64ebf827..777857514fc 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -44,6 +44,7 @@ void r300_translate_index_buffer(struct r300_context *r300, &r300->context, *index_buffer, index_offset, *start, count, ptr); + *index_buffer = NULL; pipe_resource_reference(index_buffer, out_buffer); *index_size = 2; *start = out_offset / 2; @@ -59,6 +60,7 @@ void r300_translate_index_buffer(struct r300_context *r300, index_offset, *start, count, ptr); + *index_buffer = NULL; pipe_resource_reference(index_buffer, out_buffer); *start = out_offset / 2; r300->validate_buffers = TRUE; @@ -74,6 +76,7 @@ void r300_translate_index_buffer(struct r300_context *r300, index_offset, *start, count, ptr); + *index_buffer = NULL; pipe_resource_reference(index_buffer, out_buffer); *start = out_offset / 4; r300->validate_buffers = TRUE; -- 
cgit v1.2.3 From 5a6ba08c21f24b14458a2084a170ddfbe8f5d793 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 06:45:55 +0100 Subject: r300g: emit 3D_LOAD_VBPNTR only when necessary I thought I couldn't skip emitting this packet in some cases. Well it looks like I can. --- src/gallium/drivers/r300/r300_context.h | 5 +- src/gallium/drivers/r300/r300_emit.c | 81 ++++++++------------------------- src/gallium/drivers/r300/r300_flush.c | 1 + src/gallium/drivers/r300/r300_render.c | 10 +++- 4 files changed, 33 insertions(+), 64 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 9335c680bf6..6e940b46fa4 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -589,9 +589,10 @@ struct r300_context { /* const tracking for VS */ int vs_const_base; - /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. */ - uint32_t vertex_arrays_cb[(16 * 3 + 1) / 2]; + /* Vertex array state info */ boolean vertex_arrays_dirty; + boolean vertex_arrays_indexed; + int vertex_arrays_offset; /* Whether any buffer (FB, textures, VBOs) has been set, but buffers * haven't been validated yet. 
*/ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index be5768a3e5d..027cd5b7ea0 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -805,40 +805,6 @@ void r300_emit_textures_state(struct r300_context *r300, END_CS; } -static void r300_update_vertex_arrays_cb(struct r300_context *r300, unsigned packet_size) -{ - struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vbuf_mgr->vertex_buffer; - struct pipe_vertex_element *velem = r300->velems->velem; - unsigned *hw_format_size = r300->velems->format_size; - unsigned size1, size2, vertex_array_count = r300->velems->count; - int i; - CB_LOCALS; - - BEGIN_CB(r300->vertex_arrays_cb, packet_size); - for (i = 0; i < vertex_array_count - 1; i += 2) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - vb2 = &vbuf[velem[i+1].vertex_buffer_index]; - size1 = hw_format_size[i]; - size2 = hw_format_size[i+1]; - - OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | - R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CB(vb1->buffer_offset + velem[i].src_offset); - OUT_CB(vb2->buffer_offset + velem[i+1].src_offset); - } - - if (vertex_array_count & 1) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - size1 = hw_format_size[i]; - - OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CB(vb1->buffer_offset + velem[i].src_offset); - } - END_CB; - - r300->vertex_arrays_dirty = FALSE; -} - void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed) { struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; @@ -854,35 +820,28 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(vertex_array_count | (!indexed ? 
R300_VC_FORCE_PREFETCH : 0)); - if (!offset) { - if (r300->vertex_arrays_dirty) { - r300_update_vertex_arrays_cb(r300, packet_size); - } - OUT_CS_TABLE(r300->vertex_arrays_cb, packet_size); - } else { - struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size = r300->velems->format_size; - unsigned size1, size2; - - for (i = 0; i < vertex_array_count - 1; i += 2) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - vb2 = &vbuf[velem[i+1].vertex_buffer_index]; - size1 = hw_format_size[i]; - size2 = hw_format_size[i+1]; - - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | - R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); - } + struct pipe_vertex_buffer *vb1, *vb2; + unsigned *hw_format_size = r300->velems->format_size; + unsigned size1, size2; - if (vertex_array_count & 1) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - size1 = hw_format_size[i]; + for (i = 0; i < vertex_array_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - } + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); + } + + if (vertex_array_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); } for (i = 0; i < vertex_array_count; i++) { diff 
--git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 986ea5ff35a..1e80f802f56 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -59,6 +59,7 @@ static void r300_flush(struct pipe_context* pipe, r300_mark_atom_dirty(r300, atom); } } + r300->vertex_arrays_dirty = TRUE; /* Unmark HWTCL state for SWTCL. */ if (!r300->screen->caps.has_tcl) { diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 0d50de5e7f9..abe7b506d78 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -264,9 +264,17 @@ static boolean r300_emit_states(struct r300_context *r300, r500_emit_index_bias(r300, 0); } - if (emit_vertex_arrays) + if (emit_vertex_arrays && + (r300->vertex_arrays_dirty || + r300->vertex_arrays_indexed != indexed || + r300->vertex_arrays_offset != buffer_offset)) { r300_emit_vertex_arrays(r300, buffer_offset, indexed); + r300->vertex_arrays_dirty = FALSE; + r300->vertex_arrays_indexed = indexed; + r300->vertex_arrays_offset = buffer_offset; + } + if (emit_vertex_arrays_swtcl) r300_emit_vertex_arrays_swtcl(r300, indexed); } -- cgit v1.2.3 From e9d993e9b9d7ea2b4a9be7caa4e5a73fe5126cbe Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 06:59:00 +0100 Subject: r600g: do not destroy the original index buffer when translating indices Because we only translate a subrange of the buffer. 
--- src/gallium/drivers/r600/r600_state_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index bcaf2b9e45e..72707fbd8b8 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -449,14 +449,14 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) draw.ctx = ctx; if (info->indexed && rctx->index_buffer.buffer) { draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, + r600_translate_index_buffer(rctx, &draw.index_buffer, &rctx->index_buffer.index_size, &draw.info.start, info->count); draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); draw.index_buffer_offset = draw.info.start * draw.index_size; draw.info.start = 0; -- cgit v1.2.3 From 3b01b52bd78e3d2fc857feacebd815a5fae00c94 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 14 Feb 2011 00:07:07 -0800 Subject: r300g: Move declaration before code. Fixes SCons build. 
--- src/gallium/drivers/r300/r300_emit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 027cd5b7ea0..9c3a5e882f2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -814,15 +814,16 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde int i; unsigned vertex_array_count = r300->velems->count; unsigned packet_size = (vertex_array_count * 3 + 1) / 2; + struct pipe_vertex_buffer *vb1, *vb2; + unsigned *hw_format_size = r300->velems->format_size; + unsigned size1, size2; CS_LOCALS(r300); BEGIN_CS(2 + packet_size + vertex_array_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); - struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size = r300->velems->format_size; - unsigned size1, size2; + hw_format_size = r300->velems->format_size; for (i = 0; i < vertex_array_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; -- cgit v1.2.3 From 3d5ac32f3bf95ceb9f3f03d6dedea5445ed35b18 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 09:52:15 +0100 Subject: r300g: consolidate emission of common draw regs --- src/gallium/drivers/r300/r300_render.c | 44 ++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index abe7b506d78..fddabee7b24 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -127,6 +127,20 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias) END_CS; } +static void r300_emit_draw_init(struct r300_context *r300, unsigned mode, + unsigned min_index, unsigned max_index) +{ + CS_LOCALS(r300); + + BEGIN_CS(5); + 
OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(max_index); + OUT_CS(min_index); + END_CS; +} + /* This function splits the index bias value into two parts: * - buffer_offset: the value that can be safely added to buffer offsets * in r300_emit_vertex_arrays (it must yield a positive offset when added to @@ -366,7 +380,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned vertex_size = r300->velems->vertex_size_dwords; /* The number of dwords for this draw operation. */ - unsigned dwords = 9 + count * vertex_size; + unsigned dwords = 4 + count * vertex_size; /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; @@ -406,13 +420,10 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, mapelem[i] = map[vbi] + (velem->src_offset / 4); } + r300_emit_draw_init(r300, mode, 0, count-1); + BEGIN_CS(dwords); - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); @@ -449,15 +460,12 @@ static void r300_emit_draw_arrays(struct r300_context *r300, return; } - BEGIN_CS(7 + (alt_num_verts ? 2 : 0)); + r300_emit_draw_init(r300, mode, 0, count-1); + + BEGIN_CS(2 + (alt_num_verts ? 
2 : 0)); if (alt_num_verts) { OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300_translate_primitive(mode) | @@ -479,22 +487,16 @@ static void r300_emit_draw_elements(struct r300_context *r300, boolean alt_num_verts = count > 65535; CS_LOCALS(r300); - if (count >= (1 << 24)) { + if (count >= (1 << 24) || maxIndex >= (1 << 24)) { fprintf(stderr, "r300: Got a huge number of vertices: %i, " - "refusing to render.\n", count); + "refusing to render (maxIndex: %i).\n", count, maxIndex); return; } DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", count, minIndex, maxIndex); - BEGIN_CS(5); - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(maxIndex); - OUT_CS(minIndex); - END_CS; + r300_emit_draw_init(r300, mode, minIndex, maxIndex); /* If start is odd, render the first triangle with indices embedded * in the command stream. 
This will increase start by 3 and make it -- cgit v1.2.3 From d173f1ba8a3d77e46f73ecb00378ea9598fe2c68 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 10:05:52 +0100 Subject: r300g: fix fallback for misaligned ushort indices with num vertices >= 65535 --- src/gallium/drivers/r300/r300_render.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index fddabee7b24..051f434d428 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -615,7 +615,11 @@ static void r300_draw_range_elements(struct pipe_context* pipe, minIndex, maxIndex, mode, start, count, indices3); } else { do { - short_count = MIN2(count, 65534); + if (indexSize == 2 && (start & 1)) + short_count = MIN2(count, 65535); + else + short_count = MIN2(count, 65534); + r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, short_count, indices3); -- cgit v1.2.3 From 476cec37d615df7c7329ef74d4a7ea7200b2d8fb Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 10:08:59 +0100 Subject: r300g: do not create a user buffer struct for misaligned ushort indices fallback --- src/gallium/drivers/r300/r300_render.c | 15 ++++++--------- src/gallium/drivers/r300/r300_screen_buffer.c | 3 +-- src/gallium/drivers/r300/r300_screen_buffer.h | 2 +- 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 051f434d428..37b9934a619 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -578,7 +578,6 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (indexSize == 2 && (start & 1) && !r300_resource(indexBuffer)->b.user_ptr) { struct pipe_transfer *transfer; - struct pipe_resource *userbuf; uint16_t *ptr = 
pipe_buffer_map(pipe, indexBuffer, PIPE_TRANSFER_READ | @@ -590,18 +589,16 @@ static void r300_draw_range_elements(struct pipe_context* pipe, } else { /* Copy the mapped index buffer directly to the upload buffer. * The start index will be aligned simply from the fact that - * every sub-buffer in u_upload_mgr is aligned. */ - userbuf = pipe->screen->user_buffer_create(pipe->screen, - ptr, 0, - PIPE_BIND_INDEX_BUFFER); - indexBuffer = userbuf; - r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count); - pipe_resource_reference(&userbuf, NULL); + * every sub-buffer in the upload buffer is aligned. */ + r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, + count, (uint8_t*)ptr); } pipe_buffer_unmap(pipe, transfer); } else { if (r300_resource(indexBuffer)->b.user_ptr) - r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count); + r300_upload_index_buffer(r300, &indexBuffer, indexSize, + &start, count, + r300_resource(indexBuffer)->b.user_ptr); } /* 19 dwords for emit_draw_elements. Give up if the function fails. 
*/ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 04afae609f9..7855d70a973 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -51,10 +51,9 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, void r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned *start, - unsigned count) + unsigned count, uint8_t *ptr) { unsigned index_offset; - uint8_t *ptr = r300_resource(*index_buffer)->b.user_ptr; boolean flushed; *index_buffer = NULL; diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index ae87c4406a7..14bee460d5b 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -40,7 +40,7 @@ void r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned *start, - unsigned count); + unsigned count, uint8_t *ptr); struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); -- cgit v1.2.3 From 437583ea637ab402a06ae6683af6df35d52512d4 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 10:11:50 +0100 Subject: r300g: cleanup the draw functions --- src/gallium/drivers/r300/r300_render.c | 126 +++++++++++++-------------------- src/gallium/drivers/r300/r300_state.c | 3 + 2 files changed, 53 insertions(+), 76 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 37b9934a619..299038a4e83 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -366,10 +366,9 @@ static boolean immd_is_good_idea(struct r300_context *r300, * The HWTCL draw functions. 
* ****************************************************************************/ -static void r300_emit_draw_arrays_immediate(struct r300_context *r300, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, unsigned start, + unsigned count) { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; @@ -546,17 +545,11 @@ static void r300_emit_draw_elements(struct r300_context *r300, END_CS; } -/* This is the fast-path drawing & emission for HW TCL. */ -static void r300_draw_range_elements(struct pipe_context* pipe, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count, - boolean user_buffers) +static void r300_draw_elements(struct r300_context *r300, int indexBias, + unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count, + boolean user_buffers) { - struct r300_context* r300 = r300_context(pipe); struct pipe_resource *indexBuffer = r300->index_buffer.buffer; unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; @@ -579,7 +572,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, !r300_resource(indexBuffer)->b.user_ptr) { struct pipe_transfer *transfer; - uint16_t *ptr = pipe_buffer_map(pipe, indexBuffer, + uint16_t *ptr = pipe_buffer_map(&r300->context, indexBuffer, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, &transfer); @@ -593,7 +586,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count, (uint8_t*)ptr); } - pipe_buffer_unmap(pipe, transfer); + pipe_buffer_unmap(&r300->context, transfer); } else { if (r300_resource(indexBuffer)->b.user_ptr) r300_upload_index_buffer(r300, &indexBuffer, indexSize, @@ -604,7 +597,8 @@ static void r300_draw_range_elements(struct pipe_context* pipe, /* 19 dwords for emit_draw_elements. 
Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias, user_buffers)) + PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias, + user_buffers)) goto done; if (alt_num_verts || count <= 65535) { @@ -640,44 +634,39 @@ done: } } -static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, +static void r300_draw_arrays(struct r300_context *r300, unsigned mode, unsigned start, unsigned count, boolean user_buffers) { - struct r300_context* r300 = r300_context(pipe); boolean alt_num_verts = r300->screen->caps.is_r500 && count > 65536 && r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; - if (immd_is_good_idea(r300, count)) { - r300_emit_draw_arrays_immediate(r300, mode, start, count); - } else { - /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ - if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0, user_buffers)) - return; + /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ + if (!r300_prepare_for_rendering(r300, + PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, + NULL, 9, start, 0, user_buffers)) + return; - if (alt_num_verts || count <= 65535) { - r300_emit_draw_arrays(r300, mode, count); - } else { - do { - short_count = MIN2(count, 65535); - r300_emit_draw_arrays(r300, mode, short_count); + if (alt_num_verts || count <= 65535) { + r300_emit_draw_arrays(r300, mode, count); + } else { + do { + short_count = MIN2(count, 65535); + r300_emit_draw_arrays(r300, mode, short_count); - start += short_count; - count -= short_count; + start += short_count; + count -= short_count; - /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. 
*/ - if (count) { - if (!r300_prepare_for_rendering(r300, - PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0, user_buffers)) - return; - } - } while (count); - } + /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ + if (count) { + if (!r300_prepare_for_rendering(r300, + PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, + start, 0, user_buffers)) + return; + } + } while (count); } } @@ -688,18 +677,17 @@ static void r300_draw_vbo(struct pipe_context* pipe, unsigned count = info->count; boolean buffers_updated, uploader_flushed; boolean indexed = info->indexed && r300->index_buffer.buffer; + unsigned start_indexed = info->start + r300->index_buffer.offset; + int max_index = MIN2(r300->vbuf_mgr->max_index, info->max_index); - if (r300->skip_rendering) { - return; - } - - if (!u_trim_pipe_prim(info->mode, &count)) { + if (r300->skip_rendering || + !u_trim_pipe_prim(info->mode, &count)) { return; } + /* Start the vbuf manager and update buffers if needed. */ u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info, &buffers_updated, &uploader_flushed); - if (buffers_updated) { r300->vertex_arrays_dirty = TRUE; @@ -711,34 +699,20 @@ static void r300_draw_vbo(struct pipe_context* pipe, r300->upload_vb_validated = FALSE; } - if (indexed) { - /* Compute the start for draw_elements, taking the offset into account. */ - unsigned start_indexed = - info->start + - (r300->index_buffer.offset / r300->index_buffer.index_size); - int max_index = MIN2(r300->vbuf_mgr->max_index, info->max_index); - - assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); - - /* Index buffer range checking. */ - if ((start_indexed + count) * r300->index_buffer.index_size > - r300->index_buffer.buffer->width0) { - fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); - return; - } - - if (max_index >= (1 << 24) - 1) { - fprintf(stderr, "r300: Invalid max_index: %i. Skipping rendering...\n", max_index); - return; - } + /* Draw. 
*/ + r300_update_derived_state(r300); - r300_update_derived_state(r300); - r300_draw_range_elements(pipe, info->index_bias, info->min_index, - max_index, info->mode, start_indexed, count, - buffers_updated); + if (indexed) { + r300_draw_elements(r300, info->index_bias, info->min_index, + max_index, info->mode, start_indexed, count, + buffers_updated); } else { - r300_update_derived_state(r300); - r300_draw_arrays(pipe, info->mode, info->start, count, buffers_updated); + if (immd_is_good_idea(r300, count)) { + r300_draw_arrays_immediate(r300, info->mode, info->start, count); + } else { + r300_draw_arrays(r300, info->mode, info->start, count, + buffers_updated); + } } u_vbuf_mgr_draw_end(r300->vbuf_mgr); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 2ec96003795..242f883314e 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1510,9 +1510,12 @@ static void r300_set_index_buffer(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); + assert(ib->offset % ib->index_size == 0); + if (ib && ib->buffer) { pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + r300->index_buffer.offset /= r300->index_buffer.index_size; if (r300->screen->caps.has_tcl && !r300_resource(ib->buffer)->b.user_ptr) { -- cgit v1.2.3 From a0c293ec117c8a6f471061076ba87e245759e0f6 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 10:58:24 +0100 Subject: r300g: put indices in CS if there's just a few of them and are in user memory --- src/gallium/drivers/r300/r300_render.c | 102 ++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 299038a4e83..efad9fa1af7 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ 
b/src/gallium/drivers/r300/r300_render.c @@ -545,6 +545,94 @@ static void r300_emit_draw_elements(struct r300_context *r300, END_CS; } +static void r300_draw_elements_immediate(struct r300_context *r300, + int indexBias, unsigned minIndex, + unsigned maxIndex, unsigned mode, + unsigned start, unsigned count, + boolean user_buffers) +{ + uint8_t *ptr1; + uint16_t *ptr2; + uint32_t *ptr4; + unsigned index_size = r300->index_buffer.index_size; + unsigned i, count_dwords = index_size == 4 ? count : (count + 1) / 2; + CS_LOCALS(r300); + + /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ + if (!r300_prepare_for_rendering(r300, + PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_INDEXED, NULL, 2+count_dwords, 0, indexBias, + user_buffers)) + return; + + r300_emit_draw_init(r300, mode, minIndex, maxIndex); + + BEGIN_CS(2 + count_dwords); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords); + + switch (index_size) { + case 1: + ptr1 = r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr1 += start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + r300_translate_primitive(mode)); + + if (indexBias && !r300->screen->caps.index_bias_supported) { + for (i = 0; i < count-1; i += 2) + OUT_CS(((ptr1[i+1] + indexBias) << 16) | + (ptr1[i] + indexBias)); + + if (count & 1) + OUT_CS(ptr1[i] + indexBias); + } else { + for (i = 0; i < count-1; i += 2) + OUT_CS(((ptr1[i+1]) << 16) | + (ptr1[i] )); + + if (count & 1) + OUT_CS(ptr1[i]); + } + break; + + case 2: + ptr2 = (uint16_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr2 += start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + r300_translate_primitive(mode)); + + if (indexBias && !r300->screen->caps.index_bias_supported) { + for (i = 0; i < count-1; i += 2) + OUT_CS(((ptr2[i+1] + indexBias) << 16) | + (ptr2[i] + indexBias)); + + if (count & 1) + OUT_CS(ptr2[i] + indexBias); + } else { + OUT_CS_TABLE(ptr2, count_dwords); + } + 
break; + + case 4: + ptr4 = (uint32_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr4 += start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit | + r300_translate_primitive(mode)); + + if (indexBias && !r300->screen->caps.index_bias_supported) { + for (i = 0; i < count; i++) + OUT_CS(ptr4[i] + indexBias); + } else { + OUT_CS_TABLE(ptr4, count_dwords); + } + break; + } + END_CS; +} + static void r300_draw_elements(struct r300_context *r300, int indexBias, unsigned minIndex, unsigned maxIndex, unsigned mode, unsigned start, unsigned count, @@ -703,9 +791,17 @@ static void r300_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); if (indexed) { - r300_draw_elements(r300, info->index_bias, info->min_index, - max_index, info->mode, start_indexed, count, - buffers_updated); + if (count <= 8 && + r300_resource(r300->index_buffer.buffer)->b.user_ptr) { + r300_draw_elements_immediate(r300, info->index_bias, + info->min_index, max_index, + info->mode, start_indexed, count, + buffers_updated); + } else { + r300_draw_elements(r300, info->index_bias, info->min_index, + max_index, info->mode, start_indexed, count, + buffers_updated); + } } else { if (immd_is_good_idea(r300, count)) { r300_draw_arrays_immediate(r300, info->mode, info->start, count); -- cgit v1.2.3 From 9305e93114542632384eb38da08018b4b9d1ab96 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Feb 2011 21:55:29 +0000 Subject: svga: Set the appropriate flags when creating sampler/surface views. 
--- src/gallium/drivers/svga/svga_sampler_view.c | 3 ++- src/gallium/drivers/svga/svga_surface.c | 19 +++++++++++++++---- src/gallium/drivers/svga/svga_surface.h | 1 + 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c index 6911f13f778..079046e4686 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.c +++ b/src/gallium/drivers/svga/svga_sampler_view.c @@ -49,6 +49,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct svga_screen *ss = svga_screen(pt->screen); struct svga_texture *tex = svga_texture(pt); struct svga_sampler_view *sv = NULL; + SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE; SVGA3dSurfaceFormat format = svga_translate_format(pt->format); boolean view = TRUE; @@ -126,7 +127,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->age = tex->age; - sv->handle = svga_texture_view_surface(pipe, tex, format, + sv->handle = svga_texture_view_surface(pipe, tex, flags, format, min_lod, max_lod - min_lod + 1, -1, -1, diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index 3e4bed76c05..0cb58e66111 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -102,6 +102,7 @@ svga_texture_copy_handle(struct svga_context *svga, struct svga_winsys_surface * svga_texture_view_surface(struct pipe_context *pipe, struct svga_texture *tex, + SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, @@ -118,7 +119,7 @@ svga_texture_view_surface(struct pipe_context *pipe, "svga: Create surface view: face %d zslice %d mips %d..%d\n", face_pick, zslice_pick, start_mip, start_mip+num_mip-1); - key->flags = 0; + key->flags = flags; key->format = format; key->numMipLevels = num_mip; key->size.width = u_minify(tex->b.b.width0, start_mip); @@ -191,6 +192,7 @@ 
svga_create_surface(struct pipe_context *pipe, boolean render = (surf_tmpl->usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) ? TRUE : FALSE; boolean view = FALSE; + SVGA3dSurfaceFlags flags; SVGA3dSurfaceFormat format; assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); @@ -219,10 +221,18 @@ svga_create_surface(struct pipe_context *pipe, s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; - if (!render) + if (!render) { + flags = SVGA3D_SURFACE_HINT_TEXTURE; format = svga_translate_format(surf_tmpl->format); - else + } else { + if (surf_tmpl->usage & PIPE_BIND_RENDER_TARGET) { + flags = SVGA3D_SURFACE_HINT_RENDERTARGET; + } + if (surf_tmpl->usage & PIPE_BIND_DEPTH_STENCIL) { + flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + } format = svga_translate_format_render(surf_tmpl->format); + } assert(format != SVGA3D_FORMAT_INVALID); @@ -249,7 +259,8 @@ svga_create_surface(struct pipe_context *pipe, SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", pt, surf_tmpl->u.tex.level, face, zslice, s); - s->handle = svga_texture_view_surface(NULL, tex, format, surf_tmpl->u.tex.level, + s->handle = svga_texture_view_surface(NULL, tex, flags, format, + surf_tmpl->u.tex.level, 1, face, zslice, &s->key); s->real_face = 0; s->real_level = 0; diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h index afb8326e1f3..755121945de 100644 --- a/src/gallium/drivers/svga/svga_surface.h +++ b/src/gallium/drivers/svga/svga_surface.h @@ -64,6 +64,7 @@ svga_surface_needs_propagation(struct pipe_surface *surf); struct svga_winsys_surface * svga_texture_view_surface(struct pipe_context *pipe, struct svga_texture *tex, + SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, -- cgit v1.2.3 From 9e96ea0652dda64f8eb311d7dfc9c50519ad02f0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Feb 2011 13:07:29 
-0500 Subject: r600g: add alignment cases for linear aligned Matches the drm and ddx. Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/r600_texture.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index b8bc365bcbd..793bdc4d5ec 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -103,6 +103,9 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, (((rscreen->tiling_info->group_bytes / 8 / pixsize)) * rscreen->tiling_info->num_banks)) * 8; break; + case V_038000_ARRAY_LINEAR_ALIGNED: + p_align = MAX2(64, rscreen->tiling_info->group_bytes / pixsize); + break; case V_038000_ARRAY_LINEAR_GENERAL: default: p_align = rscreen->tiling_info->group_bytes / pixsize; @@ -122,6 +125,7 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen, h_align = rscreen->tiling_info->num_channels * 8; break; case V_038000_ARRAY_1D_TILED_THIN1: + case V_038000_ARRAY_LINEAR_ALIGNED: h_align = 8; break; default: @@ -147,6 +151,8 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen, p_align * pixsize * h_align); break; case V_038000_ARRAY_1D_TILED_THIN1: + case V_038000_ARRAY_LINEAR_ALIGNED: + case V_038000_ARRAY_LINEAR_GENERAL: default: b_align = rscreen->tiling_info->group_bytes; break; -- cgit v1.2.3 From d123959ff75b2a83e02f4594f3e072c31c7fd8d9 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 14 Feb 2011 10:47:58 -0800 Subject: r300g: Remove redundant initialization. Remove redundant initialization from commit 3b01b52bd78e3d2fc857feacebd815a5fae00c94 noticed by tstellar. 
--- src/gallium/drivers/r300/r300_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 9c3a5e882f2..6ded8687bbc 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -815,7 +815,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde unsigned vertex_array_count = r300->velems->count; unsigned packet_size = (vertex_array_count * 3 + 1) / 2; struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size = r300->velems->format_size; + unsigned *hw_format_size; unsigned size1, size2; CS_LOCALS(r300); -- cgit v1.2.3 From cdca3c58aa2d9549f5188910e2a77b438516714f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 10 Jan 2011 05:41:47 +0100 Subject: gallium: remove pipe_vertex_buffer::max_index This is redundant to pipe_draw_info::max_index and doesn't really fit in the optimizations I plan. 
--- src/gallium/auxiliary/draw/draw_llvm.c | 17 ++++------------- src/gallium/auxiliary/draw/draw_llvm.h | 5 +---- src/gallium/auxiliary/draw/draw_pt.c | 3 +-- src/gallium/auxiliary/draw/draw_pt_fetch.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 2 +- src/gallium/auxiliary/util/u_draw_quad.c | 1 - src/gallium/auxiliary/util/u_dump_state.c | 1 - src/gallium/docs/d3d11ddi.txt | 1 - src/gallium/drivers/nvc0/nvc0_vbo.c | 2 +- src/gallium/drivers/svga/svga_state_vs.c | 2 +- src/gallium/drivers/trace/tr_dump_state.c | 1 - src/gallium/include/pipe/p_state.h | 1 - .../state_trackers/d3d1x/dxgi/src/dxgi_native.cpp | 1 - src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h | 1 - src/gallium/state_trackers/vega/polygon.c | 2 -- src/gallium/tests/graw/fs-test.c | 1 - src/gallium/tests/graw/gs-test.c | 2 -- src/gallium/tests/graw/quad-tex.c | 1 - src/gallium/tests/graw/shader-leak.c | 1 - src/gallium/tests/graw/tri-gs.c | 1 - src/gallium/tests/graw/tri-instanced.c | 2 -- src/gallium/tests/graw/tri.c | 1 - src/gallium/tests/graw/vs-test.c | 1 - src/mesa/state_tracker/st_draw.c | 5 ----- src/mesa/state_tracker/st_draw_feedback.c | 1 - 26 files changed, 12 insertions(+), 50 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index a73bdd78087..a5217c1d4ec 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -214,13 +214,12 @@ static LLVMTypeRef create_jit_vertex_buffer_type(struct gallivm_state *gallivm) { LLVMTargetDataRef target = gallivm->target; - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[3]; LLVMTypeRef vb_type; elem_types[0] = - elem_types[1] = - elem_types[2] = LLVMInt32TypeInContext(gallivm->context); - elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ + elem_types[1] = 
LLVMInt32TypeInContext(gallivm->context); + elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); @@ -229,10 +228,8 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm) LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, max_index, - target, vb_type, 1); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, - target, vb_type, 2); + target, vb_type, 1); LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); @@ -513,9 +510,7 @@ generate_fetch(struct gallivm_state *gallivm, LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf); - LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(gallivm, vbuf); LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf); - LLVMValueRef cond; LLVMValueRef stride; if (velem->instance_divisor) { @@ -525,10 +520,6 @@ generate_fetch(struct gallivm_state *gallivm, "instance_divisor"); } - /* limit index to min(index, vb_max_index) */ - cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); - index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); - stride = LLVMBuildMul(builder, vb_stride, index, ""); vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 9f038f1f04d..e8623e7bcdc 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -133,11 +133,8 @@ struct draw_jit_context #define draw_jit_vbuffer_stride(_gallivm, _ptr) \ lp_build_struct_get(_gallivm, _ptr, 0, "stride") -#define draw_jit_vbuffer_max_index(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 1, "max_index") - #define draw_jit_vbuffer_offset(_gallivm, _ptr) 
\ - lp_build_struct_get(_gallivm, _ptr, 2, "buffer_offset") + lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset") typedef int diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 4078b2a07d0..c3d7e871f7a 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -459,10 +459,9 @@ draw_vbo(struct draw_context *draw, } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n", + debug_printf(" %u: stride=%u offset=%u ptr=%p\n", i, draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index ae12ee24bdc..4fa3b265e10 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -139,7 +139,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } translate->run_elts( translate, @@ -166,7 +166,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e706b7796f8..51043102a61 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -186,7 +186,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), 
draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index c98fb3d5205..1e926fb028e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -169,7 +169,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index ); + draw->pt.user.max_index ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 2747cd4b0c1..0defd919974 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -56,7 +56,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe, vbuffer.buffer = vbuf; vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = offset; - vbuffer.max_index = num_verts - 1; if (cso) { cso_set_vertex_buffers(cso, 1, &vbuffer); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index b471d59eebf..5ecf8cbb067 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -681,7 +681,6 @@ util_dump_vertex_buffer(struct os_stream *stream, const struct pipe_vertex_buffe util_dump_struct_begin(stream, "pipe_vertex_buffer"); util_dump_member(stream, uint, state, stride); - util_dump_member(stream, uint, state, max_index); util_dump_member(stream, uint, state, buffer_offset); util_dump_member(stream, ptr, state, buffer); diff --git a/src/gallium/docs/d3d11ddi.txt b/src/gallium/docs/d3d11ddi.txt index 11e77190895..0a9e7e50f1d 100644 --- 
a/src/gallium/docs/d3d11ddi.txt +++ b/src/gallium/docs/d3d11ddi.txt @@ -337,7 +337,6 @@ IaSetTopology + Gallium supports line loops, triangle fans, quads, quad strips and polygons IaSetVertexBuffers -> set_vertex_buffers - + Gallium allows to specify a max_index here - Gallium only allows setting all vertex buffers at once, while D3D11 supports setting a subset OpenResource -> texture_from_handle diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index aa5decfc233..80e05823759 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -152,7 +152,7 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { /* TODO: use min and max instance divisor to get a proper range */ *base = 0; - *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride; + *size = nvc0->vtxbuf[vbi].buffer->width0; } else { assert(nvc0->vbo_max_index != ~0); *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 6682a1efe66..ae9a20ebb81 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -229,7 +229,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, - vbuffer->stride, vbuffer->max_index); + vbuffer->stride, ~0); translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 155c869fbd9..18805655bd7 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -517,7 +517,6 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) trace_dump_struct_begin("pipe_vertex_buffer"); trace_dump_member(uint, state, stride); - trace_dump_member(uint, state, 
max_index); trace_dump_member(uint, state, buffer_offset); trace_dump_member(resource_ptr, state, buffer); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 574a7a80111..cf6c5b50268 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -408,7 +408,6 @@ struct pipe_transfer struct pipe_vertex_buffer { unsigned stride; /**< stride to same attrib in next vertex, in bytes */ - unsigned max_index; /**< number of vertices in this buffer */ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ struct pipe_resource *buffer; /**< the actual buffer */ }; diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp index 2ff24e17d41..61cf2ddd9df 100644 --- a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp +++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp @@ -796,7 +796,6 @@ struct dxgi_blitter vbuf.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(quad_data)); vbuf.buffer_offset = 0; - vbuf.max_index = ~0; vbuf.stride = 4 * sizeof(float); pipe_buffer_write(pipe, vbuf.buffer, 0, sizeof(quad_data), quad_data); diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h index e1ba6c184fd..542d6591293 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -623,7 +623,6 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl vertex_buffers[start + i].buffer = buffer ? 
((GalliumD3D11Buffer*)buffer)->resource : 0; vertex_buffers[start + i].buffer_offset = new_offsets[i]; vertex_buffers[start + i].stride = new_strides[i]; - vertex_buffers[start + i].max_index = ~0; last_different = i; } } diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index a491de27fa6..bcc5cb272ca 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -303,7 +303,6 @@ void polygon_fill(struct polygon *poly, struct vg_context *ctx) vbuffer.buffer = poly->vbuf; vbuffer.stride = COMPONENTS * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 0; - vbuffer.max_index = poly->num_verts - 1; renderer_polygon_stencil_begin(ctx->renderer, &velement, ctx->state.vg.fill_rule, VG_FALSE); @@ -354,7 +353,6 @@ void polygon_array_fill(struct polygon_array *polyarray, struct vg_context *ctx) polygon_prepare_buffer(ctx, poly); vbuffer.buffer = poly->vbuf; - vbuffer.max_index = poly->num_verts - 1; renderer_polygon_stencil(ctx->renderer, &vbuffer, PIPE_PRIM_TRIANGLE_FAN, 0, (VGuint) poly->num_verts); diff --git a/src/gallium/tests/graw/fs-test.c b/src/gallium/tests/graw/fs-test.c index d21eb44e116..ff82b607110 100644 --- a/src/gallium/tests/graw/fs-test.c +++ b/src/gallium/tests/graw/fs-test.c @@ -215,7 +215,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/gs-test.c b/src/gallium/tests/graw/gs-test.c index 0c65390e109..cc05889dd05 100644 --- a/src/gallium/tests/graw/gs-test.c +++ b/src/gallium/tests/graw/gs-test.c @@ -251,13 +251,11 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); vbuf.buffer_offset = 0; if (draw_strip) { - vbuf.max_index = sizeof(vertices_strip) / vbuf.stride; vbuf.buffer = screen->user_buffer_create(screen, vertices_strip, 
sizeof(vertices_strip), PIPE_BIND_VERTEX_BUFFER); } else { - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer = screen->user_buffer_create(screen, vertices, sizeof(vertices), diff --git a/src/gallium/tests/graw/quad-tex.c b/src/gallium/tests/graw/quad-tex.c index 58ca639d207..4e66813b301 100644 --- a/src/gallium/tests/graw/quad-tex.c +++ b/src/gallium/tests/graw/quad-tex.c @@ -97,7 +97,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/shader-leak.c b/src/gallium/tests/graw/shader-leak.c index 9af76f51ea2..a23ca73ac1d 100644 --- a/src/gallium/tests/graw/shader-leak.c +++ b/src/gallium/tests/graw/shader-leak.c @@ -88,7 +88,6 @@ static void set_vertices( void ) vbuf.stride = sizeof(struct vertex); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/tri-gs.c b/src/gallium/tests/graw/tri-gs.c index a1a00b32098..47b76530c6b 100644 --- a/src/gallium/tests/graw/tri-gs.c +++ b/src/gallium/tests/graw/tri-gs.c @@ -89,7 +89,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/tri-instanced.c b/src/gallium/tests/graw/tri-instanced.c index f61d8b9844d..259b3d9527c 100644 --- a/src/gallium/tests/graw/tri-instanced.c +++ b/src/gallium/tests/graw/tri-instanced.c @@ -132,7 +132,6 @@ static void set_vertices( void ) /* vertex data */ vbuf[0].stride = sizeof( struct vertex ); - vbuf[0].max_index = sizeof(vertices) / vbuf[0].stride; vbuf[0].buffer_offset = 0; vbuf[0].buffer = screen->user_buffer_create(screen, vertices, @@ -141,7 +140,6 @@ static void set_vertices( void ) /* 
instance data */ vbuf[1].stride = sizeof( inst_data[0] ); - vbuf[1].max_index = sizeof(inst_data) / vbuf[1].stride; vbuf[1].buffer_offset = 0; vbuf[1].buffer = screen->user_buffer_create(screen, inst_data, diff --git a/src/gallium/tests/graw/tri.c b/src/gallium/tests/graw/tri.c index 006d61ca88c..4266c0394d8 100644 --- a/src/gallium/tests/graw/tri.c +++ b/src/gallium/tests/graw/tri.c @@ -93,7 +93,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/vs-test.c b/src/gallium/tests/graw/vs-test.c index 1358fa85dfd..dd64d8b9301 100644 --- a/src/gallium/tests/graw/vs-test.c +++ b/src/gallium/tests/graw/vs-test.c @@ -171,7 +171,6 @@ static void set_vertices( void ) } vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index d9b99a34990..4cbcecfd8ba 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -384,7 +384,6 @@ setup_interleaved_attribs(struct gl_context *ctx, vbuffer->buffer_offset = pointer_to_offset(low); } vbuffer->stride = stride; /* in bytes */ - vbuffer->max_index = max_index; } /* @@ -488,10 +487,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx, /* common-case setup */ vbuffer[attr].stride = stride; /* in bytes */ - if (arrays[mesaAttr]->InstanceDivisor) - vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement; - else - vbuffer[attr].max_index = max_index; velements[attr].src_offset = 0; velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 545b32d75bb..1e1220bfe52 100644 --- 
a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -179,7 +179,6 @@ st_feedback_draw_vbo(struct gl_context *ctx, /* common-case setup */ vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */ - vbuffers[attr].max_index = max_index; velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = attr; velements[attr].src_format = -- cgit v1.2.3 From 588fa884d212eba5ffbc69fda75db37d7c77214c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 9 Feb 2011 01:10:11 +0100 Subject: gallium: notify drivers about possible changes in user buffer contents Also implement the redefine_user_buffer hook in the drivers. --- src/gallium/auxiliary/util/u_blitter.c | 4 +++ src/gallium/auxiliary/util/u_transfer.c | 7 +++++ src/gallium/auxiliary/util/u_transfer.h | 10 +++---- src/gallium/docs/source/context.rst | 16 ++++++++++ src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 ++ src/gallium/drivers/failover/fo_state.c | 2 ++ src/gallium/drivers/galahad/glhd_context.c | 14 +++++++++ src/gallium/drivers/i915/i915_state.c | 2 ++ src/gallium/drivers/i965/brw_pipe_vertex.c | 2 ++ src/gallium/drivers/identity/id_context.c | 14 +++++++++ src/gallium/drivers/llvmpipe/lp_state_vertex.c | 3 ++ src/gallium/drivers/noop/noop_state.c | 2 ++ src/gallium/drivers/nv50/nv50_state.c | 2 ++ src/gallium/drivers/nvc0/nvc0_state.c | 3 ++ src/gallium/drivers/nvfx/nvfx_vbo.c | 3 ++ src/gallium/drivers/r300/r300_state.c | 2 ++ src/gallium/drivers/r600/evergreen_state.c | 1 + src/gallium/drivers/r600/r600_state.c | 2 ++ src/gallium/drivers/rbug/rbug_context.c | 14 +++++++++ src/gallium/drivers/softpipe/sp_state_vertex.c | 2 ++ src/gallium/drivers/svga/svga_pipe_vertex.c | 2 ++ src/gallium/drivers/trace/tr_context.c | 23 ++++++++++++++ src/gallium/include/pipe/p_context.h | 8 +++++ src/mesa/state_tracker/st_context.c | 5 ++++ src/mesa/state_tracker/st_context.h | 5 ++++ src/mesa/state_tracker/st_draw.c | 38 ++++++++++++++++++++++++ 26 files 
changed, 182 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index e27c445096d..fd1c2b72d04 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -639,6 +639,8 @@ static void blitter_draw_rectangle(struct blitter_context *blitter, } blitter_set_rectangle(ctx, x1, y1, x2, y2, depth); + ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf, + 0, ctx->vbuf->width0); util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); } @@ -867,6 +869,8 @@ void util_blitter_copy_region(struct blitter_context *blitter, /* Draw. */ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); + ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf, + 0, ctx->vbuf->width0); util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); break; diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c index e2828cfd99e..b6c63d9642f 100644 --- a/src/gallium/auxiliary/util/u_transfer.c +++ b/src/gallium/auxiliary/util/u_transfer.c @@ -112,3 +112,10 @@ void u_default_transfer_destroy(struct pipe_context *pipe, FREE(transfer); } +void u_default_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size) +{ + resource->width0 = MAX2(resource->width0, offset + size); +} diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h index 52191512ac7..8cf9c418b04 100644 --- a/src/gallium/auxiliary/util/u_transfer.h +++ b/src/gallium/auxiliary/util/u_transfer.h @@ -136,11 +136,9 @@ void u_transfer_inline_write_vtbl( struct pipe_context *rm_ctx, unsigned stride, unsigned layer_stride); - - - - - - +void u_default_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size); 
#endif diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index da847262c13..04a39511287 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -392,6 +392,22 @@ be flushed on write or unmap. Flushes must be requested with ``transfer_flush_region``. Flush ranges are relative to the mapped range, not the beginning of the resource. + + +.. _redefine_user_buffer: + +redefine_user_buffer +%%%%%%%%%%%%%%%%%%%% + +This function notifies a driver that the user buffer content has been changed. +The updated region starts at ``offset`` and is ``size`` bytes large. +The ``offset`` is relative to the pointer specified in ``user_buffer_create``. +While uploading the user buffer, the driver is allowed not to upload +the memory outside of this region. +The width0 is redefined to ``MAX2(width0, offset+size)``. + + + .. _pipe_transfer: PIPE_TRANSFER diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index eb22a09a913..7f65b82619e 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -33,6 +33,7 @@ #include "cell_state.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "draw/draw_context.h" @@ -115,4 +116,5 @@ cell_init_vertex_functions(struct cell_context *cell) cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; + cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index af1fd953aaf..b4da1b8b901 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -30,6 +30,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" +#include 
"util/u_transfer.h" #include "fo_context.h" @@ -656,4 +657,5 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_constant_buffer = failover_set_constant_buffer; failover->pipe.create_sampler_view = failover_create_sampler_view; failover->pipe.sampler_view_destroy = failover_sampler_view_destroy; + failover->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index 8cbf0b1de4a..75e4c253dd9 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -962,6 +962,19 @@ galahad_context_transfer_inline_write(struct pipe_context *_context, } +static void galahad_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -1036,6 +1049,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap; glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region; glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write; + glhd_pipe->base.redefine_user_buffer = galahad_redefine_user_buffer; glhd_pipe->pipe = pipe; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index b31cc306a44..f380708847b 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -33,6 +33,7 @@ #include "util/u_inlines.h" 
#include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" @@ -895,4 +896,5 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; i915->base.set_index_buffer = i915_set_index_buffer; + i915->base.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index b23454b5808..570ea23ff45 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -4,6 +4,7 @@ #include "util/u_memory.h" #include "util/u_format.h" +#include "util/u_transfer.h" static unsigned brw_translate_surface_format( unsigned id ) @@ -302,6 +303,7 @@ brw_pipe_vertex_init( struct brw_context *brw ) brw->base.create_vertex_elements_state = brw_create_vertex_elements_state; brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state; brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state; + brw->base.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 3efbd6a246d..b533abe24c6 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -855,6 +855,19 @@ identity_context_transfer_inline_write(struct pipe_context *_context, } +static void identity_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_resource *id_resource = identity_resource(_resource); + struct pipe_context *context = id_context->pipe; + struct pipe_resource *resource = id_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + 
struct pipe_context * identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -929,6 +942,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.transfer_unmap = identity_context_transfer_unmap; id_pipe->base.transfer_flush_region = identity_context_transfer_flush_region; id_pipe->base.transfer_inline_write = identity_context_transfer_inline_write; + id_pipe->base.redefine_user_buffer = identity_redefine_user_buffer; id_pipe->pipe = pipe; diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index fffdeb6ccde..be86f66de91 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -34,6 +34,7 @@ #include "draw/draw_context.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" static void * @@ -114,4 +115,6 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; + + llvmpipe->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/noop/noop_state.c b/src/gallium/drivers/noop/noop_state.c index ad324774c03..00a4c1eb01e 100644 --- a/src/gallium/drivers/noop/noop_state.c +++ b/src/gallium/drivers/noop/noop_state.c @@ -28,6 +28,7 @@ #include #include #include +#include "util/u_transfer.h" static void noop_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { @@ -287,4 +288,5 @@ void noop_init_state_functions(struct pipe_context *ctx) ctx->sampler_view_destroy = noop_sampler_view_destroy; ctx->surface_destroy = noop_surface_destroy; ctx->draw_vbo = noop_draw_vbo; + ctx->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index b4eda0f617d..ba2c3e8c281 100644 --- a/src/gallium/drivers/nv50/nv50_state.c 
+++ b/src/gallium/drivers/nv50/nv50_state.c @@ -23,6 +23,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -886,5 +887,6 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; nv50->pipe.set_index_buffer = nv50_set_index_buffer; + nv50->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index f6a7f824d58..7fb91b1191d 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -22,6 +22,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -934,5 +935,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) nvc0->pipe.delete_stream_output_state = nvc0_tfb_state_delete; nvc0->pipe.bind_stream_output_state = nvc0_tfb_state_bind; nvc0->pipe.set_stream_output_buffers = nvc0_set_transform_feedback_buffers; + + nvc0->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 01dacb43dad..b72379d6536 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -2,6 +2,7 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_transfer.h" #include "translate/translate.h" #include "nvfx_context.h" @@ -631,4 +632,6 @@ nvfx_init_vbo_functions(struct nvfx_context *nvfx) nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; + + nvfx->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 
242f883314e..1ec942854ff 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -28,6 +28,7 @@ #include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -1843,6 +1844,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_vertex_buffers = r300_set_vertex_buffers; r300->context.set_index_buffer = r300_set_index_buffer; + r300->context.redefine_user_buffer = u_default_redefine_user_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 88dcc9ba544..89e2d06abdd 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -905,6 +905,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; rctx->context.set_viewport_state = evergreen_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void evergreen_init_config(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 01c59072a26..43cba667de9 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -37,6 +37,7 @@ #include #include #include +#include "util/u_transfer.h" #include #include "r600.h" #include "r600d.h" @@ -941,6 +942,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + 
rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void r600_init_config(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 94e57e40f86..3aefb5b3bb5 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -987,6 +987,19 @@ rbug_context_transfer_inline_write(struct pipe_context *_context, } +static void rbug_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct rbug_context *rb_pipe = rbug_context(_context); + struct rbug_resource *rb_resource = rbug_resource(_resource); + struct pipe_context *context = rb_pipe->pipe; + struct pipe_resource *resource = rb_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -1072,6 +1085,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.transfer_unmap = rbug_context_transfer_unmap; rb_pipe->base.transfer_flush_region = rbug_context_transfer_flush_region; rb_pipe->base.transfer_inline_write = rbug_context_transfer_inline_write; + rb_pipe->base.redefine_user_buffer = rbug_redefine_user_buffer; rb_pipe->pipe = pipe; diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 5f4d661abde..aa0b333c7a9 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "draw/draw_context.h" @@ -119,4 +120,5 @@ softpipe_init_vertex_funcs(struct pipe_context *pipe) pipe->set_vertex_buffers = softpipe_set_vertex_buffers; pipe->set_index_buffer = softpipe_set_index_buffer; + pipe->redefine_user_buffer = 
u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 86c79459f3e..6bf37fbbbaf 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" #include "svga_screen.h" @@ -131,6 +132,7 @@ void svga_init_vertex_functions( struct svga_context *svga ) svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state; svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state; svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; + svga->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index eaabae8ce42..d24cc623c2e 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1419,6 +1419,28 @@ trace_context_transfer_inline_write(struct pipe_context *_context, } +static void trace_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_resource *tr_tex = trace_resource(_resource); + struct pipe_context *context = tr_context->pipe; + struct pipe_resource *resource = tr_tex->resource; + + assert(resource->screen == context->screen); + + trace_dump_call_begin("pipe_context", "redefine_user_buffer"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, resource); + trace_dump_arg(uint, offset); + trace_dump_arg(uint, size); + + trace_dump_call_end(); + + context->redefine_user_buffer(context, resource, offset, size); +} static const struct debug_named_value rbug_blocker_flags[] = { @@ -1506,6 +1528,7 @@ trace_context_create(struct trace_screen *tr_scr, 
tr_ctx->base.transfer_unmap = trace_context_transfer_unmap; tr_ctx->base.transfer_flush_region = trace_context_transfer_flush_region; tr_ctx->base.transfer_inline_write = trace_context_transfer_inline_write; + tr_ctx->base.redefine_user_buffer = trace_redefine_user_buffer; tr_ctx->pipe = pipe; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 589cac2ddd3..24ee3fe1175 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -399,6 +399,14 @@ struct pipe_context { unsigned stride, unsigned layer_stride); + + /* Notify a driver that a content of a user buffer has been changed. + * The changed range is [offset, offset+size-1]. + * The new width0 of the buffer is offset+size. */ + void (*redefine_user_buffer)(struct pipe_context *, + struct pipe_resource *, + unsigned offset, + unsigned size); }; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index dccbff3c1db..7a19f35bbf5 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -203,6 +203,11 @@ static void st_destroy_context_priv( struct st_context *st ) st_destroy_drawpix(st); st_destroy_drawtex(st); + /* Unreference any user vertex buffers. */ + for (i = 0; i < st->num_user_vbs; i++) { + pipe_resource_reference(&st->user_vb[i], NULL); + } + for (i = 0; i < Elements(st->state.sampler_views); i++) { pipe_sampler_view_reference(&st->state.sampler_views[i], NULL); } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 64a8f790e22..77765f02379 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -185,6 +185,11 @@ struct st_context int force_msaa; void *winsys_drawable_handle; + + /* User vertex buffers. 
*/ + struct pipe_resource *user_vb[PIPE_MAX_ATTRIBS]; + unsigned user_vb_stride[PIPE_MAX_ATTRIBS]; + unsigned num_user_vbs; }; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 34f75a37969..830e3e3c1bb 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -330,6 +330,11 @@ setup_interleaved_attribs(struct gl_context *ctx, stride * (max_index + 1), PIPE_BIND_VERTEX_BUFFER); vbuffer->buffer_offset = 0; + + /* Track user vertex buffers. */ + pipe_resource_reference(&st->user_vb[0], vbuffer->buffer); + st->user_vb_stride[0] = stride; + st->num_user_vbs = 1; } vbuffer->stride = stride; /* in bytes */ } @@ -405,6 +410,11 @@ setup_non_interleaved_attribs(struct gl_context *ctx, } vbuffer[attr].buffer_offset = 0; + + /* Track user vertex buffers. */ + pipe_resource_reference(&st->user_vb[attr], vbuffer->buffer); + st->user_vb_stride[attr] = stride; + st->num_user_vbs = MAX2(st->num_user_vbs, attr+1); } /* common-case setup */ @@ -538,12 +548,20 @@ st_validate_varrays(struct gl_context *ctx, struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned num_vbuffers, num_velements; GLuint attr; + unsigned i; /* must get these after state validation! */ vp = st->vp; vpv = st->vp_variant; memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs); + + /* Unreference any user vertex buffers. */ + for (i = 0; i < st->num_user_vbs; i++) { + pipe_resource_reference(&st->user_vb[i], NULL); + } + st->num_user_vbs = 0; + /* * Setup the vbuffer[] and velements[] arrays. */ @@ -646,6 +664,26 @@ st_draw_vbo(struct gl_context *ctx, #endif } + /* Notify the driver that the content of user buffers may have been + * changed. 
*/ + if (!new_array && st->num_user_vbs) { + for (i = 0; i < st->num_user_vbs; i++) { + if (st->user_vb[i]) { + unsigned stride = st->user_vb_stride[i]; + + if (stride) { + pipe->redefine_user_buffer(pipe, st->user_vb[i], + min_index * stride, + (max_index + 1 - min_index) * stride); + } else { + /* stride == 0 */ + pipe->redefine_user_buffer(pipe, st->user_vb[i], + 0, st->user_vb[i]->width0); + } + } + } + } + setup_index_buffer(ctx, ib, &ibuffer); pipe->set_index_buffer(pipe, &ibuffer); -- cgit v1.2.3 From 20112cca262c926de887dff7d7f108b046ea781a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 14 Feb 2011 23:33:06 +0100 Subject: r300g: do not track whether occlusion queries have been flushed The winsys takes care of flushing automatically. --- src/gallium/drivers/r300/r300_context.h | 2 -- src/gallium/drivers/r300/r300_emit.c | 1 - src/gallium/drivers/r300/r300_flush.c | 6 ------ src/gallium/drivers/r300/r300_query.c | 14 +++++--------- 4 files changed, 5 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 6e940b46fa4..58a7129849c 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -268,8 +268,6 @@ struct r300_query { /* How many results have been written, in dwords. It's incremented * after end_query and flush. 
*/ unsigned num_results; - /* if we've flushed the query */ - boolean flushed; /* if begin has been emitted */ boolean begin_emitted; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 6ded8687bbc..bd864b96167 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -559,7 +559,6 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; query->begin_emitted = TRUE; - query->flushed = FALSE; } static void r300_emit_query_end_frag_pipes(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 1e80f802f56..0db13657027 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -36,7 +36,6 @@ static void r300_flush(struct pipe_context* pipe, struct pipe_fence_handle** fence) { struct r300_context *r300 = r300_context(pipe); - struct r300_query *query; struct r300_atom *atom; struct r300_fence **rfence = (struct r300_fence**)fence; @@ -76,11 +75,6 @@ static void r300_flush(struct pipe_context* pipe, r300->rws->cs_flush(r300->cs); } - /* reset flushed query */ - foreach(query, &r300->query_list) { - query->flushed = TRUE; - } - /* Create a new fence. */ if (rfence) { *rfence = CALLOC_STRUCT(r300_fence); diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index da871dc3a87..717485f43cb 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -127,16 +127,12 @@ static boolean r300_get_query_result(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_query *q = r300_query(query); - unsigned flags, i; + unsigned i; uint32_t temp, *map; - uint64_t *result = (uint64_t*)vresult; - - if (!q->flushed) - pipe->flush(pipe, 0, NULL); - - flags = PIPE_TRANSFER_READ | (!wait ? 
PIPE_TRANSFER_DONTBLOCK : 0); - map = r300->rws->buffer_map(q->buf, r300->cs, flags); + map = r300->rws->buffer_map(q->buf, r300->cs, + PIPE_TRANSFER_READ | + (!wait ? PIPE_TRANSFER_DONTBLOCK : 0)); if (!map) return FALSE; @@ -149,7 +145,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, r300->rws->buffer_unmap(q->buf); - *result = temp; + *((uint64_t*)vresult) = temp; return TRUE; } -- cgit v1.2.3 From 56029ce52bafbc51b5b6660383767257b7770cd7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 15 Feb 2011 01:17:29 +0100 Subject: r300g: inline some of the pipe_buffer_map/unmap calls --- src/gallium/drivers/r300/r300_render.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index efad9fa1af7..cdca74e660d 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -389,9 +389,8 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ - uint32_t* map[PIPE_MAX_ATTRIBS]; + uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; uint32_t* mapelem[PIPE_MAX_ATTRIBS]; - struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0}; CS_LOCALS(r300); @@ -408,12 +407,10 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, stride[i] = vbuf->stride / 4; /* Map the buffer. 
*/ - if (!transfer[vbi]) { - map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, - r300->vbuf_mgr->real_vertex_buffer[vbi], - PIPE_TRANSFER_READ | - PIPE_TRANSFER_UNSYNCHRONIZED, - &transfer[vbi]); + if (!map[vbi]) { + map[vbi] = (uint32_t*)r300->rws->buffer_map( + r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf, + r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; } mapelem[i] = map[vbi] + (velem->src_offset / 4); @@ -439,9 +436,9 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, for (i = 0; i < vertex_element_count; i++) { vbi = r300->velems->velem[i].vertex_buffer_index; - if (transfer[vbi]) { - pipe_buffer_unmap(&r300->context, transfer[vbi]); - transfer[vbi] = NULL; + if (map[vbi]) { + r300->rws->buffer_unmap(r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf); + map[vbi] = NULL; } } } @@ -658,12 +655,11 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, /* Fallback for misaligned ushort indices. */ if (indexSize == 2 && (start & 1) && !r300_resource(indexBuffer)->b.user_ptr) { - struct pipe_transfer *transfer; - - uint16_t *ptr = pipe_buffer_map(&r300->context, indexBuffer, - PIPE_TRANSFER_READ | - PIPE_TRANSFER_UNSYNCHRONIZED, - &transfer); + /* If we got here, then orgIndexBuffer == indexBuffer. 
*/ + uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->buf, + r300->cs, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED); if (mode == PIPE_PRIM_TRIANGLES) { memcpy(indices3, ptr + start, 6); @@ -674,7 +670,7 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count, (uint8_t*)ptr); } - pipe_buffer_unmap(&r300->context, transfer); + r300->rws->buffer_unmap(r300_resource(orgIndexBuffer)->buf); } else { if (r300_resource(indexBuffer)->b.user_ptr) r300_upload_index_buffer(r300, &indexBuffer, indexSize, -- cgit v1.2.3 From 4faf11ad6c44902e17c648c28d40e10067f83612 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 15 Feb 2011 01:19:01 +0100 Subject: r300g: fix SIGFPE on debug builds --- src/gallium/drivers/r300/r300_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 1ec942854ff..aa18ab7d1e3 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1511,9 +1511,9 @@ static void r300_set_index_buffer(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - assert(ib->offset % ib->index_size == 0); - if (ib && ib->buffer) { + assert(ib->offset % ib->index_size == 0); + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); r300->index_buffer.offset /= r300->index_buffer.index_size; -- cgit v1.2.3 From 18b4978ac8d2fb9b4f0830f33267e36ffc67b89c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 15 Feb 2011 01:41:16 +0100 Subject: r300g: implement fences using dummy relocations So finally we have them. 
--- src/gallium/drivers/r300/r300_context.c | 25 ----------------------- src/gallium/drivers/r300/r300_context.h | 17 ---------------- src/gallium/drivers/r300/r300_flush.c | 35 ++++++++++++++++++++++----------- src/gallium/drivers/r300/r300_screen.c | 20 ++++++++----------- 4 files changed, 32 insertions(+), 65 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 3608c04dc93..a89bf7fac31 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -532,28 +532,3 @@ fail: r300_destroy_context(&r300->context); return NULL; } - -void r300_finish(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb; - unsigned i; - - /* This is a preliminary implementation of glFinish. - * - * The ideal implementation should use something like EmitIrqLocked and - * WaitIrq, or better, real fences. - */ - if (r300->fb_state.state) { - fb = r300->fb_state.state; - - for (i = 0; i < fb->nr_cbufs; i++) { - if (fb->cbufs[i]->texture) { - r300->rws->buffer_wait(r300_resource(fb->cbufs[i]->texture)->buf); - return; - } - } - if (fb->zsbuf && fb->zsbuf->texture) { - r300->rws->buffer_wait(r300_resource(fb->zsbuf->texture)->buf); - } - } -} diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 58a7129849c..883b5f99c07 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -284,22 +284,6 @@ struct r300_query { struct r300_query* next; }; -/* Fence object. - * - * This is a fake fence. Instead of syncing with the fence, we sync - * with the context, which is inefficient but compliant. - * - * This is not a subclass of pipe_fence_handle because pipe_fence_handle is - * never actually fully defined. 
So, rather than have it as a member, and do - * subclass-style casting, we treat pipe_fence_handle as an opaque, and just - * trust that our state tracker does not ever mess up fence objects. - */ -struct r300_fence { - struct pipe_reference reference; - struct r300_context *ctx; - boolean signalled; -}; - struct r300_surface { struct pipe_surface base; @@ -651,7 +635,6 @@ static INLINE void r300_mark_atom_dirty(struct r300_context *r300, struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); -void r300_finish(struct r300_context *r300); void r300_flush_cb(void *data); /* Context initialization. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 0db13657027..bfc15ceae72 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -37,11 +37,23 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_atom *atom; - struct r300_fence **rfence = (struct r300_fence**)fence; + struct r300_winsys_bo **rfence = (struct r300_winsys_bo**)fence; if (r300->draw && !r300->draw_vbo_locked) r300_draw_flush_vbuf(r300); + if (rfence) { + /* Create a fence, which is a dummy BO. */ + *rfence = r300->rws->buffer_create(r300->rws, 1, 1, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + R300_DOMAIN_GTT); + /* Add the fence as a dummy relocation. */ + r300->rws->cs_add_reloc(r300->cs, + r300->rws->buffer_get_cs_handle(*rfence), + R300_DOMAIN_GTT, R300_DOMAIN_GTT); + } + if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); @@ -70,16 +82,17 @@ static void r300_flush(struct pipe_context* pipe, r300->upload_vb_validated = FALSE; r300->upload_ib_validated = FALSE; } else { - /* Even if hw is not dirty, we should at least reset the CS in case - * the space checking failed for the first draw operation. */ - r300->rws->cs_flush(r300->cs); - } - - /* Create a new fence. 
*/ - if (rfence) { - *rfence = CALLOC_STRUCT(r300_fence); - pipe_reference_init(&(*rfence)->reference, 1); - (*rfence)->ctx = r300; + if (rfence) { + /* We have to create a fence object, but the command stream is empty + * and we cannot emit an empty CS. We must write some regs then. */ + CS_LOCALS(r300); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); + r300->rws->cs_flush(r300->cs); + } else { + /* Even if hw is not dirty, we should at least reset the CS in case + * the space checking failed for the first draw operation. */ + r300->rws->cs_flush(r300->cs); + } } } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 0b0220654a7..ed47315f42d 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -409,32 +409,28 @@ static void r300_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - struct r300_fence **oldf = (struct r300_fence**)ptr; - struct r300_fence *newf = (struct r300_fence*)fence; - - if (pipe_reference(&(*oldf)->reference, &newf->reference)) - FREE(*oldf); - - *ptr = fence; + r300_winsys_bo_reference((struct r300_winsys_bo**)ptr, + (struct r300_winsys_bo*)fence); } static int r300_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flags) { - struct r300_fence *rfence = (struct r300_fence*)fence; + struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; - return rfence->signalled ? 0 : 1; /* 0 == success */ + return !rws->buffer_is_busy(rfence) ? 
0 : 1; /* 0 == success */ } static int r300_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flags) { - struct r300_fence *rfence = (struct r300_fence*)fence; + struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; - r300_finish(rfence->ctx); - rfence->signalled = TRUE; + rws->buffer_wait(rfence); return 0; /* 0 == success */ } -- cgit v1.2.3 From ea7a548d07ddc69c226a425af0f88f818203d6ee Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 14 Feb 2011 13:34:11 +1000 Subject: r600g: drop tiled flag we can work this out from the array_mode and it makes more sense to do that. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 8 +++----- src/gallium/drivers/r600/r600_resource.h | 3 ++- src/gallium/drivers/r600/r600_state.c | 6 ++---- src/gallium/drivers/r600/r600_texture.c | 4 +--- 4 files changed, 8 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 89e2d06abdd..45469db6c95 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -390,10 +390,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte bo[1] = rbuffer->bo; pitch = align(tmp->pitch_in_pixels[0], 8); - if (tmp->tiled) { - array_mode = tmp->array_mode[0]; - tile_type = tmp->tile_type; - } + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, @@ -694,7 +692,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state ntype != 4 && ntype != 5) color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC); - if (rtex->tiled) { + if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) { tile_type = rtex->tile_type; } else /* workaround for 
linear buffers */ tile_type = 1; diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 2e7a28cc94f..a6d2141e5f2 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -59,7 +59,6 @@ struct r600_resource_texture { unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_override; unsigned size; - unsigned tiled; unsigned tile_type; unsigned depth; unsigned dirty_db; @@ -67,6 +66,8 @@ struct r600_resource_texture { boolean is_flushing_texture; }; +#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED) + #define R600_BUFFER_MAGIC 0xabcd1600 /* XXX this could be removed */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 43cba667de9..bf74511b63f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -438,10 +438,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; pitch = align(tmp->pitch_in_pixels[0], 8); - if (tmp->tiled) { - array_mode = tmp->array_mode[0]; - tile_type = tmp->tile_type; - } + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 793bdc4d5ec..30e31e58d31 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -388,8 +388,6 @@ r600_texture_create_object(struct pipe_screen *screen, if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base)) rtex->depth = 1; - if (array_mode) - rtex->tiled = 1; r600_setup_miptree(screen, rtex, array_mode); resource->size = rtex->size; @@ -557,7 +555,7 @@ struct 
pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, * the CPU is much happier reading out of cached system memory * than uncached VRAM. */ - if (rtex->tiled) + if (R600_TEX_IS_TILED(rtex, level)) use_staging_texture = TRUE; if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024) -- cgit v1.2.3 From a661dacf143d7187abc2360ac945db75296f7e23 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 15 Feb 2011 13:21:50 +1000 Subject: r600g: fix miptree calculations the miptree setup and pitch storing didn't work so well for block based things like compressed textures. The CB takes blocks, where the texture sampler takes pixels, and transfers need bytes, So now we store blocks/bytes and translate to pixels in the sampler. This is necessary for s3tc to work properly. --- src/gallium/drivers/r600/evergreen_state.c | 10 ++--- src/gallium/drivers/r600/r600_resource.h | 4 +- src/gallium/drivers/r600/r600_state.c | 10 ++--- src/gallium/drivers/r600/r600_texture.c | 64 ++++++++++++++---------------- 4 files changed, 41 insertions(+), 47 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 45469db6c95..261dd8d6ab8 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -389,7 +389,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - pitch = align(tmp->pitch_in_pixels[0], 8); + pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); array_mode = tmp->array_mode[0]; tile_type = tmp->tile_type; @@ -664,8 +664,8 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, level, state->cbufs[cb]->u.tex.first_layer); - pitch = 
rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -747,8 +747,8 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, level, state->zsbuf->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index a6d2141e5f2..fdcfcd50a10 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -53,8 +53,8 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; - unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */ + unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */ unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_override; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index bf74511b63f..bd591bdd8cc 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -437,7 +437,7 @@ 
static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - pitch = align(tmp->pitch_in_pixels[0], 8); + pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); array_mode = tmp->array_mode[0]; tile_type = tmp->tile_type; @@ -709,8 +709,8 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset(rtex, level, state->cbufs[cb]->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[0] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -784,8 +784,8 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, level, state->zsbuf->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 30e31e58d31..db39383e306 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -85,7 +85,7 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, } } -static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, +static unsigned 
r600_get_block_alignment(struct pipe_screen *screen, enum pipe_format format, unsigned array_mode) { @@ -128,6 +128,7 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen, case V_038000_ARRAY_LINEAR_ALIGNED: h_align = 8; break; + case V_038000_ARRAY_LINEAR_GENERAL: default: h_align = 1; break; @@ -141,7 +142,7 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen, { struct r600_screen* rscreen = (struct r600_screen *)screen; unsigned pixsize = util_format_get_blocksize(format); - int p_align = r600_get_pixel_alignment(screen, format, array_mode); + int p_align = r600_get_block_alignment(screen, format, array_mode); int h_align = r600_get_height_alignment(screen, array_mode); int b_align; @@ -169,25 +170,24 @@ static unsigned mip_minify(unsigned size, unsigned level) return val; } -static unsigned r600_texture_get_stride(struct pipe_screen *screen, - struct r600_resource_texture *rtex, - unsigned level) +static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) { struct pipe_resource *ptex = &rtex->resource.b.b.b; - unsigned width, stride, tile_width; + unsigned nblocksx, block_align, width; + unsigned blocksize = util_format_get_blocksize(ptex->format); if (rtex->pitch_override) - return rtex->pitch_override; + return rtex->pitch_override / blocksize; width = mip_minify(ptex->width0, level); - if (util_format_is_plain(ptex->format)) { - tile_width = r600_get_pixel_alignment(screen, ptex->format, - rtex->array_mode[level]); - width = align(width, tile_width); - } - stride = util_format_get_stride(ptex->format, width); + nblocksx = util_format_get_nblocksx(ptex->format, width); - return stride; + block_align = r600_get_block_alignment(screen, ptex->format, + rtex->array_mode[level]); + nblocksx = align(nblocksx, block_align); + return nblocksx; } static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, @@ -198,19 +198,11 @@ static unsigned 
r600_texture_get_nblocksy(struct pipe_screen *screen, unsigned height, tile_height; height = mip_minify(ptex->height0, level); - if (util_format_is_plain(ptex->format)) { - tile_height = r600_get_height_alignment(screen, - rtex->array_mode[level]); - height = align(height, tile_height); - } - return util_format_get_nblocksy(ptex->format, height); -} - -/* Get a width in pixels from a stride in bytes. */ -static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes) -{ - return (pitch_in_bytes / util_format_get_blocksize(format)) * - util_format_get_blockwidth(format); + height = util_format_get_nblocksy(ptex->format, height); + tile_height = r600_get_height_alignment(screen, + rtex->array_mode[level]); + height = align(height, tile_height); + return height; } static void r600_texture_set_array_mode(struct pipe_screen *screen, @@ -231,7 +223,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, unsigned w, h, tile_height, tile_width; tile_height = r600_get_height_alignment(screen, array_mode); - tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode); + tile_width = r600_get_block_alignment(screen, ptex->format, array_mode); w = mip_minify(ptex->width0, level); h = mip_minify(ptex->height0, level); @@ -251,17 +243,18 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct pipe_resource *ptex = &rtex->resource.b.b.b; struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); - unsigned pitch, size, layer_size, i, offset; - unsigned nblocksy; + unsigned size, layer_size, i, offset; + unsigned nblocksx, nblocksy; for (i = 0, offset = 0; i <= ptex->last_level; i++) { + unsigned blocksize = util_format_get_blocksize(ptex->format); + r600_texture_set_array_mode(screen, rtex, i, array_mode); - pitch = r600_texture_get_stride(screen, rtex, i); + nblocksx = r600_texture_get_nblocksx(screen, rtex, i); nblocksy = r600_texture_get_nblocksy(screen, rtex, i); - 
layer_size = pitch * nblocksy; - + layer_size = nblocksx * nblocksy * blocksize; if (ptex->target == PIPE_TEXTURE_CUBE) { if (chipc >= R700) size = layer_size * 8; @@ -275,8 +268,9 @@ static void r600_setup_miptree(struct pipe_screen *screen, offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch_in_bytes[i] = pitch; - rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); + rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */ + rtex->pitch_in_bytes[i] = nblocksx * blocksize; + offset += size; } rtex->size = offset; -- cgit v1.2.3 From 8e0437914bb786d0b05be8f95e4ff37bf5a19f44 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Feb 2011 13:42:52 +1000 Subject: r600g: add support for s3tc formats. On r600, s3tc formats require a 1D tiled texture format, so we have to do uploads using a blit, via the 64-bit and 128-bit formats Based on the r600c code we use a 64 and 128-bit type to do the blits. Still requires R600_ENABLE_S3TC until the kernel fixes are in, this has only been tested on evergreen where the kernel doesn't yet get in the way. 
--- src/gallium/drivers/r600/r600_blit.c | 60 +++++++++++++++++++++++++++++++++ src/gallium/drivers/r600/r600_pipe.c | 2 ++ src/gallium/drivers/r600/r600_texture.c | 13 +++++++ 3 files changed, 75 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index fbade99fc54..6687d09e0fd 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -219,6 +219,47 @@ static void r600_hw_copy_region(struct pipe_context *ctx, r600_blitter_end(ctx); } +struct texture_orig_info { + unsigned format; + unsigned width0; + unsigned height0; +}; + +static void r600_s3tc_to_blittable(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + unsigned pixsize = util_format_get_blocksize(tex->format); + int new_format; + int new_height, new_width; + + orig->format = tex->format; + orig->width0 = tex->width0; + orig->height0 = tex->height0; + + if (pixsize == 8) + new_format = PIPE_FORMAT_R16G16B16A16_UNORM; /* 64-bit block */ + else + new_format = PIPE_FORMAT_R32G32B32A32_UNORM; /* 128-bit block */ + + new_width = util_format_get_nblocksx(tex->format, orig->width0); + new_height = util_format_get_nblocksy(tex->format, orig->height0); + + tex->width0 = new_width; + tex->height0 = new_height; + tex->format = new_format; + +} + +static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + tex->format = orig->format; + tex->width0 = orig->width0; + tex->height0 = orig->height0; +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, @@ -228,13 +269,32 @@ static void r600_resource_copy_region(struct pipe_context *ctx, const struct pipe_box *src_box) { struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; + struct texture_orig_info orig_info[2]; + boolean restore_orig[2]; if (rsrc->depth && 
!rsrc->is_flushing_texture) r600_texture_depth_flush(ctx, src, FALSE); + restore_orig[0] = restore_orig[1] = FALSE; + + if (util_format_is_s3tc(src->format)) { + r600_s3tc_to_blittable(src, src_level, &orig_info[0]); + restore_orig[0] = TRUE; + } + + if (util_format_is_s3tc(dst->format)) { + r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]); + restore_orig[1] = TRUE; + } + r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); + if (restore_orig[0]) + r600_reset_blittable_to_s3tc(src, src_level, &orig_info[0]); + + if (restore_orig[1]) + r600_reset_blittable_to_s3tc(dst, dst_level, &orig_info[1]); } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index f9e8e76d241..9d6c9bd5429 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -502,6 +503,7 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) r600_init_screen_resource_functions(&rscreen->screen); rscreen->tiling_info = r600_get_tiling_info(radeon); + util_format_s3tc_init(); util_slab_create(&rscreen->pool_buffers, sizeof(struct r600_resource_buffer), 64, diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index db39383e306..dd14143c2c1 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -289,6 +290,10 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, else bind = PIPE_BIND_RENDER_TARGET; + /* hackaround for S3TC */ + if (util_format_is_s3tc(res->format)) + return TRUE; + if (!screen->is_format_supported(screen, res->format, res->target, @@ -417,6 +422,10 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, } } + if (!(templ->flags & 
R600_RESOURCE_FLAG_TRANSFER) && + util_format_is_s3tc(templ->format)) + array_mode = V_038000_ARRAY_1D_TILED_THIN1; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, 0, 0, NULL); @@ -869,6 +878,10 @@ uint32_t r600_translate_texformat(enum pipe_format format, if (!r600_enable_s3tc) goto out_unknown; + if (!util_format_s3tc_enabled) { + goto out_unknown; + } + switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: -- cgit v1.2.3 From b9e2cde6006b557a3a23a82384899f4d5a5ac7b8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 15 Feb 2011 05:43:44 +0100 Subject: r300g: offload the CS ioctl to another thread This is a multi-threading optimization which hides the kernel overhead behind a thread. It improves performance in CPU-limited apps by 2-15%. Of course you must have at least 2 cores for it to make any difference. It can be disabled with: export RADEON_THREAD=0 --- src/gallium/drivers/r300/r300_flush.c | 4 + src/gallium/drivers/r300/r300_winsys.h | 11 +- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 8 + src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 4 + src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 266 +++++++++++++++++--------- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 53 +++-- 6 files changed, 233 insertions(+), 113 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index bfc15ceae72..c4bb332aec3 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -94,6 +94,10 @@ static void r300_flush(struct pipe_context* pipe, r300->rws->cs_flush(r300->cs); } } + + if (flags & PIPE_FLUSH_FRAME) { + r300->rws->cs_sync_flush(r300->cs); + } } void r300_init_flush_functions(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index bf1dd5c980e..6733253ccc9 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ 
b/src/gallium/drivers/r300/r300_winsys.h @@ -45,8 +45,8 @@ struct r300_winsys_screen; struct r300_winsys_cs_handle; /* for write_reloc etc. */ struct r300_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t buf[R300_MAX_CMDBUF_DWORDS]; /* The command buffer. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t *buf; /* The command buffer. */ }; enum r300_value_id { @@ -268,6 +268,13 @@ struct r300_winsys_screen { */ void (*cs_flush)(struct r300_winsys_cs *cs); + /** + * Wait until the last flush is completed. + * + * \param cs A command stream. + */ + void (*cs_sync_flush)(struct r300_winsys_cs *cs); + /** * Set a flush callback which is called from winsys when flush is * required. diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index f3c4002883d..afb8131acbe 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -116,6 +116,10 @@ static void radeon_bo_wait(struct r300_winsys_bo *_buf) struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); struct drm_radeon_gem_wait_idle args = {}; + while (p_atomic_read(&bo->num_active_ioctls)) { + sched_yield(); + } + args.handle = bo->handle; while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, &args, sizeof(args)) == -EBUSY); @@ -126,6 +130,10 @@ static boolean radeon_bo_is_busy(struct r300_winsys_bo *_buf) struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); struct drm_radeon_gem_busy args = {}; + if (p_atomic_read(&bo->num_active_ioctls)) { + return TRUE; + } + args.handle = bo->handle; return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args)) != 0; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index d877512be58..a26866b7e75 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -59,6 +59,10 @@ struct radeon_bo { /* how many command 
streams is this bo referenced in? */ int num_cs_references; + /* how many command streams, which are being emitted in a separate + * thread, is this bo referenced in? */ + int num_active_ioctls; + boolean flinked; uint32_t flink; }; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 5b2a17c856e..b4f5c9f6a88 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -73,6 +73,63 @@ #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) +static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd) +{ + csc->fd = fd; + csc->nrelocs = 512; + csc->relocs_bo = (struct radeon_bo**) + CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); + if (!csc->relocs_bo) { + return FALSE; + } + + csc->relocs = (struct drm_radeon_cs_reloc*) + CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc)); + if (!csc->relocs) { + FREE(csc->relocs_bo); + return FALSE; + } + + csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; + csc->chunks[0].length_dw = 0; + csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf; + csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; + csc->chunks[1].length_dw = 0; + csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; + + csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0]; + csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1]; + + csc->cs.num_chunks = 2; + csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array; + return TRUE; +} + +static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) +{ + unsigned i; + + for (i = 0; i < csc->crelocs; i++) { + radeon_bo_unref(csc->relocs_bo[i]); + p_atomic_dec(&csc->relocs_bo[i]->num_cs_references); + csc->relocs_bo[i] = NULL; + } + + csc->crelocs = 0; + csc->chunks[0].length_dw = 0; + csc->chunks[1].length_dw = 0; + csc->used_gart = 0; + csc->used_vram = 0; + memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added)); +} + +static 
void radeon_destroy_cs_context(struct radeon_cs_context *csc) +{ + radeon_cs_context_cleanup(csc); + FREE(csc->relocs_bo); + FREE(csc->relocs); +} + static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rws) { struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); @@ -84,35 +141,29 @@ static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rw } cs->ws = ws; - cs->nrelocs = 256; - cs->relocs_bo = (struct radeon_bo**) - CALLOC(1, cs->nrelocs * sizeof(struct radeon_bo*)); - if (!cs->relocs_bo) { + + if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) { FREE(cs); return NULL; } - - cs->relocs = (struct drm_radeon_cs_reloc*) - CALLOC(1, cs->nrelocs * sizeof(struct drm_radeon_cs_reloc)); - if (!cs->relocs) { - FREE(cs->relocs_bo); + if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) { + radeon_destroy_cs_context(&cs->csc1); FREE(cs); return NULL; } - cs->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - cs->chunks[0].length_dw = 0; - cs->chunks[0].chunk_data = (uint64_t)(uintptr_t)cs->base.buf; - cs->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - cs->chunks[1].length_dw = 0; - cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs; + /* Set the first command buffer as current. 
*/ + cs->csc = &cs->csc1; + cs->cst = &cs->csc2; + cs->base.buf = cs->csc->buf; + p_atomic_inc(&ws->num_cs); return &cs->base; } #define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) -static inline void update_domains(struct drm_radeon_cs_reloc *reloc, +static INLINE void update_domains(struct drm_radeon_cs_reloc *reloc, enum r300_buffer_domain rd, enum r300_buffer_domain wd, enum r300_buffer_domain *added_domains) @@ -131,22 +182,22 @@ static inline void update_domains(struct drm_radeon_cs_reloc *reloc, } } -int radeon_get_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo) +int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) { struct drm_radeon_cs_reloc *reloc; unsigned i; - unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1); + unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); - if (cs->is_handle_added[hash]) { - reloc = cs->relocs_hashlist[hash]; + if (csc->is_handle_added[hash]) { + reloc = csc->relocs_hashlist[hash]; if (reloc->handle == bo->handle) { - return cs->reloc_indices_hashlist[hash]; + return csc->reloc_indices_hashlist[hash]; } /* Hash collision, look for the BO in the list of relocs linearly. */ - for (i = cs->crelocs; i != 0;) { + for (i = csc->crelocs; i != 0;) { --i; - reloc = &cs->relocs[i]; + reloc = &csc->relocs[i]; if (reloc->handle == bo->handle) { /* Put this reloc in the hash list. * This will prevent additional hash collisions if there are @@ -157,8 +208,8 @@ int radeon_get_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo) * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC * will collide here: ^ and here: ^, * meaning that we should get very few collisions in the end. 
*/ - cs->relocs_hashlist[hash] = reloc; - cs->reloc_indices_hashlist[hash] = i; + csc->relocs_hashlist[hash] = reloc; + csc->reloc_indices_hashlist[hash] = i; /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ return i; } @@ -168,7 +219,7 @@ int radeon_get_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo) return -1; } -static void radeon_add_reloc(struct radeon_drm_cs *cs, +static void radeon_add_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo, enum r300_buffer_domain rd, enum r300_buffer_domain wd, @@ -176,24 +227,24 @@ static void radeon_add_reloc(struct radeon_drm_cs *cs, { struct drm_radeon_cs_reloc *reloc; unsigned i; - unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1); + unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); - if (cs->is_handle_added[hash]) { - reloc = cs->relocs_hashlist[hash]; + if (csc->is_handle_added[hash]) { + reloc = csc->relocs_hashlist[hash]; if (reloc->handle == bo->handle) { update_domains(reloc, rd, wd, added_domains); return; } /* Hash collision, look for the BO in the list of relocs linearly. */ - for (i = cs->crelocs; i != 0;) { + for (i = csc->crelocs; i != 0;) { --i; - reloc = &cs->relocs[i]; + reloc = &csc->relocs[i]; if (reloc->handle == bo->handle) { update_domains(reloc, rd, wd, added_domains); - cs->relocs_hashlist[hash] = reloc; - cs->reloc_indices_hashlist[hash] = i; + csc->relocs_hashlist[hash] = reloc; + csc->reloc_indices_hashlist[hash] = i; /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ return; } @@ -201,35 +252,35 @@ static void radeon_add_reloc(struct radeon_drm_cs *cs, } /* New relocation, check if the backing array is large enough. 
*/ - if (cs->crelocs >= cs->nrelocs) { + if (csc->crelocs >= csc->nrelocs) { uint32_t size; - cs->nrelocs += 10; + csc->nrelocs += 10; - size = cs->nrelocs * sizeof(struct radeon_bo*); - cs->relocs_bo = (struct radeon_bo**)realloc(cs->relocs_bo, size); + size = csc->nrelocs * sizeof(struct radeon_bo*); + csc->relocs_bo = (struct radeon_bo**)realloc(csc->relocs_bo, size); - size = cs->nrelocs * sizeof(struct drm_radeon_cs_reloc); - cs->relocs = (struct drm_radeon_cs_reloc*)realloc(cs->relocs, size); + size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc); + csc->relocs = (struct drm_radeon_cs_reloc*)realloc(csc->relocs, size); - cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs; + csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; } /* Initialize the new relocation. */ radeon_bo_ref(bo); p_atomic_inc(&bo->num_cs_references); - cs->relocs_bo[cs->crelocs] = bo; - reloc = &cs->relocs[cs->crelocs]; + csc->relocs_bo[csc->crelocs] = bo; + reloc = &csc->relocs[csc->crelocs]; reloc->handle = bo->handle; reloc->read_domains = rd; reloc->write_domain = wd; reloc->flags = 0; - cs->is_handle_added[hash] = TRUE; - cs->relocs_hashlist[hash] = reloc; - cs->reloc_indices_hashlist[hash] = cs->crelocs; + csc->is_handle_added[hash] = TRUE; + csc->relocs_hashlist[hash] = reloc; + csc->reloc_indices_hashlist[hash] = csc->crelocs; - cs->chunks[1].length_dw += RELOC_DWORDS; - cs->crelocs++; + csc->chunks[1].length_dw += RELOC_DWORDS; + csc->crelocs++; *added_domains = rd | wd; } @@ -243,23 +294,23 @@ static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs, struct radeon_bo *bo = (struct radeon_bo*)buf; enum r300_buffer_domain added_domains; - radeon_add_reloc(cs, bo, rd, wd, &added_domains); + radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); if (!added_domains) return; if (added_domains & R300_DOMAIN_GTT) - cs->used_gart += bo->size; + cs->csc->used_gart += bo->size; if (added_domains & R300_DOMAIN_VRAM) - cs->used_vram += bo->size; + cs->csc->used_vram 
+= bo->size; } static boolean radeon_drm_cs_validate(struct r300_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - return cs->used_gart < cs->ws->gart_size * 0.8 && - cs->used_vram < cs->ws->vram_size * 0.8; + return cs->csc->used_gart < cs->ws->gart_size * 0.8 && + cs->csc->used_vram < cs->ws->vram_size * 0.8; } static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, @@ -268,7 +319,7 @@ static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; - unsigned index = radeon_get_reloc(cs, bo); + unsigned index = radeon_get_reloc(cs->csc, bo); if (index == -1) { fprintf(stderr, "r300: Cannot get a relocation in %s.\n", __func__); @@ -279,63 +330,89 @@ static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, OUT_CS(&cs->base, index * RELOC_DWORDS); } -static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) +static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_async, param) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - uint64_t chunk_array[2]; + struct radeon_cs_context *csc = (struct radeon_cs_context*)param; unsigned i; - int r; - if (cs->base.cdw) { - /* Prepare the arguments. */ - cs->chunks[0].length_dw = cs->base.cdw; - - chunk_array[0] = (uint64_t)(uintptr_t)&cs->chunks[0]; - chunk_array[1] = (uint64_t)(uintptr_t)&cs->chunks[1]; - - cs->cs.num_chunks = 2; - cs->cs.chunks = (uint64_t)(uintptr_t)chunk_array; - - /* Emit. 
*/ - r = drmCommandWriteRead(cs->ws->fd, DRM_RADEON_CS, - &cs->cs, sizeof(struct drm_radeon_cs)); - if (r) { - if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { - fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); - fprintf(stderr, "VENDORID:DEVICEID 0x%04X:0x%04X\n", 0x1002, - cs->ws->pci_id); - for (i = 0; i < cs->base.cdw; i++) { - fprintf(stderr, "0x%08X\n", cs->base.buf[i]); - } - } else { - fprintf(stderr, "radeon: The kernel rejected CS, " - "see dmesg for more information.\n"); + if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS, + &csc->cs, sizeof(struct drm_radeon_cs))) { + if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { + unsigned i; + + fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); + for (i = 0; i < csc->chunks[0].length_dw; i++) { + fprintf(stderr, "0x%08X\n", csc->buf[i]); } + } else { + fprintf(stderr, "radeon: The kernel rejected CS, " + "see dmesg for more information.\n"); } } - /* Unreference buffers, cleanup. */ - for (i = 0; i < cs->crelocs; i++) { - radeon_bo_unref(cs->relocs_bo[i]); - p_atomic_dec(&cs->relocs_bo[i]->num_cs_references); - cs->relocs_bo[i] = NULL; + for (i = 0; i < csc->crelocs; i++) + p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls); + return NULL; +} + +static void radeon_drm_cs_sync_flush(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + + /* Wait for any pending ioctl to complete. */ + if (cs->thread) { + pipe_thread_wait(cs->thread); + cs->thread = 0; } +} + +DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) + +static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_cs_context *tmp; + + radeon_drm_cs_sync_flush(rcs); + + /* If the CS is not empty, emit it in a newly-spawned thread. 
*/ + if (cs->base.cdw) { + unsigned i, crelocs = cs->csc->crelocs; + + cs->csc->chunks[0].length_dw = cs->base.cdw; + + for (i = 0; i < crelocs; i++) + p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls); + + if (debug_get_option_thread()) { + cs->thread = pipe_thread_create(radeon_drm_cs_emit_async, cs->csc); + assert(cs->thread); + } else { + radeon_drm_cs_emit_async(cs->csc); + } + } + + /* Flip command streams. */ + tmp = cs->csc; + cs->csc = cs->cst; + cs->cst = tmp; + + /* Prepare a new CS. */ + radeon_cs_context_cleanup(cs->csc); + cs->base.buf = cs->csc->buf; cs->base.cdw = 0; - cs->crelocs = 0; - cs->chunks[0].length_dw = 0; - cs->chunks[1].length_dw = 0; - cs->used_gart = 0; - cs->used_vram = 0; - memset(cs->is_handle_added, 0, sizeof(cs->is_handle_added)); } static void radeon_drm_cs_destroy(struct r300_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + radeon_drm_cs_sync_flush(rcs); + radeon_cs_context_cleanup(&cs->csc1); + radeon_cs_context_cleanup(&cs->csc2); p_atomic_dec(&cs->ws->num_cs); - FREE(cs->relocs_bo); - FREE(cs->relocs); + radeon_destroy_cs_context(&cs->csc1); + radeon_destroy_cs_context(&cs->csc2); FREE(cs); } @@ -364,6 +441,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_validate = radeon_drm_cs_validate; ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; ws->base.cs_flush = radeon_drm_cs_emit; + ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; ws->base.cs_set_flush = radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 0183b877a3e..486fd237fc9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -30,34 +30,53 @@ #include "radeon_drm_bo.h" #include -struct radeon_drm_cs { - struct r300_winsys_cs base; - - /* The winsys. 
*/ - struct radeon_drm_winsys *ws; +struct radeon_cs_context { + uint32_t buf[R300_MAX_CMDBUF_DWORDS]; - /* Flush CS. */ - void (*flush_cs)(void *); - void *flush_data; + int fd; + struct drm_radeon_cs cs; + struct drm_radeon_cs_chunk chunks[2]; + uint64_t chunk_array[2]; /* Relocs. */ - unsigned crelocs; unsigned nrelocs; - struct drm_radeon_cs_reloc *relocs; + unsigned crelocs; struct radeon_bo **relocs_bo; - struct drm_radeon_cs cs; - struct drm_radeon_cs_chunk chunks[2]; - - unsigned used_vram; - unsigned used_gart; + struct drm_radeon_cs_reloc *relocs; /* 0 = BO not added, 1 = BO added */ char is_handle_added[256]; struct drm_radeon_cs_reloc *relocs_hashlist[256]; unsigned reloc_indices_hashlist[256]; + + unsigned used_vram; + unsigned used_gart; +}; + +struct radeon_drm_cs { + struct r300_winsys_cs base; + + /* We flip between these two CS. While one is being consumed + * by the kernel in another thread, the other one is being filled + * by the pipe driver. */ + struct radeon_cs_context csc1; + struct radeon_cs_context csc2; + /* The currently-used CS. */ + struct radeon_cs_context *csc; + /* The CS being currently-owned by the other thread. */ + struct radeon_cs_context *cst; + + /* The winsys. */ + struct radeon_drm_winsys *ws; + + /* Flush CS. 
*/ + void (*flush_cs)(void *); + void *flush_data; + + pipe_thread thread; }; -int radeon_get_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo); +int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo); static INLINE struct radeon_drm_cs * radeon_drm_cs(struct r300_winsys_cs *base) @@ -69,7 +88,7 @@ static INLINE boolean radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, struct radeon_bo *bo) { return bo->num_cs_references == bo->rws->num_cs || - (bo->num_cs_references && radeon_get_reloc(cs, bo) != -1); + (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1); } static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) -- cgit v1.2.3 From cd8af3b60bf1ab7ec157042d6a63fb7ece25d954 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 15 Feb 2011 00:25:08 -0800 Subject: nvc0: Fix uninitialized variable warning. Fixes this GCC warning. nvc0_tgsi_to_nc.c: In function 'bld_tex': nvc0_tgsi_to_nc.c:1392: warning: 'dim' may be used uninitialized in this function --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 72bfcd0c95f..687def0344d 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1175,7 +1175,7 @@ static INLINE void describe_texture_target(unsigned target, int *dim, int *array, int *cube, int *shadow) { - *array = *cube = *shadow = 0; + *dim = *array = *cube = *shadow = 0; switch (target) { case TGSI_TEXTURE_1D: -- cgit v1.2.3 From 4b81c5f6e1ad73e344b1f1b5d5844297ee7b985a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 15 Feb 2011 18:42:48 +1000 Subject: r600g: fix regression in r6/7xx since mipmap rework I typod this when copy-pasting. 
--- src/gallium/drivers/r600/r600_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index bd591bdd8cc..5e6821004df 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -709,7 +709,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset(rtex, level, state->cbufs[cb]->u.tex.first_layer); - pitch = rtex->pitch_in_blocks[0] / 8 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; desc = util_format_description(surf->base.format); -- cgit v1.2.3 From 82a9794a35744d60adc38b5eee53153c73d07331 Mon Sep 17 00:00:00 2001 From: Fabian Bieler <<der.fabe@gmx.net>> Date: Thu, 10 Feb 2011 16:57:34 +0100 Subject: r600g: Fix RGB10_A2 format handling --- src/gallium/drivers/r600/eg_state_inlines.h | 8 +++++--- src/gallium/drivers/r600/r600_state_inlines.h | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 46369cc1e26..c2e06c37ed5 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -352,9 +352,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_SWAP_STD_REV; + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_028C70_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -429,7 +431,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) 
case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_COLOR_10_10_10_2; + return V_028C70_COLOR_2_10_10_10; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 81805158966..fa6c24c2ece 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -345,9 +345,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_SWAP_STD_REV; + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: return V_0280A0_SWAP_STD; @@ -422,7 +424,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_COLOR_10_10_10_2; + return V_0280A0_COLOR_2_10_10_10; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: -- cgit v1.2.3 From eafb7f234d11a290b00dcaf5492b9bdad1cf5148 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 13 Feb 2011 09:13:14 +0100 Subject: gallium: add usage parameter to pipe_buffer_create And set a reasonable buffer usage flag everywhere instead of just PIPE_USAGE_DEFAULT. 
--- src/gallium/auxiliary/util/u_blit.c | 1 + src/gallium/auxiliary/util/u_gen_mipmap.c | 1 + src/gallium/auxiliary/util/u_index_modify.c | 3 ++ src/gallium/auxiliary/util/u_inlines.h | 3 +- src/gallium/auxiliary/util/u_upload_mgr.c | 1 + src/gallium/drivers/r300/r300_render.c | 1 + src/gallium/drivers/svga/svga_draw_arrays.c | 1 + src/gallium/drivers/svga/svga_draw_elements.c | 1 + src/gallium/drivers/svga/svga_swtnl_backend.c | 3 ++ .../state_trackers/d3d1x/dxgi/src/dxgi_native.cpp | 3 +- src/gallium/state_trackers/vega/renderer.c | 4 ++- src/gallium/state_trackers/xorg/xorg_renderer.c | 1 + src/gallium/tests/trivial/quad-tex.c | 3 +- src/gallium/tests/trivial/tri.c | 3 +- src/mesa/state_tracker/st_cb_bitmap.c | 1 + src/mesa/state_tracker/st_cb_bufferobjects.c | 33 ++++++++++++++++++---- src/mesa/state_tracker/st_cb_clear.c | 1 + src/mesa/state_tracker/st_cb_drawpixels.c | 1 + src/mesa/state_tracker/st_cb_drawtex.c | 1 + 19 files changed, 55 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index f07ae2a84d5..76bd7ace526 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -199,6 +199,7 @@ get_next_slot( struct blit_state *ctx ) if (!ctx->vbuf) { ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, max_slots * sizeof ctx->vertices); } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index d6f1f520859..3b6342ad8d1 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1350,6 +1350,7 @@ get_next_slot(struct gen_mipmap_state *ctx) if (!ctx->vbuf) { ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, max_slots * sizeof ctx->vertices); } diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c 
index fda396ba094..d0a28b5fdfa 100644 --- a/src/gallium/auxiliary/util/u_index_modify.c +++ b/src/gallium/auxiliary/util/u_index_modify.c @@ -65,6 +65,7 @@ void util_shorten_ubyte_elts(struct pipe_context *context, new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, @@ -116,6 +117,7 @@ void util_rebuild_ushort_elts(struct pipe_context *context, new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); out_map = pipe_buffer_map(context, new_elts, @@ -167,6 +169,7 @@ void util_rebuild_uint_elts(struct pipe_context *context, new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); out_map = pipe_buffer_map(context, new_elts, diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 01883271d6a..98889fb70ac 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -182,6 +182,7 @@ pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2) static INLINE struct pipe_resource * pipe_buffer_create( struct pipe_screen *screen, unsigned bind, + unsigned usage, unsigned size ) { struct pipe_resource buffer; @@ -189,7 +190,7 @@ pipe_buffer_create( struct pipe_screen *screen, buffer.target = PIPE_BUFFER; buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */ buffer.bind = bind; - buffer.usage = PIPE_USAGE_DEFAULT; + buffer.usage = usage; buffer.flags = 0; buffer.width0 = size; buffer.height0 = 1; diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 69b11f9bb77..dcf800a1e8e 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -121,6 +121,7 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload, upload->buffer = pipe_buffer_create( upload->pipe->screen, 
upload->bind, + PIPE_USAGE_STREAM, size ); if (upload->buffer == NULL) goto fail; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index cdca74e660d..7dc8ff08fa4 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -931,6 +931,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, pipe_resource_reference(&r300->vbo, NULL); r300->vbo = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, R300_MAX_DRAW_VBO_SIZE); r300->draw_vbo_offset = 0; r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE; diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index be0e7abe21b..a6518042eb9 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -53,6 +53,7 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl, dst = pipe_buffer_create( pipe->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, size ); if (dst == NULL) goto fail; diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c index 83527c6ef49..7d420c6b295 100644 --- a/src/gallium/drivers/svga/svga_draw_elements.c +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -56,6 +56,7 @@ translate_indices( struct svga_hwtnl *hwtnl, dst = pipe_buffer_create( pipe->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, size ); if (dst == NULL) goto fail; diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index 087f2d08540..ac9d637f8cb 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -87,12 +87,14 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render, svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size); svga_render->vbuf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, 
svga_render->vbuf_size); if(!svga_render->vbuf) { svga_context_flush(svga, NULL); assert(!svga_render->vbuf); svga_render->vbuf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, svga_render->vbuf_size); assert(svga_render->vbuf); } @@ -262,6 +264,7 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size); svga_render->ibuf = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STREAM, svga_render->ibuf_size); svga_render->ibuf_offset = 0; } diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp index 61cf2ddd9df..2e45f3f43e9 100644 --- a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp +++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp @@ -794,7 +794,8 @@ struct dxgi_blitter const unsigned semantic_indices[] = { 0, 0 }; vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, semantic_indices); - vbuf.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(quad_data)); + vbuf.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, sizeof(quad_data)); vbuf.buffer_offset = 0; vbuf.stride = 4 * sizeof(float); pipe_buffer_write(pipe, vbuf.buffer, 0, sizeof(quad_data), quad_data); diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index 952b12356a4..9de2cb1014d 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -174,6 +174,7 @@ static void renderer_set_mvp(struct renderer *renderer, pipe_resource_reference(&cbuf, NULL); cbuf = pipe_buffer_create(renderer->pipe->screen, PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, sizeof(consts)); if (cbuf) { pipe_buffer_write(renderer->pipe, cbuf, @@ -474,7 +475,8 @@ static void renderer_set_custom_fs(struct renderer *renderer, pipe_resource_reference(&cbuf, NULL); 
cbuf = pipe_buffer_create(renderer->pipe->screen, - PIPE_BIND_CONSTANT_BUFFER, const_buffer_len); + PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STATIC, + const_buffer_len); pipe_buffer_write(renderer->pipe, cbuf, 0, const_buffer_len, const_buffer); renderer->pipe->set_constant_buffer(renderer->pipe, diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index 7c6b6c05dae..3b3ae455c0b 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -429,6 +429,7 @@ void renderer_set_constants(struct xorg_renderer *r, pipe_resource_reference(cbuf, NULL); *cbuf = pipe_buffer_create(r->pipe->screen, PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, param_bytes); if (*cbuf) { diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index 92c5b4dbb18..af93e09d8d4 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -129,7 +129,8 @@ static void init_prog(struct program *p) } }; - p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(vertices)); + p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(vertices)); pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(vertices), vertices); } diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index 37c1573051f..b89cfe0d989 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -120,7 +120,8 @@ static void init_prog(struct program *p) } }; - p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(vertices)); + p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(vertices)); pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(vertices), vertices); } diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index c40a510c351..0ea5671557c 100644 --- 
a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -349,6 +349,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, if (!st->bitmap.vbuf) { st->bitmap.vbuf = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, max_slots * sizeof(st->bitmap.vertices)); } diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index d4d9af4ada3..12528f49ff7 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -171,7 +171,7 @@ st_bufferobj_data(struct gl_context *ctx, struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; struct st_buffer_object *st_obj = st_buffer_object(obj); - unsigned buffer_usage; + unsigned bind, pipe_usage; st_obj->Base.Size = size; st_obj->Base.Usage = usage; @@ -179,22 +179,43 @@ st_bufferobj_data(struct gl_context *ctx, switch(target) { case GL_PIXEL_PACK_BUFFER_ARB: case GL_PIXEL_UNPACK_BUFFER_ARB: - buffer_usage = PIPE_BIND_RENDER_TARGET; + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; break; case GL_ARRAY_BUFFER_ARB: - buffer_usage = PIPE_BIND_VERTEX_BUFFER; + bind = PIPE_BIND_VERTEX_BUFFER; break; case GL_ELEMENT_ARRAY_BUFFER_ARB: - buffer_usage = PIPE_BIND_INDEX_BUFFER; + bind = PIPE_BIND_INDEX_BUFFER; break; default: - buffer_usage = 0; + bind = 0; + } + + switch (usage) { + case GL_STATIC_DRAW: + case GL_STATIC_READ: + case GL_STATIC_COPY: + pipe_usage = PIPE_USAGE_STATIC; + break; + case GL_DYNAMIC_DRAW: + case GL_DYNAMIC_READ: + case GL_DYNAMIC_COPY: + pipe_usage = PIPE_USAGE_DYNAMIC; + break; + case GL_STREAM_DRAW: + case GL_STREAM_READ: + case GL_STREAM_COPY: + pipe_usage = PIPE_USAGE_STREAM; + break; + default: + pipe_usage = PIPE_USAGE_DEFAULT; } pipe_resource_reference( &st_obj->buffer, NULL ); if (size != 0) { - st_obj->buffer = pipe_buffer_create(pipe->screen, buffer_usage, size); + st_obj->buffer = 
pipe_buffer_create(pipe->screen, bind, + pipe_usage, size); if (!st_obj->buffer) { return GL_FALSE; diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 69e03430507..d2e0cd73c76 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -138,6 +138,7 @@ draw_quad(struct st_context *st, if (!st->clear.vbuf) { st->clear.vbuf = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, max_slots * sizeof(st->clear.vertices)); } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 03fc903c92e..07527002b4a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -522,6 +522,7 @@ draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z, /* allocate/load buffer object with vertex data */ buf = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(verts)); pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts); diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index 81a2479b95a..86ceb9d78b3 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -139,6 +139,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, /* create the vertex buffer */ vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, numAttribs * 4 * 4 * sizeof(GLfloat)); /* load vertex buffer */ -- cgit v1.2.3 From 632918d3ecd9756ad34098d28ed9eeda874d41a9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Feb 2011 08:50:30 +1000 Subject: r600g: add srgb compressed formats to the list. 
--- src/gallium/drivers/r600/r600_texture.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index dd14143c2c1..86de005031b 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -885,12 +885,16 @@ uint32_t r600_translate_texformat(enum pipe_format format, switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: result = FMT_BC1; goto out_word4; case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: result = FMT_BC2; goto out_word4; case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: result = FMT_BC3; goto out_word4; default: -- cgit v1.2.3 From 38104a767c1e79000fecfcd8353b740c31601084 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 16 Feb 2011 00:51:01 +0100 Subject: r300g: disable linear filtering for float textures --- src/gallium/drivers/r300/r300_state_derived.c | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index d715dd82f86..5e86d18a746 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -687,6 +687,25 @@ static uint32_t r300_get_border_color(enum pipe_format format, return uc.ui; } +static boolean util_format_is_float(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + + if (!format) + return FALSE; + + /* Find the first non-void channel. */ + for (i = 0; i < 4; i++) + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + + if (i == 4) + return FALSE; + + return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? 
TRUE : FALSE; +} + static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -822,6 +841,32 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); } + /* Float textures only support nearest and mip-nearest filtering. */ + if (util_format_is_float(tex->b.b.b.format)) { + /* No MAG linear filtering. */ + if ((texstate->filter0 & R300_TX_MAG_FILTER_MASK) == + R300_TX_MAG_FILTER_LINEAR) { + texstate->filter0 &= ~R300_TX_MAG_FILTER_MASK; + texstate->filter0 |= R300_TX_MAG_FILTER_NEAREST; + } + /* No MIN linear filtering. */ + if ((texstate->filter0 & R300_TX_MIN_FILTER_MASK) == + R300_TX_MIN_FILTER_LINEAR) { + texstate->filter0 &= ~R300_TX_MIN_FILTER_MASK; + texstate->filter0 |= R300_TX_MIN_FILTER_NEAREST; + } + /* No mipmap linear filtering. */ + if ((texstate->filter0 & R300_TX_MIN_FILTER_MIP_MASK) == + R300_TX_MIN_FILTER_MIP_LINEAR) { + texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; + texstate->filter0 |= R300_TX_MIN_FILTER_MIP_NEAREST; + } + /* No anisotropic filtering. */ + texstate->filter0 &= ~R300_TX_MAX_ANISO_MASK; + texstate->filter1 &= ~R500_TX_MAX_ANISO_MASK; + texstate->filter1 &= ~R500_TX_ANISO_HIGH_QUALITY; + } + texstate->filter0 |= i << 28; size += 16; -- cgit v1.2.3 From 0863eaf91cbbfb39f9b91b0d1217090a18e10082 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Feb 2011 15:56:11 +1000 Subject: r600g: fix s3tc-texsubimage we need to translate the destination box as well. fixes piglit's s3tc-texsubimage test. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_blit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 6687d09e0fd..06375f72d19 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -285,6 +285,9 @@ static void r600_resource_copy_region(struct pipe_context *ctx, if (util_format_is_s3tc(dst->format)) { r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]); restore_orig[1] = TRUE; + /* translate the dst box as well */ + dstx = util_format_get_nblocksx(orig_info[1].format, dstx); + dsty = util_format_get_nblocksx(orig_info[1].format, dsty); } r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, -- cgit v1.2.3 From 4016a1b4c634b09ca4b3007d18e5ead1dd57a1bb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Feb 2011 16:01:36 +1000 Subject: r600g: add L4A4 support. this fixes piglit fbo-generatemipmap-formats on my rv730. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/eg_state_inlines.h | 6 ++++++ src/gallium/drivers/r600/r600_state_inlines.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index c2e06c37ed5..3bf0970353f 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -289,6 +289,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_SWAP_ALT; + case PIPE_FORMAT_A8_UNORM: return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: @@ -382,6 +385,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { /* 8-bit buffers. 
*/ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_COLOR_4_4; + case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index fa6c24c2ece..8cae4f4dcf6 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -289,6 +289,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_SWAP_ALT; + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_SWAP_STD_REV; @@ -374,6 +377,9 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_COLOR_4_4; + /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: -- cgit v1.2.3 From 04903d1f639d21e1476552adf4de7f4edc35b489 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Feb 2011 16:09:43 +1000 Subject: r600g: add L8A8 SRGB formats. this fixes the piglit mipmap generation sRGB on my rv730. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/eg_state_inlines.h | 4 ++++ src/gallium/drivers/r600/r600_state_inlines.h | 4 ++++ src/gallium/drivers/r600/r600_texture.c | 2 -- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 3bf0970353f..f48b8a95d6f 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -296,6 +296,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_028C70_SWAP_STD; @@ -316,6 +317,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_028C70_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: return V_028C70_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_028C70_SWAP_STD; @@ -391,6 +393,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_028C70_COLOR_8; @@ -411,6 +414,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: return V_028C70_COLOR_8_8; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 8cae4f4dcf6..f980610f463 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -285,6 +285,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: 
+ case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_0280A0_SWAP_STD; @@ -308,6 +309,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_SWAP_STD; @@ -384,6 +386,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_0280A0_COLOR_8; @@ -404,6 +407,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_COLOR_8_8; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 86de005031b..cc36d94cd7a 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -859,8 +859,6 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_SRGB: word4 |= S_038010_FORCE_DEGAMMA(1); - if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB) - goto out_unknown; /* fails for some reason - TODO */ break; default: -- cgit v1.2.3 From 9e725b9123c41acf84410cb32d28f729b1e5c9e4 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 16 Feb 2011 05:28:40 +0100 Subject: r300g: fix texture border color for float formats --- src/gallium/drivers/r300/r300_state_derived.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 5e86d18a746..003fe9a58cd 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ 
b/src/gallium/drivers/r300/r300_state_derived.c @@ -677,7 +677,20 @@ static uint32_t r300_get_border_color(enum pipe_format format, case 16: if (desc->nr_channels <= 2) { border_swizzled[0] = border_swizzled[2]; - util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc); + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + } + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + } + break; + + case 32: + if (desc->nr_channels == 1) { + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc); } else { util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); } -- cgit v1.2.3 From f53436d821a5173075b2a4a8db8cd23d9669f6e2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Feb 2011 16:51:41 +1000 Subject: r600g: fix typo in previous s3tc commit pointed out by Marek on irc. 
--- src/gallium/drivers/r600/r600_blit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 06375f72d19..31d5e3f73b6 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -287,7 +287,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx, restore_orig[1] = TRUE; /* translate the dst box as well */ dstx = util_format_get_nblocksx(orig_info[1].format, dstx); - dsty = util_format_get_nblocksx(orig_info[1].format, dsty); + dsty = util_format_get_nblocksy(orig_info[1].format, dsty); } r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, -- cgit v1.2.3 From a3c62afa7c7f3435b3c28bee417e652c9bb018e6 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 15 Feb 2011 19:12:41 +0100 Subject: nvc0: fix user vertex buffer updates --- src/gallium/drivers/nvc0/nvc0_buffer.c | 19 ++++++++++++------- src/gallium/drivers/nvc0/nvc0_resource.h | 3 +++ src/gallium/drivers/nvc0/nvc0_screen.h | 6 ++++-- src/gallium/drivers/nvc0/nvc0_vbo.c | 25 +++++++++++++++++++++++-- 4 files changed, 42 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index ea3e642a448..f16671ac7ff 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -59,15 +59,23 @@ release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) (*mm) = NULL; } -static INLINE boolean -nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, - unsigned domain) +INLINE void +nvc0_buffer_release_gpu_storage(struct nvc0_resource *buf) { nouveau_bo_ref(NULL, &buf->bo); if (buf->mm) release_allocation(&buf->mm, buf->fence); + buf->domain = 0; +} + +static INLINE boolean +nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, + unsigned 
domain) +{ + nvc0_buffer_release_gpu_storage(buf); + return nvc0_buffer_allocate(screen, buf, domain); } @@ -77,10 +85,7 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen, { struct nvc0_resource *res = nvc0_resource(presource); - nouveau_bo_ref(NULL, &res->bo); - - if (res->mm) - release_allocation(&res->mm, res->fence); + nvc0_buffer_release_gpu_storage(res); if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) FREE(res->data); diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 17e79642a6d..709e6157f55 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -51,6 +51,9 @@ struct nvc0_resource { struct nvc0_mm_allocation *mm; }; +void +nvc0_buffer_release_gpu_storage(struct nvc0_resource *); + boolean nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *, unsigned start, unsigned size); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 1fac142e2be..3b676fd21a1 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -128,9 +128,11 @@ nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) { struct nvc0_screen *screen = nvc0_screen(res->base.screen); - nouveau_bo_validate(screen->base.channel, res->bo, flags); + if (likely(res->bo)) { + nouveau_bo_validate(screen->base.channel, res->bo, flags); - nvc0_resource_fence(res, flags); + nvc0_resource_fence(res, flags); + } } diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 80e05823759..fb135725c3f 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -171,12 +171,15 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) nvc0->vbo_fifo = nvc0->vbo_user = 0; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + for (i = 0; i < nvc0->num_vtxbufs; ++i) { vb = &nvc0->vtxbuf[i]; if (!vb->stride) continue; buf = 
nvc0_resource(vb->buffer); + /* NOTE: user buffers with temporary storage count as mapped by GPU */ if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { if (nvc0->vbo_push_hint) { nvc0->vbo_fifo = ~0; @@ -230,14 +233,27 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) MARK_RING (chan, 6, 4); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); OUT_RING (chan, i); - OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD); - OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD); + OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD); OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD); OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD); } nvc0->vbo_dirty = TRUE; } +static INLINE void +nvc0_release_user_vbufs(struct nvc0_context *nvc0) +{ + uint32_t vbo_user = nvc0->vbo_user; + + while (vbo_user) { + int i = ffs(vbo_user) - 1; + vbo_user &= ~(1 << i); + + nvc0_buffer_release_gpu_storage(nvc0_resource(nvc0->vtxbuf[i].buffer)); + } +} + void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) { @@ -564,6 +580,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0->vbo_min_index = info->min_index; nvc0->vbo_max_index = info->max_index; + if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo) + nvc0->dirty |= NVC0_NEW_ARRAYS; + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) nvc0_update_user_vbufs(nvc0); @@ -621,4 +640,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count, info->index_bias); } + + nvc0_release_user_vbufs(nvc0); } -- cgit v1.2.3 From 1b4c0c8ea0b4e6065f23f9f2bbb954a7bd2549e4 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 12 Feb 2011 18:27:47 +0100 Subject: nvc0: update the set of formats supported by the 2D engine --- src/gallium/drivers/nvc0/nvc0_surface.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff 
--git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index cc0a65687dc..faa51769313 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -33,25 +33,15 @@ #include "nv50_defs.xml.h" +#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL + /* return TRUE for formats that can be converted among each other by NVC0_2D */ static INLINE boolean nvc0_2d_format_faithful(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - return TRUE; - default: - return FALSE; - } + uint8_t id = nvc0_format_table[format].rt; + + return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); } static INLINE uint8_t @@ -62,7 +52,7 @@ nvc0_2d_format(enum pipe_format format) /* Hardware values for color formats range from 0xc0 to 0xff, * but the 2D engine doesn't support all of them. */ - if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + if (nvc0_2d_format_faithful(format)) return id; switch (util_format_get_blocksize(format)) { @@ -72,6 +62,10 @@ nvc0_2d_format(enum pipe_format format) return NV50_SURFACE_FORMAT_R16_UNORM; case 4: return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + case 8: + return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM; + case 16: + return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT; default: return 0; } -- cgit v1.2.3 From bf1ce9c64b3da731bc6073055abc9f3340ac5a17 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 12 Feb 2011 18:50:03 +0100 Subject: nvc0: use format from the template on surface creation Fixes piglit/fbo-srgb. 
--- src/gallium/drivers/nvc0/nvc0_miptree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index 22f48c8a5fe..ea3ed9e0225 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -283,7 +283,7 @@ nvc0_miptree_surface_new(struct pipe_context *pipe, pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); ps->context = pipe; - ps->format = pt->format; + ps->format = templ->format; ps->usage = templ->usage; ps->u.tex.level = templ->u.tex.level; ps->u.tex.first_layer = templ->u.tex.first_layer; -- cgit v1.2.3 From 17d680cc537acf8a967d9e36f7006afab560122a Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 12 Feb 2011 19:18:19 +0100 Subject: nvc0: force vertex data through FIFO if we need to convert it We may want to put the converted vertex buffer in persistent storage instead, but these are rare corner cases. --- src/gallium/drivers/nvc0/nvc0_stateobj.h | 1 + src/gallium/drivers/nvc0/nvc0_vbo.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 752e927e2aa..5b15e286751 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -67,6 +67,7 @@ struct nvc0_vertex_stateobj { unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; + boolean need_conversion; /* e.g. 
VFETCH cannot convert f64 to f32 */ unsigned vtx_size; unsigned vtx_per_packet_max; struct nvc0_vertex_element element[0]; diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index fb135725c3f..19fd85273c1 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -60,6 +60,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, so->num_elements = num_elements; so->instance_elts = 0; so->instance_bufs = 0; + so->need_conversion = FALSE; transkey.nr_elements = 0; transkey.output_stride = 0; @@ -83,6 +84,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, return NULL; } so->element[i].state = nvc0_format_table[fmt].vtx; + so->need_conversion = TRUE; } so->element[i].state |= i; @@ -263,7 +265,12 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) struct nvc0_vertex_element *ve; unsigned i; - nvc0_prevalidate_vbufs(nvc0); + if (unlikely(vertex->need_conversion)) { + nvc0->vbo_fifo = ~0; + nvc0->vbo_user = 0; + } else { + nvc0_prevalidate_vbufs(nvc0); + } BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); for (i = 0; i < vertex->num_elements; ++i) { -- cgit v1.2.3 From 80a7ae3cc5735b7615c049425b306a53662740a9 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 12 Feb 2011 22:02:26 +0100 Subject: nvc0: disable early fragment tests if KIL is used Early-Z pass raises the occlusion counter. --- src/gallium/drivers/nvc0/nvc0_program.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 613dc431bfd..f7ea97ddb1d 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -633,7 +633,7 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) if (ti->scan.writes_z) prog->flags[0] = 0x11; /* ? 
*/ else - if (!ti->global_stores) + if (!ti->scan.uses_kill && !ti->global_stores) prog->fp.early_z = 1; ret = nvc0_fp_gen_header(prog, ti); -- cgit v1.2.3 From a24e9bd497d54a373b021370f90144596a37945b Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 13 Feb 2011 00:17:43 +0100 Subject: nvc0: clone memory values with multiple refs before modifying them --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 9a7094e5d3c..53010f8bd50 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -732,7 +732,8 @@ struct pass_reld_elim { * The two loads may not overlap but reference adjacent memory locations. */ static void -combine_load(struct mem_record *rec, struct nv_instruction *ld) +combine_load(struct nv_pc *pc, struct mem_record *rec, + struct nv_instruction *ld) { struct nv_instruction *fv = rec->insn; struct nv_value *mem = ld->src[0]->value; @@ -760,6 +761,8 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) fv->def[d++]->insn = fv; } + if (fv->src[0]->value->refc > 1) + nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value)); fv->src[0]->value->reg.address = rec->ofst; fv->src[0]->value->reg.size = rec->size = size; @@ -841,7 +844,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) switch (ld->opcode) { case NV_OP_EXPORT: combine_export(it, ld); break; default: - combine_load(it, ld); + combine_load(ctx->pc, it, ld); break; } } else -- cgit v1.2.3 From 293a8d1b600cd5bd89b3c4c0b6c2bb245d9bd80f Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 14 Feb 2011 02:04:58 +0100 Subject: nvc0: front stencil mask and func mask methods are swapped --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 6 +++--- src/gallium/drivers/nvc0/nvc0_state.c | 12 +++++++----- 2 files 
changed, 10 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 4b1325a3043..59da15ed430 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -575,7 +575,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 #define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 -#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380 +#define NVC0_3D_STENCIL_ENABLE 0x00001380 #define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 #define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 @@ -619,9 +619,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 -#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398 +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398 -#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c +#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c #define NVC0_3D_DRAW_TFB_BASE 0x000013a4 diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 7fb91b1191d..7f59d40c5fb 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -284,20 +284,21 @@ nvc0_zsa_state_create(struct pipe_context *pipe, } if (cso->stencil[0].enabled) { - SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5); + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); SB_DATA (so, 1); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); - SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); - SB_DATA (so, cso->stencil[0].writemask); + SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2); SB_DATA (so, cso->stencil[0].valuemask); + SB_DATA (so, cso->stencil[0].writemask); } else { - SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0); + SB_IMMED_3D(so, STENCIL_ENABLE, 0); } if 
(cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); SB_DATA (so, 1); SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); @@ -307,7 +308,8 @@ nvc0_zsa_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); SB_DATA (so, cso->stencil[1].writemask); SB_DATA (so, cso->stencil[1].valuemask); - } else { + } else + if (cso->stencil[0].enabled) { SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); } -- cgit v1.2.3 From 19f2272e94895cf241f6b05117535e008e07d0a7 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 14 Feb 2011 14:10:41 +0100 Subject: nvc0: demagic the clear flags and fix region clears The CLIP_RECTs always affect dedicated clears, and it's nicer than having to mark the viewport or scissor state dirty after it. --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 60 ++++++++++++++++++++++----------- src/gallium/drivers/nvc0/nvc0_screen.c | 14 +++++--- src/gallium/drivers/nvc0/nvc0_surface.c | 20 ++++++----- 3 files changed, 62 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 59da15ed430..7352aa1e99f 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -230,21 +230,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 #define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0)) -#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16 - -#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0)) -#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff -#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16 +#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16 #define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) #define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 @@ -370,6 +370,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff #define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 +#define NVC0_3D_CLEAR_FLAGS 0x000010f8 +#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010 +#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100 +#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000 + #define NVC0_3D_VERTEX_ID 0x00001118 #define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c @@ -656,6 +662,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLIPID_HEIGHT 0x00001504 #define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510 #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001 #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002 @@ -954,12 +972,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000 -#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c +#define NVC0_3D_CLIP_RECTS_EN 0x0000194c -#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950 -#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000 -#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001 -#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002 +#define NVC0_3D_CLIP_RECTS_MODE 0x00001950 +#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002 #define NVC0_3D_FP_ZORDER_CTRL 0x0000196c #define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 @@ -1012,6 +1030,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 #define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 +#define NVC0_3D_CLIPID_FILL 0x000019d4 + #define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) #define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 #define NVC0_3D_COLOR_MASK__LEN 0x00000008 diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 88daf31d46a..54510696dc0 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -283,9 +283,6 @@ nvc0_magic_3d_init(struct nouveau_channel *chan) BEGIN_RING(chan, RING_3D_(0x074c), 1); OUT_RING (chan, 0x3f); - BEGIN_RING(chan, RING_3D_(0x10f8), 1); - OUT_RING (chan, 0x0101); - BEGIN_RING(chan, RING_3D_(0x16a8), 1); OUT_RING (chan, (3 << 16) | 3); BEGIN_RING(chan, RING_3D_(0x1794), 1); @@ -534,11 +531,20 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */ OUT_RING (chan, 0x3f); - BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1); + OUT_RING (chan, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 
8 * 2); + for (i = 0; i < 8 * 2; ++i) + OUT_RING(chan, 0); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); OUT_RING (chan, 0); + /* neither scissors, viewport nor stencil mask should affect clears */ + BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index faa51769313..8898bc733a3 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -243,15 +243,16 @@ nvc0_clear_render_target(struct pipe_context *pipe, OUT_RING (chan, 1); OUT_RING (chan, 0); - /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ - - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); OUT_RING (chan, 0x3c); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; } @@ -300,13 +301,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, OUT_RING (chan, sf->height); OUT_RING (chan, (1 << 16) | 1); - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); OUT_RING (chan, mode); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; } -- cgit v1.2.3 From 
e7845e319679e3539274c37e9c16692a2dfe59fe Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 15 Feb 2011 14:41:20 +0100 Subject: nvc0: fix clipping and use VIEWPORT instead of SCISSOR --- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 18 +++-- src/gallium/drivers/nvc0/nvc0_screen.c | 2 + src/gallium/drivers/nvc0/nvc0_state.c | 6 +- src/gallium/drivers/nvc0/nvc0_state_validate.c | 100 ++++++++++--------------- src/gallium/drivers/nvc0/nvc0_stateobj.h | 2 - 5 files changed, 58 insertions(+), 70 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 7352aa1e99f..73a605f94e1 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -962,15 +962,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 #define NVC0_3D_CLIP_RECTS_EN 0x0000194c diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 54510696dc0..321d86bdf1a 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -550,6 +550,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); OUT_RINGf (chan, 0.0f); OUT_RINGf (chan, 1.0f); + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); + OUT_RING (chan, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1); /* We use scissors instead of exact view volume clipping, * so they're always enabled. diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 7f59d40c5fb..ae5f335f9f2 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -177,9 +177,9 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, return NULL; so->pipe = *cso; -#ifndef NVC0_SCISSORS_CLIPPING - SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor); -#endif + /* Scissor enables are handled in scissor state, we will not want to + * always emit 16 commands, one for each scissor rectangle, here. + */ SB_BEGIN_3D(so, SHADE_MODEL, 1); SB_DATA (so, cso->flatshade ? 
NVC0_3D_SHADE_MODEL_FLAT : diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 7406f6c7917..96c1198d4cb 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -117,12 +117,6 @@ nvc0_validate_fb(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 0); } - -#ifndef NVC0_SCISSORS_CLIPPING - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, fb->width << 16); - OUT_RING (chan, fb->height << 16); -#endif } static void @@ -164,65 +158,54 @@ nvc0_validate_scissor(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_scissor_state *s = &nvc0->scissor; -#ifdef NVC0_SCISSORS_CLIPPING - struct pipe_viewport_state *vp = &nvc0->viewport; - int minx, maxx, miny, maxy; - if (!(nvc0->dirty & - (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) && - nvc0->state.scissor == nvc0->rast->pipe.scissor) + if (!(nvc0->dirty & NVC0_NEW_SCISSOR) && + nvc0->rast->pipe.scissor == nvc0->state.scissor) return; nvc0->state.scissor = nvc0->rast->pipe.scissor; - if (nvc0->state.scissor) { - minx = s->minx; - maxx = s->maxx; - miny = s->miny; - maxy = s->maxy; + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + if (nvc0->rast->pipe.scissor) { + OUT_RING(chan, (s->maxx << 16) | s->minx); + OUT_RING(chan, (s->maxy << 16) | s->miny); } else { - minx = 0; - maxx = nvc0->framebuffer.width; - miny = 0; - maxy = nvc0->framebuffer.height; + OUT_RING(chan, (0xffff << 16) | 0); + OUT_RING(chan, (0xffff << 16) | 0); } - - minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); - maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0]))); - miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); - maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); - - BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); - OUT_RING (chan, (maxx << 16) | minx); - 
OUT_RING (chan, (maxy << 16) | miny); - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, ((maxx - minx) << 16) | minx); - OUT_RING (chan, ((maxy - miny) << 16) | miny); -#else - BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); - OUT_RING (chan, (s->maxx << 16) | s->minx); - OUT_RING (chan, (s->maxy << 16) | s->miny); -#endif } static void nvc0_validate_viewport(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_viewport_state *vp = &nvc0->viewport; + int x, y, w, h; + float zmin, zmax; BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3); - OUT_RINGf (chan, nvc0->viewport.translate[0]); - OUT_RINGf (chan, nvc0->viewport.translate[1]); - OUT_RINGf (chan, nvc0->viewport.translate[2]); + OUT_RINGf (chan, vp->translate[0]); + OUT_RINGf (chan, vp->translate[1]); + OUT_RINGf (chan, vp->translate[2]); BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3); - OUT_RINGf (chan, nvc0->viewport.scale[0]); - OUT_RINGf (chan, nvc0->viewport.scale[1]); - OUT_RINGf (chan, nvc0->viewport.scale[2]); + OUT_RINGf (chan, vp->scale[0]); + OUT_RINGf (chan, vp->scale[1]); + OUT_RINGf (chan, vp->scale[2]); -#ifdef NVC0_SCISSORS_CLIPPING + /* now set the viewport rectangle to viewport dimensions for clipping */ + + x = (int)(vp->translate[0] - fabsf(vp->scale[0])); + y = (int)(vp->translate[1] - fabsf(vp->scale[1])); + w = (int)fabsf(2.0f * vp->scale[0]); + h = (int)fabsf(2.0f * vp->scale[1]); + zmin = vp->translate[2] - fabsf(vp->scale[2]); + zmax = vp->translate[2] + fabsf(vp->scale[2]); + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, (w << 16) | x); + OUT_RING (chan, (h << 16) | y); BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); - OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]); - OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]); -#endif + OUT_RINGf (chan, zmin); + OUT_RINGf (chan, zmax); } static void @@ -231,10 +214,15 @@ nvc0_validate_clip(struct 
nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; uint32_t clip; - clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002; -#ifndef NVC0_SCISSORS_CLIPPING - clip |= 0x1080; -#endif + if (nvc0->clip.depth_clamp) { + clip = + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2; + } else { + clip = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1; + } BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); OUT_RING (chan, clip); @@ -418,13 +406,7 @@ static struct state_validate { { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, -#ifdef NVC0_SCISSORS_CLIPPING - { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | - NVC0_NEW_RASTERIZER | - NVC0_NEW_FRAMEBUFFER }, -#else - { nvc0_validate_scissor, NVC0_NEW_SCISSOR }, -#endif + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, { nvc0_validate_clip, NVC0_NEW_CLIP }, { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 5b15e286751..57566128ab5 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -4,8 +4,6 @@ #include "pipe/p_state.h" -#define NVC0_SCISSORS_CLIPPING - #define SB_BEGIN_3D(so, m, s) \ (so)->state[(so)->size++] = \ (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) -- cgit v1.2.3 From 2fa35eedd9dbc193904256e6004913e94a044158 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 15 Feb 2011 21:36:57 +0100 Subject: nvc0: add missing break statements in constant_operand --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git 
a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 53010f8bd50..12a31493c75 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -544,6 +544,7 @@ constant_operand(struct nv_pc *pc, nv_reference(pc, nvi, s, nvi->src[t]->value); nvi->src[s]->mod = nvi->src[t]->mod; } + break; case NV_OP_ADD_F32: if (u.u32 == 0) { switch (nvi->src[t]->mod) { @@ -563,6 +564,7 @@ constant_operand(struct nv_pc *pc, if (nvi->opcode != NV_OP_CVT) nvi->src[0]->mod = 0; } + break; case NV_OP_ADD_B32: if (u.u32 == 0) { assert(nvi->src[t]->mod == 0); -- cgit v1.2.3 From bb2c8e709975223e8131b7c627c08eeb4f3ec88d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 16 Feb 2011 11:42:28 +0100 Subject: nvc0: don't swap sources if either value is not in a GPR The memory / immediate source should already be in the only valid position. --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 12a31493c75..3d03400518b 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -245,6 +245,9 @@ check_swap_src_0_1(struct nv_instruction *nvi) return; assert(src0 && src1 && src0->value && src1->value); + if (src1->value->reg.file != NV_FILE_GPR) + return; + if (is_cspace_load(src0->value->insn)) { if (!is_cspace_load(src1->value->insn)) { nvi->src[0] = src1; -- cgit v1.2.3 From 3f1361e060822c369f3b375bc695c9e65db59c29 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 16 Feb 2011 11:57:00 +0100 Subject: nvc0: fix emit_dfdx,dfdy --- src/gallium/drivers/nvc0/nvc0_pc_emit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index b2a80566a02..c10f920e6f1 
100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -479,6 +479,7 @@ emit_ddx(struct nv_pc *pc, struct nv_instruction *i) { i->quadop = 0x99; i->lanes = 4; + i->src[1] = i->src[0]; emit_quadop(pc, i); } @@ -487,6 +488,7 @@ emit_ddy(struct nv_pc *pc, struct nv_instruction *i) { i->quadop = 0xa5; i->lanes = 5; + i->src[1] = i->src[0]; emit_quadop(pc, i); } -- cgit v1.2.3 From 3903e25a2cd6c198581021242897b1952d2afea0 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 16 Feb 2011 15:41:32 +0100 Subject: nvc0: fix blend factor mapping --- src/gallium/drivers/nvc0/nvc0_state.c | 57 ++++++++++++++++------------------- 1 file changed, 26 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index ae5f335f9f2..666e3802979 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -51,40 +51,35 @@ nvc0_colormask(unsigned mask) return ret; } +#define NVC0_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + static INLINE uint32_t nvc0_blend_fac(unsigned factor) { - static const uint16_t bf[] = { - NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */ - NV50_3D_BLEND_FACTOR_ONE, - NV50_3D_BLEND_FACTOR_SRC_COLOR, - NV50_3D_BLEND_FACTOR_SRC_ALPHA, - NV50_3D_BLEND_FACTOR_DST_ALPHA, - NV50_3D_BLEND_FACTOR_DST_COLOR, - NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE, - NV50_3D_BLEND_FACTOR_CONSTANT_COLOR, - NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA, - NV50_3D_BLEND_FACTOR_SRC1_COLOR, - NV50_3D_BLEND_FACTOR_SRC1_ALPHA, - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */ - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, - 
NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - }; - - assert(factor < (sizeof(bf) / sizeof(bf[0]))); - return bf[factor]; + switch (factor) { + NVC0_BLEND_FACTOR_CASE(ONE, ONE); + NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); + NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NVC0_BLEND_FACTOR_CASE(ZERO, ZERO); + NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } } static void * -- cgit v1.2.3 From 697a3eb832c5440051445bd315b63738a13dc3fd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 21 Sep 2010 11:46:20 +0100 Subject: svga: Don't fake DXT compression ability. 
--- src/gallium/drivers/svga/svga_resource_texture.c | 10 ---------- src/gallium/drivers/svga/svga_screen.c | 7 ------- 2 files changed, 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index b63c84eee71..3d430737e02 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -146,16 +146,6 @@ svga_translate_format_render(enum pipe_format format) case PIPE_FORMAT_L8_UNORM: return svga_translate_format(format); -#if 1 - /* For on host conversion */ - case PIPE_FORMAT_DXT1_RGB: - return SVGA3D_X8R8G8B8; - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return SVGA3D_A8R8G8B8; -#endif - default: return SVGA3D_FORMAT_INVALID; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 9b699eadcc1..ef1d3098d51 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -365,13 +365,6 @@ svga_is_format_supported( struct pipe_screen *screen, case PIPE_FORMAT_B5G5R5A1_UNORM: return FALSE; - /* Simulate ability to render into compressed textures */ - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return TRUE; - default: break; } -- cgit v1.2.3 From 2d1cc27729bd1808a39b226ae3eda5663328ba74 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 16 Feb 2011 20:09:06 +0100 Subject: r300g: fix blitting NPOT compressed textures --- src/gallium/drivers/r300/r300_blit.c | 6 +++--- src/gallium/drivers/r300/r300_texture_desc.c | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 14c9794888c..4f86db39926 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ 
b/src/gallium/drivers/r300/r300_blit.c @@ -480,12 +480,12 @@ static void r300_resource_copy_region(struct pipe_context *pipe, /* Since the pixels are 4 times larger, we must decrease * the image size and the coordinates 4 times. */ new_src.format = new_dst.format; - new_dst.height0 /= 4; - new_src.height0 /= 4; + new_dst.height0 = (new_dst.height0 + 3) / 4; + new_src.height0 = (new_src.height0 + 3) / 4; dsty /= 4; box = *src_box; box.y /= 4; - box.height /= 4; + box.height = (box.height + 3) / 4; src_box = &box; } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 221e5a314ac..2cfeec7d751 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -162,6 +162,14 @@ static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, height = u_minify(tex->tex.height0, level); + /* Mipmapped and 3D textures must have their height aligned to POT. */ + if ((tex->b.b.b.target != PIPE_TEXTURE_1D && + tex->b.b.b.target != PIPE_TEXTURE_2D && + tex->b.b.b.target != PIPE_TEXTURE_RECT) || + tex->b.b.b.last_level != 0) { + height = util_next_power_of_two(height); + } + if (util_format_is_plain(tex->b.b.b.format)) { tile_height = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples, @@ -170,14 +178,6 @@ static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, DIM_HEIGHT, 0); height = align(height, tile_height); - /* This is needed for the kernel checker, unfortunately. */ - if ((tex->b.b.b.target != PIPE_TEXTURE_1D && - tex->b.b.b.target != PIPE_TEXTURE_2D && - tex->b.b.b.target != PIPE_TEXTURE_RECT) || - tex->b.b.b.last_level != 0) { - height = util_next_power_of_two(height); - } - /* See if the CBZB clear can be used on the buffer, * taking the texture size into account. 
*/ if (out_aligned_for_cbzb) { -- cgit v1.2.3 From fa3f1348e49feeac511dbe5b22bbddc47f56ba81 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 16 Feb 2011 22:23:23 +0100 Subject: r300g: fix a race between CS and SET_TILING ioctls --- src/gallium/drivers/r300/r300_state.c | 7 +------ src/gallium/drivers/r300/r300_texture.c | 2 +- src/gallium/drivers/r300/r300_winsys.h | 2 ++ src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 13 +++++++++++++ src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 12 ++++++------ src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 ++ 6 files changed, 25 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index aa18ab7d1e3..5d8298341d3 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -624,12 +624,7 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300, * Skip changing the flags otherwise. */ if (tex->tex.macrotile[tex->surface_level] != tex->tex.macrotile[level]) { - /* Tiling determines how DRM treats the buffer data. - * We must flush CS when changing it if the buffer is referenced. 
*/ - if (r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf)) - r300->context.flush(&r300->context, 0, NULL); - - r300->rws->buffer_set_tiling(tex->buf, + r300->rws->buffer_set_tiling(tex->buf, r300->cs, tex->tex.microtile, tex->tex.macrotile[level], tex->tex.stride_in_bytes[0]); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 45a896d6109..354144cac79 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -800,7 +800,7 @@ r300_texture_create_object(struct r300_screen *rscreen, tex->cs_buf = rws->buffer_get_cs_handle(tex->buf); - rws->buffer_set_tiling(tex->buf, + rws->buffer_set_tiling(tex->buf, NULL, tex->tex.microtile, tex->tex.macrotile[0], tex->tex.stride_in_bytes[0]); diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 6733253ccc9..d5c73585c81 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -168,6 +168,7 @@ struct r300_winsys_screen { * Set tiling flags describing a memory layout of a buffer object. * * \param buf A winsys buffer object to set the flags for. + * \param cs A command stream to flush if the buffer is referenced by it. * \param macrotile A macrotile flag. * \param microtile A microtile flag. * \param stride A stride of the buffer in bytes, for texturing. @@ -175,6 +176,7 @@ struct r300_winsys_screen { * \note microtile and macrotile are not bitmasks! 
*/ void (*buffer_set_tiling)(struct r300_winsys_bo *buf, + struct r300_winsys_cs *cs, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index afb8131acbe..3094337a3cd 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -394,13 +394,26 @@ static void radeon_bo_get_tiling(struct r300_winsys_bo *_buf, } static void radeon_bo_set_tiling(struct r300_winsys_bo *_buf, + struct r300_winsys_cs *rcs, enum r300_buffer_tiling microtiled, enum r300_buffer_tiling macrotiled, uint32_t pitch) { struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct drm_radeon_gem_set_tiling args = {}; + /* Tiling determines how DRM treats the buffer data. + * We must flush CS when changing it if the buffer is referenced. */ + if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) { + radeon_drm_cs_flush(rcs); + radeon_drm_cs_sync_flush(rcs); + } + + while (p_atomic_read(&bo->num_active_ioctls)) { + sched_yield(); + } + if (microtiled == R300_BUFFER_TILED) args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE; else if (microtiled == R300_BUFFER_SQUARETILED) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index b4f5c9f6a88..8f6f4a1f35a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -330,7 +330,7 @@ static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, OUT_CS(&cs->base, index * RELOC_DWORDS); } -static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_async, param) +static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) { struct radeon_cs_context *csc = (struct radeon_cs_context*)param; unsigned i; @@ -355,7 +355,7 @@ static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_async, param) return NULL; } -static void 
radeon_drm_cs_sync_flush(struct r300_winsys_cs *rcs) +void radeon_drm_cs_sync_flush(struct r300_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); @@ -368,7 +368,7 @@ static void radeon_drm_cs_sync_flush(struct r300_winsys_cs *rcs) DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) -static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) +void radeon_drm_cs_flush(struct r300_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; @@ -385,10 +385,10 @@ static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls); if (debug_get_option_thread()) { - cs->thread = pipe_thread_create(radeon_drm_cs_emit_async, cs->csc); + cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs->csc); assert(cs->thread); } else { - radeon_drm_cs_emit_async(cs->csc); + radeon_drm_cs_emit_ioctl(cs->csc); } } @@ -440,7 +440,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; ws->base.cs_validate = radeon_drm_cs_validate; ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; - ws->base.cs_flush = radeon_drm_cs_emit; + ws->base.cs_flush = radeon_drm_cs_flush; ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; ws->base.cs_set_flush = radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 486fd237fc9..4cc97f37e09 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -96,6 +96,8 @@ static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) return bo->num_cs_references; } +void radeon_drm_cs_flush(struct r300_winsys_cs *rcs); +void radeon_drm_cs_sync_flush(struct r300_winsys_cs *rcs); void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws); #endif -- cgit v1.2.3 From 
fa05ddca156ee21a4c0e00aaec0c3f8347dd194d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 16 Feb 2011 21:52:49 +0000 Subject: svga: Proper redefine_user_buffer implementation. Unfortunately still not enough to make GoogleEarth happy. --- src/gallium/drivers/svga/svga_pipe_vertex.c | 1 - src/gallium/drivers/svga/svga_resource.c | 1 + src/gallium/drivers/svga/svga_resource_buffer.h | 6 +++ .../drivers/svga/svga_resource_buffer_upload.c | 51 ++++++++++++++++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 6bf37fbbbaf..58469910732 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -132,7 +132,6 @@ void svga_init_vertex_functions( struct svga_context *svga ) svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state; svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state; svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; - svga->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c index ef2a0c40f03..bed15ec02e5 100644 --- a/src/gallium/drivers/svga/svga_resource.c +++ b/src/gallium/drivers/svga/svga_resource.c @@ -40,6 +40,7 @@ svga_init_resource_functions(struct svga_context *svga) svga->pipe.transfer_unmap = u_transfer_unmap_vtbl; svga->pipe.transfer_destroy = u_transfer_destroy_vtbl; svga->pipe.transfer_inline_write = u_transfer_inline_write_vtbl; + svga->pipe.redefine_user_buffer = svga_redefine_user_buffer; } void diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index d3ec11bfd52..c559f70ec12 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -243,4 +243,10 @@ 
svga_winsys_buffer_create(struct svga_context *svga, unsigned usage, unsigned size); +void +svga_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size); + #endif /* SVGA_BUFFER_H */ diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index fdc0329f6c9..76a3803224a 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -649,3 +649,54 @@ svga_context_flush_buffers(struct svga_context *svga) next = curr->next; } } + + +void +svga_redefine_user_buffer(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned offset, + unsigned size) +{ + struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_buffer *sbuf = svga_buffer(resource); + + assert(sbuf->user); + + /* + * Release any uploaded user buffer. + * + * TODO: As an optimization, we could try to update the uploaded buffer + * instead. + */ + + pipe_resource_reference(&sbuf->uploaded.buffer, NULL); + + pipe_mutex_lock(ss->swc_mutex); + + if (offset + size > resource->width0) { + /* + * User buffers shouldn't have DMA directly, unless + * SVGA_COMBINE_USERBUFFERS is not set. 
+ */ + + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); + } + + if (sbuf->handle) { + svga_buffer_destroy_host_surface(ss, sbuf); + } + + if (sbuf->hwbuf) { + svga_buffer_destroy_hw_storage(ss, sbuf); + } + + sbuf->key.size.width = sbuf->b.b.width0 = offset + size; + } + + pipe_mutex_unlock(ss->swc_mutex); + + svga->curr.any_user_vertex_buffers = TRUE; + svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT; +} -- cgit v1.2.3 From 4b6c9b799b769863286461de1ab045cea8484cba Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Feb 2011 14:30:31 -0700 Subject: svga: disable a debug_printf() call --- src/gallium/drivers/svga/svga_resource_buffer.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index f12e2b68627..11c868a89b1 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -105,9 +105,12 @@ svga_buffer_map_range( struct pipe_screen *screen, * We can't create a hardware buffer big enough, so create a malloc * buffer instead. 
*/ - debug_printf("%s: failed to allocate %u KB of DMA, splitting DMA transfers\n", - __FUNCTION__, - (sbuf->b.b.width0 + 1023)/1024); + if (0) { + debug_printf("%s: failed to allocate %u KB of DMA, " + "splitting DMA transfers\n", + __FUNCTION__, + (sbuf->b.b.width0 + 1023)/1024); + } sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16); } -- cgit v1.2.3 From d432f462c28b60e1b1a2bcb4e17bbda0ea316d2a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 15 Feb 2011 13:24:21 -0700 Subject: svga: dimension the dirty[] array with SVGA_STATE_MAX --- src/gallium/drivers/svga/svga_context.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 37ca417d2ff..106ea8d0e51 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -35,6 +35,8 @@ #include "tgsi/tgsi_scan.h" +#include "svga_state.h" + #define SVGA_TEX_UNITS 8 #define SVGA_MAX_POINTSIZE 80.0 @@ -342,7 +344,7 @@ struct svga_context struct util_bitmask *vs_bm; struct { - unsigned dirty[4]; + unsigned dirty[SVGA_STATE_MAX]; unsigned texture_timestamp; -- cgit v1.2.3 From 64762af008ec9333c7f3561ea3c930d407c911c7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 15 Feb 2011 13:24:41 -0700 Subject: svga: fix incorrect comment --- src/gallium/drivers/svga/svga_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 106ea8d0e51..6fd0bdb38b8 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -246,7 +246,7 @@ struct svga_prescale { }; -/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT ) +/* Updated by calling svga_update_state( SVGA_STATE_HW_CLEAR ) */ struct svga_hw_clear_state { -- cgit v1.2.3 From 2f5032ec1e2adcac3e6e990624f05900a459f270 
Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 15 Feb 2011 14:06:21 -0700 Subject: svga: use TRUE/FALSE instead of 0/1 Some fields are booleans, others are bitmasks. Use TRUE/FALSE to clarify what's what. --- src/gallium/drivers/svga/svga_state_need_swtnl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index bfd77f6b860..68c02578789 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -179,15 +179,15 @@ static int update_need_swtnl( struct svga_context *svga, boolean need_swtnl; if (svga->debug.no_swtnl) { - svga->state.sw.need_swvfetch = 0; - svga->state.sw.need_pipeline = 0; + svga->state.sw.need_swvfetch = FALSE; + svga->state.sw.need_pipeline = FALSE; } need_swtnl = (svga->state.sw.need_swvfetch || svga->state.sw.need_pipeline); if (svga->debug.force_swtnl) { - need_swtnl = 1; + need_swtnl = TRUE; } /* @@ -196,7 +196,7 @@ static int update_need_swtnl( struct svga_context *svga, * the wrong buffers and vertex formats. Try trivial/line-wide. */ if (svga->state.sw.in_swtnl_draw) - need_swtnl = 1; + need_swtnl = TRUE; if (need_swtnl != svga->state.sw.need_swtnl) { SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF, -- cgit v1.2.3 From c8f8d7d873269bd2e01f088725f754f9413102b5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Feb 2011 14:20:14 -0700 Subject: svga: flush when transitioning between HW and SW rendering paths To avoid mixing HW and SW rendering with the same vertex buffer. 
--- src/gallium/drivers/svga/svga_context.h | 3 +++ src/gallium/drivers/svga/svga_pipe_draw.c | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 6fd0bdb38b8..4d4f50366ea 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -367,6 +367,9 @@ struct svga_context /** List of buffers with queued transfers */ struct list_head dirty_buffers; + + /** Was the previous draw done with the SW path? */ + boolean prev_draw_swtnl; }; /* A flag for each state_tracker state object: diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 001ec3616c4..d98b9b0e000 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -157,6 +157,14 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (!u_trim_pipe_prim( info->mode, &count )) return; + if (svga->state.sw.need_swtnl != svga->prev_draw_swtnl) { + /* We're switching between SW and HW drawing. Do a flush to avoid + * mixing HW and SW rendering with the same vertex buffer. + */ + pipe->flush(pipe, ~0, NULL); + svga->prev_draw_swtnl = svga->state.sw.need_swtnl; + } + /* * Mark currently bound target surfaces as dirty * doesn't really matter if it is done before drawing. -- cgit v1.2.3 From 5cc35124b31aa1dddffd24d92c8447cf7a9d9f98 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 17 Feb 2011 10:24:15 +1000 Subject: r600g: add missing type to color buffer swap. 
--- src/gallium/drivers/r600/r600_state_inlines.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index f980610f463..7d5c9e0a050 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -332,6 +332,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_X8R8G8B8_UNORM: return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: return V_0280A0_SWAP_STD; -- cgit v1.2.3 From 231bf886dae9c7df0ae3e16acee904024a08824f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 17 Feb 2011 10:25:57 +1000 Subject: r600g: get s3tc working on cards with crappy 64/128 bit types. Some cards don't appear to work correctly with the UNORM type, so switch to the integer type, however since gallium has no integer types yet from what I can see we need to do a hack to workaround it for the blitter. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 11 +++++++++++ src/gallium/drivers/r600/r600_blit.c | 5 +++++ src/gallium/drivers/r600/r600_resource.h | 4 ++++ src/gallium/drivers/r600/r600_state.c | 10 ++++++++++ 4 files changed, 30 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 261dd8d6ab8..3efdbaba0c3 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -385,6 +385,12 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } + + if (tmp->force_int_type) { + word4 &= C_030010_NUM_FORMAT_ALL; + word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT); + } + rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; @@ -673,6 +679,11 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state format = r600_translate_colorformat(surf->base.format); swap = r600_translate_colorswap(surf->base.format); + + /* disable when gallium grows int textures */ + if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) + ntype = 4; + color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_ARRAY_MODE(rtex->array_mode[level]) | diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 31d5e3f73b6..9865ea17ae5 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -229,6 +229,7 @@ static void r600_s3tc_to_blittable(struct pipe_resource *tex, unsigned level, struct texture_orig_info *orig) { + struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; unsigned pixsize = util_format_get_blocksize(tex->format); int new_format; int new_height, new_width; @@ -245,6 +246,7 @@ static void r600_s3tc_to_blittable(struct 
pipe_resource *tex, new_width = util_format_get_nblocksx(tex->format, orig->width0); new_height = util_format_get_nblocksy(tex->format, orig->height0); + rtex->force_int_type = true; tex->width0 = new_width; tex->height0 = new_height; tex->format = new_format; @@ -255,6 +257,9 @@ static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex, unsigned level, struct texture_orig_info *orig) { + struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; + rtex->force_int_type = false; + tex->format = orig->format; tex->width0 = orig->width0; tex->height0 = orig->height0; diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index fdcfcd50a10..836e7491f1f 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -64,6 +64,10 @@ struct r600_resource_texture { unsigned dirty_db; struct r600_resource_texture *flushed_depth_texture; boolean is_flushing_texture; + + /* on some cards we have to use integer 64/128-bit types + for s3tc blits, do this until gallium grows int formats */ + boolean force_int_type; }; #define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 5e6821004df..a1f83ac4271 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -434,6 +434,11 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } + + if (tmp->force_int_type) { + word4 &= C_038010_NUM_FORMAT_ALL; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); + } rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; @@ -724,6 +729,11 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta format = 
r600_translate_colorformat(surf->base.format); swap = r600_translate_colorswap(surf->base.format); + + /* disable when gallium grows int textures */ + if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) + ntype = 4; + color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | -- cgit v1.2.3 From b1d485712fb90ef243384cd646822ba1f460314b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Feb 2011 07:23:40 -0700 Subject: softpipe: rename env vars to be consistent --- src/gallium/drivers/softpipe/sp_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index fe54f92addf..70fdfb7ddf3 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -230,7 +230,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->use_sse = FALSE; #endif - softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE ); softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); softpipe->pipe.winsys = NULL; @@ -315,7 +315,7 @@ softpipe_create_context( struct pipe_screen *screen, (struct tgsi_sampler **) softpipe->tgsi.geom_samplers_list); - if (debug_get_bool_option( "SP_NO_RAST", FALSE )) + if (debug_get_bool_option( "SOFTPIPE_NO_RAST", FALSE )) softpipe->no_rast = TRUE; softpipe->vbuf_backend = sp_create_vbuf_backend(softpipe); -- cgit v1.2.3 From 0adeaf00e6c4592e78cca36c3b365110b83c965d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 17 Feb 2011 10:09:06 +0000 Subject: svga: Don't use more than one constant per IFC instruction. 
--- src/gallium/drivers/svga/svga_tgsi_insn.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 5f262b3eadb..97d91046427 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -746,7 +746,7 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit, static boolean emit_if(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { - const struct src_register src = translate_src_register( + struct src_register src0 = translate_src_register( emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); @@ -754,10 +754,23 @@ static boolean emit_if(struct svga_shader_emitter *emit, if_token.control = SVGA3DOPCOMPC_NE; zero = scalar(zero, TGSI_SWIZZLE_X); + if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) { + /* + * Max different constant registers readable per IFC instruction is 1. + */ + + SVGA3dShaderDestToken tmp = get_temp( emit ); + + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) + return FALSE; + + src0 = scalar(src( tmp ), TGSI_SWIZZLE_X); + } + emit->dynamic_branching_level++; return (emit_instruction( emit, if_token ) && - emit_src( emit, src ) && + emit_src( emit, src0 ) && emit_src( emit, zero ) ); } -- cgit v1.2.3 From 2a6cce09e356a33337dbc193d8a580d0faab8915 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Feb 2011 09:34:53 +1000 Subject: r600g: add BC4/5 to RGTC conversion this doesn't do anything much since the rest of mesa doesn't support RGTC yet. 
--- src/gallium/drivers/r600/r600_texture.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index cc36d94cd7a..23212314ba6 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -895,6 +895,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, case PIPE_FORMAT_DXT5_SRGBA: result = FMT_BC3; goto out_word4; + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + result = FMT_BC4; + goto out_word4; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + result = FMT_BC5; + goto out_word4; default: goto out_unknown; } -- cgit v1.2.3 From 51cc14471cffcec9c44eeee947be7533352ac62a Mon Sep 17 00:00:00 2001 From: Fabian Bieler Date: Tue, 15 Feb 2011 13:00:49 +0100 Subject: r600g: Add support to dump vertex- and texture-fetch clauses Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_asm.c | 57 ++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index d687c23f4f2..f8835f020d9 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1832,11 +1832,66 @@ void r600_bc_dump(struct r600_bc *bc) } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { - //TODO + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", tex->inst); + fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id); + fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr); + fprintf(stderr, "REL:%d)\n", tex->src_rel); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr); + fprintf(stderr, "REL:%d ", tex->dst_rel); + fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x); + fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y); + fprintf(stderr, "SEL_Z:%d ", 
tex->dst_sel_z); + fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w); + fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias); + fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x); + fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y); + fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z); + fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET_X:%d ", tex->offset_x); + fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y); + fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z); + fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id); + fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x); + fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y); + fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z); + fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w); + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + id++; } LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", vtx->inst); + fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type); + fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id); + id++; + /* This assumes that no semantic fetches exist */ + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); + fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); + fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); + fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr); + fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x); + fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y); + fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); + fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); + fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); + fprintf(stderr, "DATA_FORMAT:%d ", vtx->data_format); + fprintf(stderr, "NUM_FORMAT_ALL:%d ", vtx->num_format_all); + fprintf(stderr, "FORMAT_COMP_ALL:%d ", vtx->format_comp_all); + fprintf(stderr, "SRF_MODE_ALL:%d\n", vtx->srf_mode_all); + id++; + fprintf(stderr, "%04d %08X 
\n", id, bc->bytecode[id]); //TODO + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + id++; } } -- cgit v1.2.3 From 8b5119aab30828edbc4310694c09fe02fdbb0552 Mon Sep 17 00:00:00 2001 From: Fabian Bieler Date: Wed, 16 Feb 2011 10:02:47 +0100 Subject: r600g: Start a new TEX clause if the texture lookup address was fetched in the current clause Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_asm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index f8835f020d9..de796188fde 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1282,6 +1282,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) return -ENOMEM; memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + /* we can't fetch data und use it as texture lookup address in the same TEX clause */ + if (bc->cf_last != NULL && + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + struct r600_bc_tex *ttex; + LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { + if (ttex->dst_gpr == ntex->src_gpr) { + bc->force_add_cf = 1; + break; + } + } + } + /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || -- cgit v1.2.3 From dfa59284049e735cb55e1b39f32cfcfb71ebbff3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Feb 2011 15:59:58 +1000 Subject: r600g: reorganise rgtc pieces. when the cs checker fixes go upstream a lot of this can disappear into a drm version check. 
Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_texture.c | 39 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 23212314ba6..ce06d74058f 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -808,6 +808,8 @@ uint32_t r600_translate_texformat(enum pipe_format format, uint32_t result = 0, word4 = 0, yuv_format = 0; const struct util_format_description *desc; boolean uniform = TRUE; + static int r600_enable_s3tc = -1; + int i; const uint32_t sign_bit[4] = { S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED), @@ -865,13 +867,28 @@ uint32_t r600_translate_texformat(enum pipe_format format, break; } - /* S3TC formats. TODO */ - if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - static int r600_enable_s3tc = -1; + if (r600_enable_s3tc == -1) + r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE); - if (r600_enable_s3tc == -1) - r600_enable_s3tc = - debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + if (!r600_enable_s3tc) + goto out_unknown; + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + result = FMT_BC4; + goto out_word4; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + result = FMT_BC5; + goto out_word4; + default: + goto out_unknown; + } + } + + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { if (!r600_enable_s3tc) goto out_unknown; @@ -895,14 +912,6 @@ uint32_t r600_translate_texformat(enum pipe_format format, case PIPE_FORMAT_DXT5_SRGBA: result = FMT_BC3; goto out_word4; - case PIPE_FORMAT_RGTC1_UNORM: - case PIPE_FORMAT_RGTC1_SNORM: - result = FMT_BC4; - goto out_word4; - case PIPE_FORMAT_RGTC2_UNORM: - case PIPE_FORMAT_RGTC2_SNORM: - result = FMT_BC5; - goto out_word4; default: goto out_unknown; } @@ -917,8 +926,6 @@ 
uint32_t r600_translate_texformat(enum pipe_format format, /* R8G8Bx_SNORM - TODO CxV8U8 */ - /* RGTC - TODO */ - /* See whether the components are of the same size. */ for (i = 1; i < desc->nr_channels; i++) { uniform = uniform && desc->channel[0].size == desc->channel[i].size; -- cgit v1.2.3 From 8cbd3b5ef11e3e77022f98f3151dd39e0eec188c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 18 Feb 2011 11:36:34 +0100 Subject: gallium/svga: Fix unnecessary swtnl fallbacks When we drop the in_swtnl_draw flag, we must force a rerun of update_need_swtnl to reset the need_swtnl flag to its correct value outside of a swtnl vbo draw. Signed-off-by: Thomas Hellstrom --- src/gallium/drivers/svga/svga_swtnl_draw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 05d86e1fb16..ad29c1b6425 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -124,6 +124,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga, /* Now safe to remove the need_swtnl flag in any update_state call */ svga->state.sw.in_swtnl_draw = FALSE; + svga->dirty |= SVGA_NEW_NEED_PIPELINE | SVGA_NEW_NEED_SWVFETCH; return ret; } -- cgit v1.2.3 From 99d955263ba4665462f8ebd8ed643019c2cd9621 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 17 Feb 2011 17:14:44 +0000 Subject: svga: Make sure that refcnt debugger gets the correct backtrace for create Signed-off-by: Jakob Bornecrantz --- src/gallium/drivers/svga/svga_resource_buffer.c | 6 ++++++ src/gallium/drivers/svga/svga_resource_texture.c | 3 +++ 2 files changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 11c868a89b1..e1f07d655b9 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ 
-311,6 +311,9 @@ svga_buffer_create(struct pipe_screen *screen, goto error2; } + debug_reference(&sbuf->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); + return &sbuf->b.b; error2: @@ -344,6 +347,9 @@ svga_user_buffer_create(struct pipe_screen *screen, sbuf->swbuf = ptr; sbuf->user = TRUE; + + debug_reference(&sbuf->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); return &sbuf->b.b; diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 3d430737e02..3c6176a5e86 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -565,6 +565,9 @@ svga_texture_create(struct pipe_screen *screen, if (tex->handle) SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle); + debug_reference(&tex->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); + return &tex->b.b; error2: -- cgit v1.2.3 From 912ad8874200c0a89bd23663dc0de378f6691140 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 17 Feb 2011 14:58:55 +0000 Subject: svga: Describe svga_sampler_views for refcnt debugging Signed-off-by: Jakob Bornecrantz --- src/gallium/drivers/svga/svga_sampler_view.c | 16 ++++++++++++++++ src/gallium/drivers/svga/svga_sampler_view.h | 6 +++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c index 079046e4686..4d9ac6c324a 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.c +++ b/src/gallium/drivers/svga/svga_sampler_view.c @@ -32,6 +32,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_string.h" #include "svga_screen.h" #include "svga_context.h" @@ -41,6 +42,14 @@ #include "svga_surface.h" +void +svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv) +{ + char 
res[128]; + debug_describe_resource(res, sv->texture); + util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod); +} + struct svga_sampler_view * svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_resource *pt, @@ -114,6 +123,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->key.cachable = 0; sv->handle = tex->handle; + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); return sv; } @@ -137,6 +148,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, assert(0); sv->key.cachable = 0; sv->handle = tex->handle; + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); return sv; } @@ -144,6 +157,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, svga_sampler_view_reference(&tex->cached_view, sv); pipe_mutex_unlock(ss->tex_mutex); + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + return sv; } diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h index e64665f2e58..2087c1be85e 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.h +++ b/src/gallium/drivers/svga/svga_sampler_view.h @@ -83,12 +83,16 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * void svga_destroy_sampler_view_priv(struct svga_sampler_view *v); +void +svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv); + static INLINE void svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v) { struct svga_sampler_view *old = *ptr; - if (pipe_reference(&(*ptr)->reference, &v->reference)) + if (pipe_reference_described(&(*ptr)->reference, &v->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view)) svga_destroy_sampler_view_priv(old); *ptr = v; } -- cgit v1.2.3 From e0481cac7d57757d75a39763a1dd36b915979bb4 Mon Sep 17 00:00:00 2001 
From: Jakob Bornecrantz Date: Thu, 17 Feb 2011 17:16:14 +0000 Subject: svga: Disable surface cache for textures Signed-off-by: Jakob Bornecrantz --- src/gallium/drivers/svga/svga_resource_texture.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 3c6176a5e86..3bb6ecf81f0 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -521,7 +521,8 @@ svga_texture_create(struct pipe_screen *screen, tex->key.numFaces = 1; } - tex->key.cachable = 1; + /* XXX: Disabled for now */ + tex->key.cachable = 0; if (template->bind & PIPE_BIND_SAMPLER_VIEW) tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; -- cgit v1.2.3 From fd8d4b32ede6ebeae332539b71d38c36420e2654 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 18 Feb 2011 15:29:00 +0100 Subject: r300g: remove tracking whether vertex buffers need to be validated This was getting hard to maintain and didn't really bring any real benefits. Instead, validate buffers when the vertex array state is dirty. 
--- src/gallium/drivers/r300/r300_context.h | 7 -- src/gallium/drivers/r300/r300_emit.c | 2 +- src/gallium/drivers/r300/r300_flush.c | 4 -- src/gallium/drivers/r300/r300_render.c | 85 ++++++++---------------- src/gallium/drivers/r300/r300_render_translate.c | 3 - src/gallium/drivers/r300/r300_screen_buffer.c | 5 -- src/gallium/drivers/r300/r300_state.c | 9 --- 7 files changed, 27 insertions(+), 88 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 883b5f99c07..30073759476 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -575,13 +575,6 @@ struct r300_context { boolean vertex_arrays_dirty; boolean vertex_arrays_indexed; int vertex_arrays_offset; - - /* Whether any buffer (FB, textures, VBOs) has been set, but buffers - * haven't been validated yet. */ - boolean validate_buffers; - /* Whether user buffers have been validated. */ - boolean upload_vb_validated; - boolean upload_ib_validated; }; #define foreach_atom(r300, atom) \ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index bd864b96167..e2e4719ec82 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1181,7 +1181,7 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf, r300_resource(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... 
*/ - if (do_validate_vertex_buffers) { + if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer; struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer + r300->vbuf_mgr->nr_real_vertex_buffers; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index c4bb332aec3..c77cc08539d 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -77,10 +77,6 @@ static void r300_flush(struct pipe_context* pipe, r300->vs_state.dirty = FALSE; r300->vs_constants.dirty = FALSE; } - - r300->validate_buffers = TRUE; - r300->upload_vb_validated = FALSE; - r300->upload_ib_validated = FALSE; } else { if (rfence) { /* We have to create a fence object, but the command stream is empty diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 7dc8ff08fa4..2ead8667bda 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -239,8 +239,7 @@ static boolean r300_emit_states(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, int buffer_offset, - int index_bias, - boolean user_buffers) + int index_bias) { boolean first_draw = flags & PREP_FIRST_DRAW; boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; @@ -250,24 +249,11 @@ static boolean r300_emit_states(struct r300_context *r300, /* Validate buffers and emit dirty state if needed. */ if (first_draw) { - if (r300->validate_buffers) { - if (!r300_emit_buffer_validate(r300, validate_vbos, - index_buffer)) { - fprintf(stderr, "r300: CS space validation failed. " - "(not enough memory?) Skipping rendering.\n"); - return FALSE; - } - - /* Consider the validation done only if everything was validated. 
*/ - if (validate_vbos) { - r300->validate_buffers = FALSE; - if (user_buffers) - r300->upload_vb_validated = TRUE; - if (r300->index_buffer.buffer && - r300_resource(r300->index_buffer.buffer)->b.user_ptr) { - r300->upload_ib_validated = TRUE; - } - } + if (!r300_emit_buffer_validate(r300, validate_vbos, + index_buffer)) { + fprintf(stderr, "r300: CS space validation failed. " + "(not enough memory?) Skipping rendering.\n"); + return FALSE; } r300_emit_dirty_state(r300); @@ -312,14 +298,13 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300, struct pipe_resource *index_buffer, unsigned cs_dwords, int buffer_offset, - int index_bias, - boolean user_buffers) + int index_bias) { if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) flags |= PREP_FIRST_DRAW; return r300_emit_states(r300, flags, index_buffer, buffer_offset, - index_bias, user_buffers); + index_bias); } static boolean immd_is_good_idea(struct r300_context *r300, @@ -394,8 +379,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, CS_LOCALS(r300); - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, - FALSE)) + if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) return; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ @@ -545,8 +529,7 @@ static void r300_emit_draw_elements(struct r300_context *r300, static void r300_draw_elements_immediate(struct r300_context *r300, int indexBias, unsigned minIndex, unsigned maxIndex, unsigned mode, - unsigned start, unsigned count, - boolean user_buffers) + unsigned start, unsigned count) { uint8_t *ptr1; uint16_t *ptr2; @@ -558,8 +541,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. 
*/ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, NULL, 2+count_dwords, 0, indexBias, - user_buffers)) + PREP_INDEXED, NULL, 2+count_dwords, 0, indexBias)) return; r300_emit_draw_init(r300, mode, minIndex, maxIndex); @@ -632,8 +614,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, static void r300_draw_elements(struct r300_context *r300, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count, - boolean user_buffers) + unsigned mode, unsigned start, unsigned count) { struct pipe_resource *indexBuffer = r300->index_buffer.buffer; unsigned indexSize = r300->index_buffer.index_size; @@ -681,8 +662,7 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, /* 19 dwords for emit_draw_elements. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias, - user_buffers)) + PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias)) goto done; if (alt_num_verts || count <= 65535) { @@ -706,7 +686,7 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 19, buffer_offset, indexBias, user_buffers)) + indexBuffer, 19, buffer_offset, indexBias)) goto done; } } while (count); @@ -719,8 +699,7 @@ done: } static void r300_draw_arrays(struct r300_context *r300, unsigned mode, - unsigned start, unsigned count, - boolean user_buffers) + unsigned start, unsigned count) { boolean alt_num_verts = r300->screen->caps.is_r500 && count > 65536 && @@ -730,7 +709,7 @@ static void r300_draw_arrays(struct r300_context *r300, unsigned mode, /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. 
*/ if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0, user_buffers)) + NULL, 9, start, 0)) return; if (alt_num_verts || count <= 65535) { @@ -747,7 +726,7 @@ static void r300_draw_arrays(struct r300_context *r300, unsigned mode, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0, user_buffers)) + start, 0)) return; } } while (count); @@ -774,13 +753,6 @@ static void r300_draw_vbo(struct pipe_context* pipe, &buffers_updated, &uploader_flushed); if (buffers_updated) { r300->vertex_arrays_dirty = TRUE; - - if (uploader_flushed || !r300->upload_vb_validated) { - r300->upload_vb_validated = FALSE; - r300->validate_buffers = TRUE; - } - } else { - r300->upload_vb_validated = FALSE; } /* Draw. */ @@ -791,19 +763,16 @@ static void r300_draw_vbo(struct pipe_context* pipe, r300_resource(r300->index_buffer.buffer)->b.user_ptr) { r300_draw_elements_immediate(r300, info->index_bias, info->min_index, max_index, - info->mode, start_indexed, count, - buffers_updated); + info->mode, start_indexed, count); } else { r300_draw_elements(r300, info->index_bias, info->min_index, - max_index, info->mode, start_indexed, count, - buffers_updated); + max_index, info->mode, start_indexed, count); } } else { if (immd_is_good_idea(r300, count)) { r300_draw_arrays_immediate(r300, info->mode, info->start, count); } else { - r300_draw_arrays(r300, info->mode, info->start, count, - buffers_updated); + r300_draw_arrays(r300, info->mode, info->start, count); } } @@ -935,7 +904,6 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, R300_MAX_DRAW_VBO_SIZE); r300->draw_vbo_offset = 0; r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE; - r300->validate_buffers = TRUE; } r300render->vertex_size = vertex_size; @@ -1022,12 +990,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, 
PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, dwords, 0, 0, FALSE)) + NULL, dwords, 0, 0)) return; } else { if (!r300_emit_states(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 0, 0, FALSE)) + NULL, 0, 0)) return; } @@ -1062,12 +1030,12 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0, FALSE)) + NULL, 256, 0, 0)) return; } else { if (!r300_emit_states(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 0, 0, FALSE)) + NULL, 0, 0)) return; } @@ -1104,7 +1072,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0, FALSE)) + NULL, 256, 0, 0)) return; end_cs_dwords = r300_get_num_cs_end_dwords(r300); @@ -1208,8 +1176,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, - FALSE)) + if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) goto done; DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index 777857514fc..f8c7558f4b4 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -48,7 +48,6 @@ void r300_translate_index_buffer(struct r300_context *r300, pipe_resource_reference(index_buffer, out_buffer); *index_size = 2; *start = out_offset / 2; - r300->validate_buffers = TRUE; break; case 2: @@ -63,7 +62,6 @@ void r300_translate_index_buffer(struct r300_context *r300, *index_buffer = NULL; pipe_resource_reference(index_buffer, out_buffer); *start = out_offset / 2; - r300->validate_buffers = TRUE; } break; @@ 
-79,7 +77,6 @@ void r300_translate_index_buffer(struct r300_context *r300, *index_buffer = NULL; pipe_resource_reference(index_buffer, out_buffer); *start = out_offset / 4; - r300->validate_buffers = TRUE; } break; } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 7855d70a973..1045911f3ae 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -65,11 +65,6 @@ void r300_upload_index_buffer(struct r300_context *r300, index_buffer, &flushed); *start = index_offset / index_size; - - if (flushed || !r300->upload_ib_validated) { - r300->upload_ib_validated = FALSE; - r300->validate_buffers = TRUE; - } } static void r300_buffer_destroy(struct pipe_screen *screen, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 5d8298341d3..09f18b3e624 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -783,7 +783,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, } r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - r300->validate_buffers = TRUE; if (state->zsbuf) { switch (util_format_get_blocksize(state->zsbuf->texture->format)) { @@ -1359,7 +1358,6 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, state->sampler_view_count = count; r300_mark_atom_dirty(r300, &r300->textures_state); - r300->validate_buffers = TRUE; if (dirty_tex) { r300_mark_atom_dirty(r300, &r300->texture_cache_inval); @@ -1491,7 +1489,6 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, for (i = 0; i < count; i++) { if (buffers[i].buffer && !r300_resource(buffers[i].buffer)->b.user_ptr) { - r300->validate_buffers = TRUE; } } r300->vertex_arrays_dirty = TRUE; @@ -1512,12 +1509,6 @@ static void r300_set_index_buffer(struct pipe_context* pipe, pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); memcpy(&r300->index_buffer, ib, 
sizeof(r300->index_buffer)); r300->index_buffer.offset /= r300->index_buffer.index_size; - - if (r300->screen->caps.has_tcl && - !r300_resource(ib->buffer)->b.user_ptr) { - r300->validate_buffers = TRUE; - r300->upload_ib_validated = FALSE; - } } else { pipe_resource_reference(&r300->index_buffer.buffer, NULL); -- cgit v1.2.3 From 965ab5fed3c734e6205070e6cf40544a44b5dbf6 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 18 Feb 2011 13:24:12 +0000 Subject: svga: Preserve src swizzles in submit_op2/3/4. Several opcodes require scalar swizzle, and this requirement was not being observed when creating temporaries for other reasons. --- src/gallium/drivers/svga/svga_tgsi_insn.c | 70 ++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 97d91046427..a868108098e 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -284,6 +284,41 @@ static void reset_temp_regs( struct svga_shader_emitter *emit ) } +/* Replace the src with the temporary specified in the dst, but copying + * only the necessary channels, and preserving the original swizzle (which is + * important given that several opcodes have constraints in the allowed + * swizzles). 
+ */ +static boolean emit_repl( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register *src0) +{ + unsigned src0_swizzle; + unsigned chan; + + assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP); + + src0_swizzle = src0->base.swizzle; + + dst.mask = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3; + dst.mask |= 1 << swizzle; + } + assert(dst.mask); + + src0->base.swizzle = SVGA3DSWIZZLE_NONE; + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 )) + return FALSE; + + *src0 = src( dst ); + src0->base.swizzle = src0_swizzle; + + return TRUE; +} + + static boolean submit_op0( struct svga_shader_emitter *emit, SVGA3dShaderInstToken inst, SVGA3dShaderDestToken dest ) @@ -332,14 +367,11 @@ static boolean submit_op2( struct svga_shader_emitter *emit, src0.base.num != src1.base.num) need_temp = TRUE; - if (need_temp) - { + if (need_temp) { temp = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 )) + if (!emit_repl( emit, temp, &src0 )) return FALSE; - - src0 = src( temp ); } if (!emit_op2( emit, inst, dest, src0, src1 )) @@ -395,24 +427,18 @@ static boolean submit_op3( struct svga_shader_emitter *emit, (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) need_temp1 = TRUE; - if (need_temp0) - { + if (need_temp0) { temp0 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + if (!emit_repl( emit, temp0, &src0 )) return FALSE; - - src0 = src( temp0 ); } - if (need_temp1) - { + if (need_temp1) { temp1 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 )) + if (!emit_repl( emit, temp1, &src1 )) return FALSE; - - src1 = src( temp1 ); } if (!emit_op3( emit, inst, dest, src0, src1, src2 )) @@ -477,24 +503,18 @@ static boolean submit_op4( struct svga_shader_emitter *emit, (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) need_temp3 = TRUE; - if (need_temp0) - { + if 
(need_temp0) { temp0 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + if (!emit_repl( emit, temp0, &src0 )) return FALSE; - - src0 = src( temp0 ); } - if (need_temp3) - { + if (need_temp3) { temp3 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 )) + if (!emit_repl( emit, temp3, &src3 )) return FALSE; - - src3 = src( temp3 ); } if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 )) -- cgit v1.2.3 From 15c3e21097ba6d410daaff525eb4eeeb5e1e481a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 18 Feb 2011 13:53:45 +0000 Subject: svga: Ensure LRP's restrictions are observed in all uses. The dst reg must be a temporary, and not be the same as src0 or src2. --- src/gallium/drivers/svga/svga_tgsi_insn.c | 89 ++++++++++++++++++------------- 1 file changed, 51 insertions(+), 38 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index a868108098e..f5842eff3d2 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -528,6 +528,55 @@ static boolean submit_op4( struct svga_shader_emitter *emit, } +static boolean alias_src_dst( struct src_register src, + SVGA3dShaderDestToken dst ) +{ + if (src.base.num != dst.num) + return FALSE; + + if (SVGA3dShaderGetRegType(dst.value) != + SVGA3dShaderGetRegType(src.base.value)) + return FALSE; + + return TRUE; +} + + +static boolean submit_lrp(struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register src0, + struct src_register src1, + struct src_register src2) +{ + SVGA3dShaderDestToken tmp; + boolean need_dst_tmp = FALSE; + + /* The dst reg must be a temporary, and not be the same as src0 or src2 */ + if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || + alias_src_dst(src0, dst) || + alias_src_dst(src2, dst)) + need_dst_tmp = TRUE; + + if (need_dst_tmp) { + tmp = get_temp( emit ); 
+ tmp.mask = dst.mask; + } + else { + tmp = dst; + } + + if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) + return FALSE; + + if (need_dst_tmp) { + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) + return FALSE; + } + + return TRUE; +} + + static boolean emit_def_const( struct svga_shader_emitter *emit, SVGA3dShaderConstType type, unsigned idx, @@ -864,7 +913,7 @@ static boolean emit_cmp(struct svga_shader_emitter *emit, */ if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero)) return FALSE; - return submit_op3(emit, inst_token(SVGA3DOP_LRP), dst, src(temp), src1, src2); + return submit_lrp(emit, dst, src(temp), src1, src2); } /* CMP DST, SRC0, SRC2, SRC1 */ @@ -1691,19 +1740,6 @@ static boolean emit_arl(struct svga_shader_emitter *emit, } } -static boolean alias_src_dst( struct src_register src, - SVGA3dShaderDestToken dst ) -{ - if (src.base.num != dst.num) - return FALSE; - - if (SVGA3dShaderGetRegType(dst.value) != - SVGA3dShaderGetRegType(src.base.value)) - return FALSE; - - return TRUE; -} - static boolean emit_pow(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { @@ -1796,37 +1832,14 @@ static boolean emit_lrp(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - SVGA3dShaderDestToken tmp; const struct src_register src0 = translate_src_register( emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( emit, &insn->Src[1] ); const struct src_register src2 = translate_src_register( emit, &insn->Src[2] ); - boolean need_dst_tmp = FALSE; - /* The dst reg must not be the same as src0 or src2 */ - if (alias_src_dst(src0, dst) || - alias_src_dst(src2, dst)) - need_dst_tmp = TRUE; - - if (need_dst_tmp) { - tmp = get_temp( emit ); - tmp.mask = dst.mask; - } - else { - tmp = dst; - } - - if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) - return 
FALSE; - - if (need_dst_tmp) { - if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) - return FALSE; - } - - return TRUE; + return submit_lrp(emit, dst, src0, src1, src2); } -- cgit v1.2.3 From 0cb6329e8941dc7937c8aeea4b2e83c77881bd3a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 18 Feb 2011 14:29:48 +0000 Subject: svga: Ensure SWTNL is created after HWTNL. Matches the internal driver layering, and prevents null svga->hwtnl dereferencing from inside the swtnl. --- src/gallium/drivers/svga/svga_context.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 2b8a70d18f1..9bcf6571bd3 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -126,9 +126,6 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga->debug.no_line_width = debug_get_option_no_line_width(); svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple(); - if (!svga_init_swtnl(svga)) - goto no_swtnl; - svga->fs_bm = util_bitmask_create(); if (svga->fs_bm == NULL) goto no_fs_bm; @@ -157,6 +154,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, if (svga->hwtnl == NULL) goto no_hwtnl; + if (!svga_init_swtnl(svga)) + goto no_swtnl; ret = svga_emit_initial_state( svga ); if (ret) @@ -179,6 +178,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, return &svga->pipe; no_state: + svga_destroy_swtnl(svga); +no_swtnl: svga_hwtnl_destroy( svga->hwtnl ); no_hwtnl: u_upload_destroy( svga->upload_vb ); @@ -189,8 +190,6 @@ no_upload_ib: no_vs_bm: util_bitmask_destroy( svga->fs_bm ); no_fs_bm: - svga_destroy_swtnl(svga); -no_swtnl: svga->swc->destroy(svga->swc); no_swc: FREE(svga); -- cgit v1.2.3 From f9b48678463e926571ab5e547bb5ced5f4a6896a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 18 Feb 2011 15:07:50 
+0000 Subject: svga: Cannot use negate or abs on source to dsx/dsy instructions. --- src/gallium/drivers/svga/svga_tgsi_insn.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index f5842eff3d2..99600cf5c00 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -1704,6 +1704,10 @@ static boolean emit_deriv(struct svga_shader_emitter *emit, } else { unsigned opcode; + const struct tgsi_full_src_register *reg = &insn->Src[0]; + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + struct src_register src0; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_DDX: @@ -1716,7 +1720,21 @@ static boolean emit_deriv(struct svga_shader_emitter *emit, return FALSE; } - return emit_simple_instruction( emit, opcode, insn ); + inst = inst_token( opcode ); + dst = translate_dst_register( emit, insn, 0 ); + src0 = translate_src_register( emit, reg ); + + /* We cannot use negate or abs on source to dsx/dsy instruction. + */ + if (reg->Register.Absolute || + reg->Register.Negate) { + SVGA3dShaderDestToken temp = get_temp( emit ); + + if (!emit_repl( emit, temp, &src0 )) + return FALSE; + } + + return submit_op1( emit, inst, dst, src0 ); } } -- cgit v1.2.3 From 0ced789a0b56256891a2a26342befe645f4fa46d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 18 Feb 2011 14:33:55 +0000 Subject: svga: Ensure pending drawing commands and other surface operations are emitted before DMAs. This behavior was lost when moving the transfers to the contexts. This fixes several piglit failures, which were reading the color renderbuffer before the draw operations were emitted. 
--- src/gallium/drivers/svga/svga_context.c | 25 ++++++++++++++++++++++++ src/gallium/drivers/svga/svga_context.h | 2 ++ src/gallium/drivers/svga/svga_pipe_blit.c | 4 +++- src/gallium/drivers/svga/svga_pipe_flush.c | 14 ++----------- src/gallium/drivers/svga/svga_pipe_misc.c | 2 +- src/gallium/drivers/svga/svga_resource_texture.c | 4 ++++ src/gallium/drivers/svga/svga_surface.c | 4 ++-- src/gallium/drivers/svga/svga_surface.h | 2 +- 8 files changed, 40 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 9bcf6571bd3..9b737a187e7 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -34,6 +34,7 @@ #include "svga_context.h" #include "svga_screen.h" +#include "svga_surface.h" #include "svga_resource_texture.h" #include "svga_resource_buffer.h" #include "svga_resource.h" @@ -247,6 +248,30 @@ void svga_hwtnl_flush_retry( struct svga_context *svga ) assert(ret == 0); } + +/* Emit all operations pending on host surfaces. + */ +void svga_surfaces_flush(struct svga_context *svga) +{ + unsigned i; + + /* Emit buffered drawing commands. + */ + svga_hwtnl_flush_retry( svga ); + + /* Emit back-copy from render target view to texture. 
+ */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (svga->curr.framebuffer.cbufs[i]) + svga_propagate_surface(svga, svga->curr.framebuffer.cbufs[i]); + } + + if (svga->curr.framebuffer.zsbuf) + svga_propagate_surface(svga, svga->curr.framebuffer.zsbuf); + +} + + struct svga_winsys_context * svga_winsys_context( struct pipe_context *pipe ) { diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 4d4f50366ea..7b36a3606e0 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -453,6 +453,8 @@ void svga_context_flush( struct svga_context *svga, void svga_hwtnl_flush_retry( struct svga_context *svga ); +void svga_surfaces_flush(struct svga_context *svga); + struct pipe_context * svga_context_create(struct pipe_screen *screen, void *priv); diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c index 426698806c8..c87afb6946c 100644 --- a/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -50,7 +50,9 @@ static void svga_surface_copy(struct pipe_context *pipe, struct pipe_surface *srcsurf, *dstsurf;*/ unsigned dst_face, dst_z, src_face, src_z; - svga_hwtnl_flush_retry( svga ); + /* Emit buffered drawing commands, and any back copies. + */ + svga_surfaces_flush( svga ); #if 0 srcsurf = screen->get_tex_surface(screen, src_tex, diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index 6c69d29d15e..9357d827f28 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -36,20 +36,10 @@ static void svga_flush( struct pipe_context *pipe, struct pipe_fence_handle **fence ) { struct svga_context *svga = svga_context(pipe); - int i; - /* Emit buffered drawing commands. + /* Emit buffered drawing commands, and any back copies. 
*/ - svga_hwtnl_flush_retry( svga ); - - /* Emit back-copy from render target view to texture. - */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - if (svga->curr.framebuffer.cbufs[i]) - svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]); - } - if (svga->curr.framebuffer.zsbuf) - svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf); + svga_surfaces_flush( svga ); /* Flush command queue. */ diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c index 8c24fb302f7..440919c6262 100644 --- a/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -94,7 +94,7 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i]) - svga_propagate_surface(pipe, dst->cbufs[i]); + svga_propagate_surface(svga, dst->cbufs[i]); } /* XXX: Actually the virtual hardware may support rendertargets with diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 3bb6ecf81f0..994f30719ae 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -215,6 +215,10 @@ svga_transfer_dma(struct svga_context *svga, SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__); } + /* Ensure any pending operations on host surfaces are queued on the command + * buffer first. + */ + svga_surfaces_flush( svga ); if(!st->swbuf) { /* Do the DMA transfer in a single go */ diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index 0cb58e66111..df18a560a62 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -340,7 +340,7 @@ void svga_mark_surfaces_dirty(struct svga_context *svga) * pipe is optional context to inline the blit command in. 
*/ void -svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) +svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf) { struct svga_surface *s = svga_surface(surf); struct svga_texture *tex = svga_texture(surf->texture); @@ -365,7 +365,7 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) if (s->handle != tex->handle) { SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf); - svga_texture_copy_handle(svga_context(pipe), + svga_texture_copy_handle(svga, s->handle, 0, 0, 0, s->real_level, s->real_face, tex->handle, 0, 0, zslice, surf->u.tex.level, face, u_minify(tex->b.b.width0, surf->u.tex.level), diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h index 755121945de..04166f1fca2 100644 --- a/src/gallium/drivers/svga/svga_surface.h +++ b/src/gallium/drivers/svga/svga_surface.h @@ -56,7 +56,7 @@ struct svga_surface extern void -svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf); +svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf); extern boolean svga_surface_needs_propagation(struct pipe_surface *surf); -- cgit v1.2.3 From e16e70610c459721f4344dc6e61a8af1c2ad870d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 18 Feb 2011 19:03:08 +0000 Subject: svga: Fix NULL dereference. Probably introduced with the surface view move from screen to context. 
--- src/gallium/drivers/svga/svga_sampler_view.c | 5 +++-- src/gallium/drivers/svga/svga_surface.c | 9 +++++---- src/gallium/drivers/svga/svga_surface.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c index 4d9ac6c324a..49cae2d44f6 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.c +++ b/src/gallium/drivers/svga/svga_sampler_view.c @@ -55,7 +55,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_resource *pt, unsigned min_lod, unsigned max_lod) { - struct svga_screen *ss = svga_screen(pt->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_texture *tex = svga_texture(pt); struct svga_sampler_view *sv = NULL; SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE; @@ -138,7 +139,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->age = tex->age; - sv->handle = svga_texture_view_surface(pipe, tex, flags, format, + sv->handle = svga_texture_view_surface(svga, tex, flags, format, min_lod, max_lod - min_lod + 1, -1, -1, diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index df18a560a62..3e8fb5f0271 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -100,7 +100,7 @@ svga_texture_copy_handle(struct svga_context *svga, struct svga_winsys_surface * -svga_texture_view_surface(struct pipe_context *pipe, +svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, @@ -110,7 +110,7 @@ svga_texture_view_surface(struct pipe_context *pipe, int zslice_pick, struct svga_host_surface_cache_key *key) /* OUT */ { - struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_screen *ss = svga_screen(svga->pipe.screen); struct svga_winsys_surface 
*handle; uint32_t i, j; unsigned z_offset = 0; @@ -162,7 +162,7 @@ svga_texture_view_surface(struct pipe_context *pipe, u_minify(tex->b.b.depth0, i + start_mip) : 1); - svga_texture_copy_handle(svga_context(pipe), + svga_texture_copy_handle(svga, tex->handle, 0, 0, z_offset, i + start_mip, @@ -184,6 +184,7 @@ svga_create_surface(struct pipe_context *pipe, struct pipe_resource *pt, const struct pipe_surface *surf_tmpl) { + struct svga_context *svga = svga_context(pipe); struct svga_texture *tex = svga_texture(pt); struct pipe_screen *screen = pipe->screen; struct svga_surface *s; @@ -259,7 +260,7 @@ svga_create_surface(struct pipe_context *pipe, SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", pt, surf_tmpl->u.tex.level, face, zslice, s); - s->handle = svga_texture_view_surface(NULL, tex, flags, format, + s->handle = svga_texture_view_surface(svga, tex, flags, format, surf_tmpl->u.tex.level, 1, face, zslice, &s->key); s->real_face = 0; diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h index 04166f1fca2..bffc8c22c60 100644 --- a/src/gallium/drivers/svga/svga_surface.h +++ b/src/gallium/drivers/svga/svga_surface.h @@ -62,7 +62,7 @@ extern boolean svga_surface_needs_propagation(struct pipe_surface *surf); struct svga_winsys_surface * -svga_texture_view_surface(struct pipe_context *pipe, +svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, -- cgit v1.2.3 From 0b436cf511316d4bf90246a39557900b4b566853 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 19 Feb 2011 00:16:44 +0100 Subject: r300g: fix a possible race when counting contexts Atomics aren't sufficient here. 
--- src/gallium/drivers/r300/r300_context.c | 6 ++++-- src/gallium/drivers/r300/r300_screen.c | 3 +++ src/gallium/drivers/r300/r300_screen.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index a89bf7fac31..da6b0bb8aa7 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -38,19 +38,21 @@ static void r300_update_num_contexts(struct r300_screen *r300screen, int diff) { + pipe_mutex_lock(r300screen->num_contexts_mutex); if (diff > 0) { - p_atomic_inc(&r300screen->num_contexts); + r300screen->num_contexts++; if (r300screen->num_contexts > 1) util_slab_set_thread_safety(&r300screen->pool_buffers, UTIL_SLAB_MULTITHREADED); } else { - p_atomic_dec(&r300screen->num_contexts); + r300screen->num_contexts--; if (r300screen->num_contexts <= 1) util_slab_set_thread_safety(&r300screen->pool_buffers, UTIL_SLAB_SINGLETHREADED); } + pipe_mutex_unlock(r300screen->num_contexts_mutex); } static void r300_release_referenced_objects(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index ed47315f42d..77a9c6ad86f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -398,6 +398,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); util_slab_destroy(&r300screen->pool_buffers); + pipe_mutex_destroy(r300screen->num_contexts_mutex); if (rws) rws->destroy(rws); @@ -459,6 +460,8 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) r300screen->caps.is_r500 && rws->get_value(rws, R300_VID_DRM_2_3_0); + pipe_mutex_init(r300screen->num_contexts_mutex); + util_slab_create(&r300screen->pool_buffers, sizeof(struct r300_resource), 64, UTIL_SLAB_SINGLETHREADED); diff --git 
a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index c935f55ccbf..576f9c1f4a9 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -52,6 +52,7 @@ struct r300_screen { /* The number of created contexts to know whether we have multiple * contexts or not. */ int num_contexts; + pipe_mutex num_contexts_mutex; }; -- cgit v1.2.3 From 91ea60395ef1fe046188b58753b2710301d07599 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 19 Feb 2011 10:29:59 +0000 Subject: scons: Add aliases for the llvmpipe unit tests. Now one can simply do scons lp_test_format --- src/gallium/drivers/llvmpipe/SConscript | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 26b258b9569..c10a8cbc12c 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -96,10 +96,15 @@ if env['platform'] != 'embedded': tests.append('round') for test in tests: + testname = 'lp_test_' + test target = env.Program( - target = 'lp_test_' + test, - source = ['lp_test_' + test + '.c', 'lp_test_main.c'], + target = testname, + source = [testname + '.c', 'lp_test_main.c'], ) env.InstallProgram(target) + + # http://www.scons.org/wiki/UnitTests + alias = env.Alias(testname, [target], target[0].abspath) + AlwaysBuild(alias) Export('llvmpipe') -- cgit v1.2.3 From ec3c5ac5924d0daab424215a45ea3a089b9df54f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 20 Feb 2011 11:46:17 +0100 Subject: i915g: Add dummy flush_frontbuffer --- src/gallium/drivers/i915/i915_screen.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index bdbc08e8086..5055c15d208 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ 
b/src/gallium/drivers/i915/i915_screen.c @@ -320,6 +320,20 @@ i915_fence_finish(struct pipe_screen *screen, */ +static void +i915_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *winsys_drawable_handle) +{ + /* XXX: Dummy right now. */ + (void)screen; + (void)resource; + (void)level; + (void)layer; + (void)winsys_drawable_handle; +} + static void i915_destroy_screen(struct pipe_screen *screen) { @@ -371,6 +385,7 @@ i915_screen_create(struct i915_winsys *iws) is->base.winsys = NULL; is->base.destroy = i915_destroy_screen; + is->base.flush_frontbuffer = i915_flush_frontbuffer; is->base.get_name = i915_get_name; is->base.get_vendor = i915_get_vendor; -- cgit v1.2.3 From dcb21d8b1cc3962faac600dabc87f3038acb5c5d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 21 Feb 2011 18:24:36 +0000 Subject: svga: Remove some remaining fake S3TC rendering support. --- src/gallium/drivers/svga/svga_sampler_view.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c index 49cae2d44f6..4f1f4b597e8 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.c +++ b/src/gallium/drivers/svga/svga_sampler_view.c @@ -79,10 +79,6 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, if (min_lod == 0 && max_lod >= pt->last_level) view = FALSE; - if (util_format_is_s3tc(pt->format) && view) { - format = svga_translate_format_render(pt->format); - } - if (ss->debug.no_sampler_view) view = FALSE; -- cgit v1.2.3 From e3c9bf1a670dd6924537d2e04d522ef899ee5b57 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 20 Feb 2011 11:45:48 +0100 Subject: i915g: Reorg caps --- src/gallium/drivers/i915/i915_screen.c | 70 ++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c index 5055c15d208..31a8134eadc 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -98,59 +98,72 @@ i915_get_name(struct pipe_screen *screen) } static int -i915_get_param(struct pipe_screen *screen, enum pipe_cap param) +i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + switch (cap) { + /* Supported features (boolean caps). */ case PIPE_CAP_NPOT_TEXTURES: - return 1; + case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ + case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TWO_SIDED_STENCIL: return 1; - case PIPE_CAP_GLSL: - return 0; + + /* Unsupported features (boolean caps). */ case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; + case PIPE_CAP_ARRAY_TEXTURES: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: /* disable for now */ + case PIPE_CAP_GLSL: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_INSTANCED_DRAWING: /* draw module? */ case PIPE_CAP_OCCLUSION_QUERY: - return 0; + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_TIMER_QUERY: return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; + + /* Texturing. 
*/ + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 8; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return I915_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return I915_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return I915_MAX_TEXTURE_2D_LEVELS; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + + /* Fragment coordinate conventions. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - /* disable for now */ - return 0; + default: + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } static int -i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) +i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap cap) { switch(shader) { case PIPE_SHADER_VERTEX: - return draw_get_shader_param(shader, param); + return draw_get_shader_param(shader, cap); case PIPE_SHADER_FRAGMENT: break; default: @@ -158,7 +171,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha } /* XXX: these are just shader model 2.0 values, fix this! 
*/ - switch(param) { + switch(cap) { case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: return 96; case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: @@ -191,15 +204,15 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha case PIPE_SHADER_CAP_SUBROUTINES: return 0; default: - assert(0); + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } static float -i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param) +i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap) { - switch (param) { + switch(cap) { case PIPE_CAP_MAX_LINE_WIDTH: /* fall-through */ case PIPE_CAP_MAX_LINE_WIDTH_AA: @@ -217,6 +230,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param) return 16.0; default: + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } -- cgit v1.2.3 From e7cdcefbee6c8bffdc421f38d97578180e7991b5 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 20 Feb 2011 13:58:11 +0100 Subject: i915g: TODO about untested code hidden behind caps Should be fairly easy to test and fix since you can look at the code in the classic driver. --- src/gallium/drivers/i915/TODO | 3 +++ src/gallium/drivers/i915/i915_screen.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index 94c428bebf8..0becf31d9bc 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -1,5 +1,8 @@ Random list of problems with i915g: +- Check if PIPE_CAP_BLEND_EQUATION_SEPARATE and PIPE_CAP_TEXTURE_MIRROR_REPEAT + work, the code is there. If not fix it! A simple task, good for beginners. + - Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still broken, it doesn't update the viewport/get new buffers. 
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 31a8134eadc..232262a9b50 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -108,10 +108,15 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_TWO_SIDED_STENCIL: return 1; + /* Features that should be supported (boolean caps). */ + /* XXX: Just test the code */ + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 0; + /* Unsupported features (boolean caps). */ case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_ARRAY_TEXTURES: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: /* disable for now */ case PIPE_CAP_GLSL: @@ -122,7 +127,6 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_POINT_SPRITE: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_TIMER_QUERY: return 0; -- cgit v1.2.3 From a64176657602e9acc1b6c9a49ab26586f3a5c254 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 20 Feb 2011 14:00:03 +0100 Subject: i915g: TODO about point sprites --- src/gallium/drivers/i915/TODO | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index 0becf31d9bc..eda5838a854 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -3,6 +3,9 @@ Random list of problems with i915g: - Check if PIPE_CAP_BLEND_EQUATION_SEPARATE and PIPE_CAP_TEXTURE_MIRROR_REPEAT work, the code is there. If not fix it! A simple task, good for beginners. +- Add support for PIPE_CAP_POINT_SPRITE either via the draw module or directly + via the hardware, look at the classic driver, more advanced. + - Dies with BadDrawable on GLXFBconfig changes/destruction. 
Makes piglit totally unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still broken, it doesn't update the viewport/get new buffers. -- cgit v1.2.3 From e7e1fd057e8cc78e7039637d045ac922bb82b8d8 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 21 Feb 2011 21:27:05 +0000 Subject: i915g: Anisotropic filtering works --- src/gallium/drivers/i915/i915_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 232262a9b50..2260bcecf6e 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -102,6 +102,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) { switch (cap) { /* Supported features (boolean caps). */ + case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ case PIPE_CAP_TEXTURE_SHADOW_MAP: @@ -115,7 +116,6 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) return 0; /* Unsupported features (boolean caps). 
*/ - case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_ARRAY_TEXTURES: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: /* disable for now */ -- cgit v1.2.3 From 3c74ecf687a7b23d7fcb5436722a3f4892034e0a Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 20 Feb 2011 12:52:11 +0100 Subject: i915g: Rework texture tiling a bit --- src/gallium/drivers/i915/i915_resource_texture.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index e793d126ade..12790a3d780 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -172,15 +172,15 @@ i915_texture_set_image_offset(struct i915_texture *tex, } static enum i915_winsys_buffer_tile -i915_texture_tiling(struct pipe_resource *pt) +i915_texture_tiling(struct i915_screen *is, struct i915_texture *tex) { if (!i915_tiling) return I915_TILE_NONE; - if (pt->target == PIPE_TEXTURE_1D) + if (tex->b.b.target == PIPE_TEXTURE_1D) return I915_TILE_NONE; - if (util_format_is_s3tc(pt->format)) + if (util_format_is_s3tc(tex->b.b.format)) /* XXX X-tiling might make sense */ return I915_TILE_NONE; @@ -401,11 +401,7 @@ i915_texture_layout_3d(struct i915_texture *tex) static boolean i915_texture_layout(struct i915_texture * tex) { - struct pipe_resource *pt = &tex->b.b; - - tex->tiling = i915_texture_tiling(pt); - - switch (pt->target) { + switch (tex->b.b.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -649,11 +645,7 @@ i945_texture_layout_cube(struct i915_texture *tex) static boolean i945_texture_layout(struct i915_texture * tex) { - struct pipe_resource *pt = &tex->b.b; - - tex->tiling = i915_texture_tiling(pt); - - switch (pt->target) { + switch (tex->b.b.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -664,7 +656,7 
@@ i945_texture_layout(struct i915_texture * tex) i945_texture_layout_3d(tex); break; case PIPE_TEXTURE_CUBE: - if (!util_format_is_s3tc(pt->format)) + if (!util_format_is_s3tc(tex->b.b.format)) i9x5_texture_layout_cube(tex); else i945_texture_layout_cube(tex); @@ -818,6 +810,8 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; + tex->tiling = i915_texture_tiling(is, tex); + if (is->is_i945) { if (!i945_texture_layout(tex)) goto fail; -- cgit v1.2.3 From fe6800a1bbd2f22fc3b3921765491d9ba179c4ff Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 20 Feb 2011 11:41:32 +0100 Subject: i915g: Use debug get once options --- src/gallium/drivers/i915/i915_context.c | 5 ++++- src/gallium/drivers/i915/i915_debug.c | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 648d0090c9a..78a32340ba7 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -39,6 +39,9 @@ #include "pipe/p_screen.h" +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE); + + /* * Draw functions */ @@ -144,7 +147,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) */ i915->draw = draw_create(&i915->base); assert(i915->draw); - if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { + if (!debug_get_option_i915_no_vbuf()) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 845e92cf5c6..8dbd36cc318 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -48,10 +48,13 @@ static const struct debug_named_value debug_options[] = { unsigned i915_debug = 0; boolean i915_tiling = TRUE; 
+DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0); +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE); + void i915_debug_init(struct i915_screen *screen) { - i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); - i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE); + i915_debug = debug_get_option_i915_debug(); + i915_tiling = !debug_get_option_i915_no_tiling(); } -- cgit v1.2.3 From 27b49e91c982638497f7a92f7d611c29dd9cad18 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 20 Feb 2011 12:52:55 +0100 Subject: i915g: Move debug fields to screen --- src/gallium/drivers/i915/i915_debug.c | 4 ++-- src/gallium/drivers/i915/i915_debug.h | 1 - src/gallium/drivers/i915/i915_resource_texture.c | 2 +- src/gallium/drivers/i915/i915_screen.h | 4 ++++ 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 8dbd36cc318..d11da83eb29 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -51,10 +51,10 @@ boolean i915_tiling = TRUE; DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0); DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE); -void i915_debug_init(struct i915_screen *screen) +void i915_debug_init(struct i915_screen *is) { i915_debug = debug_get_option_i915_debug(); - i915_tiling = !debug_get_option_i915_no_tiling(); + is->debug.tiling = !debug_get_option_i915_no_tiling(); } diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index 11af7662f0a..fa60799d0c5 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -46,7 +46,6 @@ struct i915_winsys_batchbuffer; #define DBG_CONSTANTS 0x20 extern unsigned i915_debug; -extern boolean i915_tiling; #ifdef DEBUG static INLINE boolean diff --git 
a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index 12790a3d780..aad5235a6ad 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -174,7 +174,7 @@ i915_texture_set_image_offset(struct i915_texture *tex, static enum i915_winsys_buffer_tile i915_texture_tiling(struct i915_screen *is, struct i915_texture *tex) { - if (!i915_tiling) + if (!is->debug.tiling) return I915_TILE_NONE; if (tex->b.b.target == PIPE_TEXTURE_1D) diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index bb4d255a3b3..e76e33ad42e 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -45,6 +45,10 @@ struct i915_screen struct i915_winsys *iws; boolean is_i945; + + struct { + boolean tiling; + } debug; }; -- cgit v1.2.3 From 43e6fe5549edb7e837480f28b1262357568d54ea Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 20 Feb 2011 13:41:18 +0100 Subject: i915g: Add option to lie about caps --- src/gallium/drivers/i915/i915_debug.c | 2 ++ src/gallium/drivers/i915/i915_screen.c | 7 ++++++- src/gallium/drivers/i915/i915_screen.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index d11da83eb29..e0ea025d0ec 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -50,11 +50,13 @@ boolean i915_tiling = TRUE; DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0); DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE); void i915_debug_init(struct i915_screen *is) { i915_debug = debug_get_option_i915_debug(); is->debug.tiling = !debug_get_option_i915_no_tiling(); + is->debug.lie = debug_get_option_i915_lie(); } diff --git 
a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 2260bcecf6e..d929eb25bdc 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -100,6 +100,8 @@ i915_get_name(struct pipe_screen *screen) static int i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) { + struct i915_screen *is = i915_screen(screen); + switch (cap) { /* Supported features (boolean caps). */ case PIPE_CAP_ANISOTROPIC_FILTER: @@ -123,7 +125,6 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_INSTANCED_DRAWING: /* draw module? */ - case PIPE_CAP_OCCLUSION_QUERY: case PIPE_CAP_POINT_SPRITE: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: @@ -131,6 +132,10 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_TIMER_QUERY: return 0; + /* Features we can lie about (boolean caps). */ + case PIPE_CAP_OCCLUSION_QUERY: + return is->debug.lie ? 1 : 0; + /* Texturing. 
*/ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: case PIPE_CAP_MAX_COMBINED_SAMPLERS: diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index e76e33ad42e..60f0e2971e0 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -48,6 +48,7 @@ struct i915_screen struct { boolean tiling; + boolean lie; } debug; }; -- cgit v1.2.3 From 1e966636d06e92f562a7dbcd69aa88c4c9283c41 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 21 Feb 2011 18:25:20 +0100 Subject: i915g: s/bool/boolean/ style-fixup in winsys Signed-off-by: Daniel Vetter --- src/gallium/drivers/i915/i915_winsys.h | 3 ++- src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c | 2 +- src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 24ea416f015..e915a886c9b 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -103,11 +103,12 @@ struct i915_winsys { * @usage how is the hardware going to use the buffer. * @offset add this to the reloc buffers address * @target buffer where to write the address, null for batchbuffer. + * @fenced relocation needs a fence. */ int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *reloc, enum i915_winsys_buffer_usage usage, - unsigned offset, bool fenced); + unsigned offset, boolean fenced); /** * Flush a bufferbatch. 
diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c index 5894c133c30..dec19d42698 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c @@ -74,7 +74,7 @@ static int i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - unsigned pre_add, bool fenced) + unsigned pre_add, boolean fenced) { struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch); unsigned write_domain = 0; diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c index 6ffba444897..dcb0e54b673 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c @@ -61,7 +61,7 @@ static int i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - unsigned pre_add, bool fenced) + unsigned pre_add, boolean fenced) { struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch); int ret = 0; -- cgit v1.2.3 From 7898d2ae16d335e27da599cd3cab04528248f959 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Feb 2011 12:44:10 -0700 Subject: i915g: remove extra semicolons --- src/gallium/drivers/i915/i915_debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index e0ea025d0ec..1713bf131f2 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -48,9 +48,9 @@ static const struct debug_named_value debug_options[] = { unsigned i915_debug = 0; boolean i915_tiling = TRUE; -DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0); -DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE); 
-DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE); +DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0) +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE) void i915_debug_init(struct i915_screen *is) { -- cgit v1.2.3 From cbe47a2459c3b3a78a98038aed1990ec8627bb49 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Feb 2011 12:44:42 -0700 Subject: r300g: fix missing initializers warning --- src/gallium/drivers/r300/r300_context.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index da6b0bb8aa7..9f85bd4ce5f 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -482,7 +482,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } { - struct pipe_resource vb = {}; + struct pipe_resource vb; + memset(&vb, 0, sizeof(vb)); vb.target = PIPE_BUFFER; vb.format = PIPE_FORMAT_R8_UNORM; vb.bind = PIPE_BIND_VERTEX_BUFFER; @@ -495,7 +496,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } { - struct pipe_depth_stencil_alpha_state dsa = {}; + struct pipe_depth_stencil_alpha_state dsa; + memset(&dsa, 0, sizeof(dsa)); dsa.depth.writemask = 1; r300->dsa_decompress_zmask = -- cgit v1.2.3 From 0ed5bf668db24fb56b5b359399099c89531e2a0a Mon Sep 17 00:00:00 2001 From: Fabian Bieler Date: Mon, 14 Feb 2011 22:44:42 +0100 Subject: r600g: Request DWORD aligned vertex buffers. The spec says that the offsets in the vertex-fetch instructions need to be byte-aligned and makes no specification with regard to the required alignment of the offset and stride in the vertex resource constant register. However, testing indicates that all three values need to be DWORD aligned. 
--- src/gallium/drivers/r600/r600_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9d6c9bd5429..79b0d02252d 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -196,7 +196,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | PIPE_BIND_CONSTANT_BUFFER, - U_VERTEX_FETCH_BYTE_ALIGNED); + U_VERTEX_FETCH_DWORD_ALIGNED); if (!rctx->vbuf_mgr) { r600_destroy_context(&rctx->context); return NULL; -- cgit v1.2.3 From 4407e5078f6083e4e56ba5970d0fa10d504ed45b Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 22 Feb 2011 22:28:06 +0000 Subject: i915g: Always set vbo to flush on flushes Reported-by: Chris Wilson --- src/gallium/drivers/i915/i915_flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index a2c70b11991..f5435bb8453 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -74,7 +74,6 @@ static void i915_flush_pipe( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ FLUSH_BATCH(fence); - i915->vbo_flushed = 1; I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__); } @@ -93,5 +92,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) struct i915_winsys_batchbuffer *batch = i915->batch; batch->iws->batchbuffer_flush(batch, fence); + i915->vbo_flushed = 1; i915->hardware_dirty = ~0; } -- cgit v1.2.3 From fc77dee0bd6499177418be347bac875b12277053 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 23 Feb 2011 00:11:09 +0000 Subject: i915g: Enable mirror repeat wrap mode --- src/gallium/drivers/i915/TODO | 4 ++-- src/gallium/drivers/i915/i915_screen.c | 2
+- src/gallium/drivers/i915/i915_state.c | 4 +--- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index eda5838a854..fc644fe7f93 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -1,7 +1,7 @@ Random list of problems with i915g: -- Check if PIPE_CAP_BLEND_EQUATION_SEPARATE and PIPE_CAP_TEXTURE_MIRROR_REPEAT - work, the code is there. If not fix it! A simple task, good for beginners. +- Check if PIPE_CAP_BLEND_EQUATION_SEPARATE works, the code is there. + If not fix it! A simple task, good for beginners. - Add support for PIPE_CAP_POINT_SPRITE either via the draw module or directly via the hardware, look at the classic driver, more advanced. diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index d929eb25bdc..77febbf5012 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -107,6 +107,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TWO_SIDED_STENCIL: return 1; @@ -114,7 +115,6 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) /* Features that should be supported (boolean caps). */ /* XXX: Just test the code */ case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 0; /* Unsupported features (boolean caps).
*/ diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index f380708847b..58bbbd1de2c 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -58,10 +58,8 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; - /* - case PIPE_TEX_WRAP_MIRRORED_REPEAT: + case PIPE_TEX_WRAP_MIRROR_REPEAT: return TEXCOORDMODE_MIRROR; - */ default: return TEXCOORDMODE_WRAP; } -- cgit v1.2.3 From 481fad15526d30e121d031bfcb765ed7847f50a7 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 21 Feb 2011 22:47:40 +0000 Subject: i915g: Remove outdated comment --- src/gallium/drivers/i915/i915_state_immediate.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index f9ade7077f2..d691fadc9e8 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -36,14 +36,6 @@ #include "util/u_memory.h" -/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. - * Would like to opportunistically recombine all these fragments into - * a single packet containing only what has changed, but for now emit - * as multiple packets. - */ - - - /*********************************************************************** * S0,S1: Vertex buffer state. 
-- cgit v1.2.3 From 42b8b2be8553390f2e5f847759e886b910b550cc Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 21 Feb 2011 23:09:43 +0000 Subject: i915g: Clean up in i915_state_immediate --- src/gallium/drivers/i915/i915_state_immediate.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index d691fadc9e8..00dcf9cb2d0 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -50,7 +50,6 @@ static void upload_S0S1(struct i915_context *i915) /* I915_NEW_VERTEX_SIZE */ - /* XXX do this where the vertex size is calculated! */ { unsigned vertex_size = i915->current.vertex_info.size; @@ -90,9 +89,6 @@ static void upload_S2S4(struct i915_context *i915) { LIS2 = i915->current.vertex_info.hwfmt[1]; LIS4 = i915->current.vertex_info.hwfmt[0]; - /* - debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); - */ assert(LIS4); /* should never be zero? 
*/ } @@ -179,7 +175,7 @@ static void upload_S6(struct i915_context *i915) } const struct i915_tracked_state i915_upload_S6 = { - "imm s6", + "imm S6", upload_S6, I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER }; -- cgit v1.2.3 From 69cfc16cb670804b272e88abac5ed3856bb70fa8 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 22 Feb 2011 22:07:03 +0000 Subject: i915g: Disable LIS7 state updates for now --- src/gallium/drivers/i915/TODO | 2 ++ src/gallium/drivers/i915/i915_state_immediate.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index fc644fe7f93..f4e1423fa59 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -6,6 +6,8 @@ Random list of problems with i915g: - Add support for PIPE_CAP_POINT_SPRITE either via the draw module or directly via the hardware, look at the classic driver, more advanced. +- What does this button do? Figure out LIS7 with regards to depth offset. + - Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still broken, it doesn't update the viewport/get new buffers. 
diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index 00dcf9cb2d0..3dd227f6045 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -130,7 +130,7 @@ static void upload_S5(struct i915_context *i915) #if 0 /* I915_NEW_RASTERIZER */ - if (i915->state.Polygon->OffsetFill) { + if (i915->rasterizer->LIS7) { LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; } #endif @@ -192,10 +192,12 @@ static void upload_S7(struct i915_context *i915) */ LIS7 = i915->rasterizer->LIS7; +#if 0 if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; i915->hardware_dirty |= I915_HW_IMMEDIATE; } +#endif } const struct i915_tracked_state i915_upload_S7 = { -- cgit v1.2.3 From b9baad2aff6ddc5145d91cbfb81d083a21990a80 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 21 Feb 2011 23:39:10 +0000 Subject: i915g: Lazy emit immediate state --- src/gallium/drivers/i915/i915_context.c | 1 + src/gallium/drivers/i915/i915_context.h | 1 + src/gallium/drivers/i915/i915_flush.c | 1 + src/gallium/drivers/i915/i915_state_emit.c | 53 +++++++++++----------- src/gallium/drivers/i915/i915_state_immediate.c | 58 ++++++++++++------------- 5 files changed, 59 insertions(+), 55 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 78a32340ba7..99303fae36a 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -163,6 +163,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->dirty = ~0; i915->hardware_dirty = ~0; + i915->immediate_dirty = ~0; /* Batch stream debugging is a bit hacked up at the moment: */ diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 7f49dc96d5d..0e53b0eafd5 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ 
b/src/gallium/drivers/i915/i915_context.h @@ -235,6 +235,7 @@ struct i915_context { struct i915_state current; unsigned hardware_dirty; + unsigned immediate_dirty; struct util_slab_mempool transfer_pool; }; diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index f5435bb8453..440e07e5ed5 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -94,4 +94,5 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) batch->iws->batchbuffer_flush(batch, fence); i915->vbo_flushed = 1; i915->hardware_dirty = ~0; + i915->immediate_dirty = ~0; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 5a89977c26c..fcbe299ec24 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -35,6 +35,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "util/u_math.h" + static unsigned translate_format( enum pipe_format format ) { switch (format) { @@ -178,11 +180,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) ENABLE_TEXKILL_3D_4D | TEXKILL_4D); - /* Need to initialize this to zero. 
- */ - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); - OUT_BATCH(0); - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); /* disable indirect state for now @@ -194,27 +191,30 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 7 dwords, 1 relocs */ if (i915->hardware_dirty & I915_HW_IMMEDIATE) { - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | - I1_LOAD_S(1) | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - I1_LOAD_S(5) | - I1_LOAD_S(6) | - (5)); - - if(i915->vbo) - OUT_RELOC(i915->vbo, - I915_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - /* FIXME: we should not do this */ - OUT_BATCH(0); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); + /* remove unwanted bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + OUT_BATCH(0); + } + + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1 << i)) + OUT_BATCH(i915->current.immediate[i]); + } } #if 01 @@ -443,4 +443,5 @@ i915_emit_hardware_state(struct i915_context *i915 ) i915->batch->relocs - save_relocs); i915->hardware_dirty = 0; + i915->immediate_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index 3dd227f6045..81348647399
100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -36,6 +36,22 @@ #include "util/u_memory.h" +/* Convenience function to set immediate state, marking it dirty only on change. + */ + +static INLINE void set_immediate(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.immediate[offset] == state) + return; + + i915->current.immediate[offset] = state; + i915->immediate_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_IMMEDIATE; +} + + /*********************************************************************** * S0,S1: Vertex buffer state. @@ -48,6 +64,12 @@ static void upload_S0S1(struct i915_context *i915) */ LIS0 = i915->vbo_offset; + /* Need to force this */ + if (i915->dirty & I915_NEW_VBO) { + i915->immediate_dirty |= 1 << I915_IMMEDIATE_S0; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } + /* I915_NEW_VERTEX_SIZE */ { @@ -57,16 +79,8 @@ static void upload_S0S1(struct i915_context *i915) (vertex_size << 16)); } - /* I915_NEW_VBO - */ - if (1 || - i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) - { - i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; - i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S0, LIS0); + set_immediate(i915, I915_IMMEDIATE_S1, LIS1); } const struct i915_tracked_state i915_upload_S0S1 = { @@ -94,13 +108,8 @@ static void upload_S2S4(struct i915_context *i915) LIS4 |= i915->rasterizer->LIS4; - if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || - LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { - - i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; - i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S2, LIS2); + set_immediate(i915, I915_IMMEDIATE_S4, LIS4); } const struct i915_tracked_state i915_upload_S2S4 = { @@
-135,10 +144,7 @@ static void upload_S5(struct i915_context *i915) } #endif - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { - i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S5, LIS5); } const struct i915_tracked_state i915_upload_S5 = { @@ -168,10 +174,7 @@ static void upload_S6(struct i915_context *i915) */ LIS6 |= i915->depth_stencil->depth_LIS6; - if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { - i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S6, LIS6); } const struct i915_tracked_state i915_upload_S6 = { @@ -193,10 +196,7 @@ static void upload_S7(struct i915_context *i915) LIS7 = i915->rasterizer->LIS7; #if 0 - if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { - i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S7, LIS7); #endif } -- cgit v1.2.3 From 8fb0ecd0cf4a4d672c0744abb4a9f030b3a17527 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 22 Feb 2011 23:12:08 +0000 Subject: i915g: Lazy emit dynamic state --- src/gallium/drivers/i915/i915_context.c | 1 + src/gallium/drivers/i915/i915_context.h | 1 + src/gallium/drivers/i915/i915_flush.c | 1 + src/gallium/drivers/i915/i915_state_dynamic.c | 69 ++++++++++++--------------- src/gallium/drivers/i915/i915_state_emit.c | 4 +- 5 files changed, 36 insertions(+), 40 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 99303fae36a..707b2e9f956 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -164,6 +164,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->dirty = ~0; i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; + i915->dynamic_dirty = ~0; /* Batch 
stream debugging is a bit hacked up at the moment: */ diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 0e53b0eafd5..2cf53424f06 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -236,6 +236,7 @@ struct i915_context { struct i915_state current; unsigned hardware_dirty; unsigned immediate_dirty; + unsigned dynamic_dirty; struct util_slab_mempool transfer_pool; }; diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 440e07e5ed5..911c051d1f2 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -95,4 +95,5 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->vbo_flushed = 1; i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; + i915->dynamic_dirty = ~0; } diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index d61a8c3407f..204cee6fe9e 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -46,18 +46,34 @@ * (active) state every time a 4kb boundary is crossed. 
*/ -static INLINE void set_dynamic_indirect(struct i915_context *i915, - unsigned offset, - const unsigned *src, - unsigned dwords) +static INLINE void set_dynamic(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.dynamic[offset] == state) + return; + + i915->current.dynamic[offset] = state; + i915->dynamic_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + + +static INLINE void set_dynamic_array(struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords) { unsigned i; if (!memcmp(src, &i915->current.dynamic[offset], dwords * 4)) return; - for (i = 0; i < dwords; i++) + for (i = 0; i < dwords; i++) { i915->current.dynamic[offset + i] = src[i]; + i915->dynamic_dirty |= 1 << (offset + i); + } i915->hardware_dirty |= I915_HW_DYNAMIC; } @@ -79,12 +95,7 @@ static void upload_MODES4(struct i915_context *i915) */ modes4 |= i915->blend->modes4; - /* Always, so that we know when state is in-active: - */ - set_dynamic_indirect(i915, - I915_DYNAMIC_MODES4, - &modes4, - 1); + set_dynamic(i915, I915_DYNAMIC_MODES4, modes4); } const struct i915_tracked_state i915_upload_MODES4 = { @@ -107,10 +118,7 @@ static void upload_BFO(struct i915_context *i915) bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT; } - set_dynamic_indirect(i915, - I915_DYNAMIC_BFO_0, - &(bfo[0]), - 2); + set_dynamic_array(i915, I915_DYNAMIC_BFO_0, bfo, 2); } const struct i915_tracked_state i915_upload_BFO = { @@ -141,10 +149,7 @@ static void upload_BLENDCOLOR(struct i915_context *i915) color[3]); } - set_dynamic_indirect(i915, - I915_DYNAMIC_BC_0, - bc, - 2); + set_dynamic_array(i915, I915_DYNAMIC_BC_0, bc, 2); } const struct i915_tracked_state i915_upload_BLENDCOLOR = { @@ -161,10 +166,7 @@ static void upload_IAB(struct i915_context *i915) { unsigned iab = i915->blend->iab; - set_dynamic_indirect(i915, - I915_DYNAMIC_IAB, - &iab, - 1); + set_dynamic(i915, I915_DYNAMIC_IAB, iab); } const struct 
i915_tracked_state i915_upload_IAB = { @@ -179,10 +181,8 @@ const struct i915_tracked_state i915_upload_IAB = { */ static void upload_DEPTHSCALE(struct i915_context *i915) { - set_dynamic_indirect(i915, - I915_DYNAMIC_DEPTHSCALE_0, - &(i915->rasterizer->ds[0].u), - 2); + set_dynamic_array(i915, I915_DYNAMIC_DEPTHSCALE_0, + &i915->rasterizer->ds[0].u, 2); } const struct i915_tracked_state i915_upload_DEPTHSCALE = { @@ -234,10 +234,7 @@ static void upload_STIPPLE(struct i915_context *i915) (p[3] << 12)); } - set_dynamic_indirect(i915, - I915_DYNAMIC_STP_0, - &st[0], - 2); + set_dynamic_array(i915, I915_DYNAMIC_STP_0, st, 2); } const struct i915_tracked_state i915_upload_STIPPLE = { @@ -253,10 +250,7 @@ const struct i915_tracked_state i915_upload_STIPPLE = { */ static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) { - set_dynamic_indirect(i915, - I915_DYNAMIC_SC_ENA_0, - &(i915->rasterizer->sc[0]), - 1); + set_dynamic(i915, I915_DYNAMIC_SC_ENA_0, i915->rasterizer->sc[0]); } const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { @@ -282,10 +276,7 @@ static void upload_SCISSOR_RECT(struct i915_context *i915) sc[1] = (y1 << 16) | (x1 & 0xffff); sc[2] = (y2 << 16) | (x2 & 0xffff); - set_dynamic_indirect(i915, - I915_DYNAMIC_SC_RECT_0, - &sc[0], - 3); + set_dynamic_array(i915, I915_DYNAMIC_SC_RECT_0, sc, 3); } const struct i915_tracked_state i915_upload_SCISSOR_RECT = { diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index fcbe299ec24..509d487b498 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -223,7 +223,8 @@ i915_emit_hardware_state(struct i915_context *i915 ) { int i; for (i = 0; i < I915_MAX_DYNAMIC; i++) { - OUT_BATCH(i915->current.dynamic[i]); + if (i915->dynamic_dirty & (1 << i)); + OUT_BATCH(i915->current.dynamic[i]); } } #endif @@ -444,4 +445,5 @@ i915_emit_hardware_state(struct i915_context *i915 ) i915->hardware_dirty = 0; 
i915->immediate_dirty = 0; + i915->dynamic_dirty = 0; } -- cgit v1.2.3 From 69d969e8fafd3357a140072f0f4bbf0f28db9769 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 17 Feb 2011 15:07:57 +1000 Subject: r600g: EXT_texture_array support. This adds EXT_texture_array support to r600g, it passes the piglit array-texture test but I suspect may not be complete. It currently requires a kernel patch to fix the CS checker to allow these, so you need to use R600_ARRAY_TEXTURE=true for now to enable them. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/eg_state_inlines.h | 4 ++++ src/gallium/drivers/r600/r600_pipe.c | 5 ++++- src/gallium/drivers/r600/r600_shader.c | 6 ++++++ src/gallium/drivers/r600/r600_state.c | 18 ++++++++++++++---- src/gallium/drivers/r600/r600_state_inlines.h | 4 ++++ src/gallium/drivers/r600/r600_texture.c | 11 +++++++---- 6 files changed, 39 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index f48b8a95d6f..b5fcc7106fe 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim) default: case PIPE_TEXTURE_1D: return V_030000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_030000_SQ_TEX_DIM_1D_ARRAY; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: return V_030000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_030000_SQ_TEX_DIM_2D_ARRAY; case PIPE_TEXTURE_3D: return V_030000_SQ_TEX_DIM_3D; case PIPE_TEXTURE_CUBE: diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 79b0d02252d..62d108f3518 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -292,9 +292,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow 
this */ case PIPE_CAP_INSTANCED_DRAWING: - case PIPE_CAP_ARRAY_TEXTURES: return 0; + case PIPE_CAP_ARRAY_TEXTURES: + /* fix once the CS checker upstream is fixed */ + return debug_get_bool_option("R600_ARRAY_TEXTURE", FALSE); + /* Texturing. */ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index acb3ef2c4d6..13ccc3fdc1f 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1850,6 +1850,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.coord_type_w = 1; } + if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { + tex.coord_type_z = 0; + tex.src_sel_z = 1; + } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) + tex.coord_type_z = 0; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) tex.src_sel_w = 2; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a1f83ac4271..c365979e439 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -402,6 +402,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; + unsigned height, depth; if (resource == NULL) return NULL; @@ -446,6 +447,15 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c array_mode = tmp->array_mode[0]; tile_type = tmp->tile_type; + height = texture->height0; + depth = texture->depth0; + if (texture->target == PIPE_TEXTURE_1D_ARRAY) { + height = 1; + depth = texture->array_size; + } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { + depth = texture->array_size; + } + /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, 
S_038000_DIM(r600_tex_dim(texture->target)) | @@ -454,8 +464,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038000_PITCH((pitch / 8) - 1) | S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 1) | + S_038004_TEX_HEIGHT(height - 1) | + S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); @@ -468,8 +478,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, S_038014_LAST_LEVEL(state->u.tex.last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); + S_038014_BASE_ARRAY(state->u.tex.first_layer) | + S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 7d5c9e0a050..29e12f1d468 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim) default: case PIPE_TEXTURE_1D: return V_038000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_038000_SQ_TEX_DIM_1D_ARRAY; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: return V_038000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_038000_SQ_TEX_DIM_2D_ARRAY; case PIPE_TEXTURE_3D: return V_038000_SQ_TEX_DIM_3D; case PIPE_TEXTURE_CUBE: diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 
ce06d74058f..095558d0337 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -79,10 +79,8 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, switch (rtex->resource.b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - return offset + layer * rtex->layer_size[level]; default: - assert(layer == 0); - return offset; + return offset + layer * rtex->layer_size[level]; } } @@ -262,8 +260,11 @@ static void r600_setup_miptree(struct pipe_screen *screen, else size = layer_size * 6; } - else + else if (ptex->target == PIPE_TEXTURE_3D) size = layer_size * u_minify(ptex->depth0, i); + else + size = layer_size * ptex->array_size; + /* align base image and start of miptree */ if ((i == 0) || (i == 1)) offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); @@ -507,6 +508,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.width0 = texture->width0; resource.height0 = texture->height0; resource.depth0 = 1; + resource.array_size = 1; resource.last_level = texture->last_level; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; @@ -642,6 +644,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, return &trans->transfer; } trans->transfer.stride = rtex->pitch_in_bytes[level]; + trans->transfer.layer_stride = rtex->layer_size[level]; trans->offset = r600_texture_get_offset(rtex, level, box->z); return &trans->transfer; } -- cgit v1.2.3 From 369ece170257ef687ca609cacd1d66d186274eb3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 23 Feb 2011 13:32:37 +0000 Subject: svga: Ensure rendertargets and textures are always rebound at every command buffer start. The svga_update_state() mechanism is inadequate as it will always end up flushing the primitives before processing the SVGA_NEW_COMMAND_BUFFER dirty state flag. 
--- src/gallium/drivers/svga/svga_context.c | 16 +++++ src/gallium/drivers/svga/svga_state.h | 4 ++ src/gallium/drivers/svga/svga_state_framebuffer.c | 49 ++++++++++++++ src/gallium/drivers/svga/svga_state_tss.c | 78 ++++++++++++++++++++--- 4 files changed, 138 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 9b737a187e7..f0f875b2b23 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -204,6 +204,7 @@ void svga_context_flush( struct svga_context *svga, { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); struct pipe_fence_handle *fence = NULL; + enum pipe_error ret; svga->curr.nr_fbs = 0; @@ -223,6 +224,21 @@ void svga_context_flush( struct svga_context *svga, */ svga->dirty |= SVGA_NEW_COMMAND_BUFFER; + /* + * We must reemit the surface bindings here, because svga_update_state + * will always flush the primitives before processing the + * SVGA_NEW_COMMAND_BUFFER state change. + * + * TODO: Refactor this. 
+ */ + ret = svga_reemit_framebuffer_bindings(svga); + assert(ret == PIPE_OK); + + ret = svga_reemit_tss_bindings(svga); + assert(ret == PIPE_OK); + + svga->dirty &= ~SVGA_NEW_COMMAND_BUFFER; + if (SVGA_DEBUG & DEBUG_SYNC) { if (fence) svga->pipe.screen->fence_finish( svga->pipe.screen, fence, 0); diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h index 22d5a6d552a..7f239e7a322 100644 --- a/src/gallium/drivers/svga/svga_state.h +++ b/src/gallium/drivers/svga/svga_state.h @@ -92,4 +92,8 @@ void svga_update_state_retry( struct svga_context *svga, enum pipe_error svga_emit_initial_state( struct svga_context *svga ); +enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga ); + +enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga ); + #endif diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index fcbb35e7972..cdadb20c178 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -93,6 +93,55 @@ static int emit_framebuffer( struct svga_context *svga, } +/* + * Rebind rendertargets. + * + * Similar to emit_framebuffer, but without any state checking/update. + * + * Called at the beginning of every new command buffer to ensure that + * non-dirty rendertargets are properly paged-in. 
+ */ +enum pipe_error +svga_reemit_framebuffer_bindings(struct svga_context *svga) +{ + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + unsigned i; + enum pipe_error ret; + + for (i = 0; i < MIN2(PIPE_MAX_COLOR_BUFS, 8); ++i) { + if (hw->cbufs[i]) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, hw->cbufs[i]); + if (ret != PIPE_OK) { + return ret; + } + } + } + + if (hw->zsbuf) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, hw->zsbuf); + if (ret != PIPE_OK) { + return ret; + } + + if (hw->zsbuf && + hw->zsbuf->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf); + if (ret != PIPE_OK) { + return ret; + } + } + else { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL); + if (ret != PIPE_OK) { + return ret; + } + } + } + + return PIPE_OK; +} + + struct svga_tracked_state svga_hw_framebuffer = { "hw framebuffer state", diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c index f8b269a101e..c502506b93b 100644 --- a/src/gallium/drivers/svga/svga_state_tss.c +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -52,6 +52,16 @@ void svga_cleanup_tss_binding(struct svga_context *svga) } +struct bind_queue { + struct { + unsigned unit; + struct svga_hw_view_state *view; + } bind[PIPE_MAX_SAMPLERS]; + + unsigned bind_count; +}; + + static int update_tss_binding(struct svga_context *svga, unsigned dirty ) @@ -63,15 +73,7 @@ update_tss_binding(struct svga_context *svga, unsigned min_lod; unsigned max_lod; - - struct { - struct { - unsigned unit; - struct svga_hw_view_state *view; - } bind[PIPE_MAX_SAMPLERS]; - - unsigned bind_count; - } queue; + struct bind_queue queue; queue.bind_count = 0; @@ -164,6 +166,64 @@ fail: } +/* + * Rebind textures. + * + * Similar to update_tss_binding, but without any state checking/update. 
+ * + * Called at the beginning of every new command buffer to ensure that + * non-dirty textures are properly paged-in. + */ +enum pipe_error +svga_reemit_tss_bindings(struct svga_context *svga) +{ + unsigned i; + enum pipe_error ret; + struct bind_queue queue; + + queue.bind_count = 0; + + for (i = 0; i < svga->state.hw_draw.num_views; i++) { + struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; + + if (view->v) { + queue.bind[queue.bind_count].unit = i; + queue.bind[queue.bind_count].view = view; + queue.bind_count++; + } + } + + if (queue.bind_count) { + SVGA3dTextureState *ts; + + ret = SVGA3D_BeginSetTextureState(svga->swc, + &ts, + queue.bind_count); + if (ret != PIPE_OK) { + return ret; + } + + for (i = 0; i < queue.bind_count; i++) { + struct svga_winsys_surface *handle; + + ts[i].stage = queue.bind[i].unit; + ts[i].name = SVGA3D_TS_BIND_TEXTURE; + + assert(queue.bind[i].view->v); + handle = queue.bind[i].view->v->handle; + svga->swc->surface_relocation(svga->swc, + &ts[i].value, + handle, + SVGA_RELOC_READ); + } + + SVGA_FIFOCommitAll(svga->swc); + } + + return PIPE_OK; +} + + struct svga_tracked_state svga_hw_tss_binding = { "texture binding emit", SVGA_NEW_TEXTURE_BINDING | -- cgit v1.2.3 From 3d190e44dec40650d88256cb074a12ca74d7c31e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 19 Feb 2011 14:14:40 +0100 Subject: nvc0: don't overwrite phi sources at the end of a loop Except the reference to its own result. 
--- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 687def0344d..e2838a0f1d0 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -465,6 +465,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) reg = (struct bld_register *)phi->target; phi->target = NULL; + /* start with s == 1, src[0] is from outside the loop */ for (s = 1, n = 0; n < bb->num_in; ++n) { if (bb->in_kind[n] != CFG_EDGE_BACK) continue; @@ -476,8 +477,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) for (i = 0; i < 4; ++i) if (phi->src[i] && phi->src[i]->value == val) break; - if (i == 4) + if (i == 4) { + /* skip values we do not want to replace */ + for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s); nv_reference(bld->pc, phi, s++, val); + } } bld->pc->current_block = save; -- cgit v1.2.3 From 88066d62ae7ec9c715e195f8ff65a0dc5b64c25e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 19 Feb 2011 14:18:28 +0100 Subject: nvc0: don't visit target blocks of a loop break multiple times --- src/gallium/drivers/nvc0/nvc0_pc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index e0cba05b976..3a3a00f27be 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -189,7 +189,10 @@ nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, bb[p++] = b->out[j]; break; case CFG_EDGE_LOOP_LEAVE: - bbb[pp++] = b->out[j]; + if (!b->out[j]->priv) { + bbb[pp++] = b->out[j]; + b->out[j]->priv = 1; + } break; default: assert(0); -- cgit v1.2.3 From 7d8ff54feb0b590048184bb41e214a511770fd20 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: 
Sat, 19 Feb 2011 20:26:29 +0100 Subject: nvc0: fix SSG --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index e2838a0f1d0..18ae0e00c41 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1825,11 +1825,11 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_SSG: FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ src0 = emit_fetch(bld, insn, 0, c); - src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero); - temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000)); - temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f)); - dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp); - bld_src_predicate(bld, dst0[c]->insn, 1, src1); + src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src1->insn->set_cond = NV_CC_GT; + src2->insn->set_cond = NV_CC_LT; + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); } break; case TGSI_OPCODE_SUB: -- cgit v1.2.3 From 1579017b08f28d460e17de65bcc8ba17ba695c37 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 19 Feb 2011 20:26:49 +0100 Subject: nvc0: multiply polygon offset units by 2 Wasn't sure if this still was necessary because the piglit test started to fail at some point on nv50 where we already do this. 
--- src/gallium/drivers/nvc0/nvc0_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 666e3802979..aa437195764 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -238,7 +238,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); SB_DATA (so, fui(cso->offset_scale)); SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); - SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */ + SB_DATA (so, fui(cso->offset_units * 2.0f)); } assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); -- cgit v1.2.3 From 410a13c5ce799fe97a4e4503190d0f66fb2559a3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 20 Feb 2011 15:10:02 +0100 Subject: nvc0: values for undefined outputs must have file GPR --- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 18ae0e00c41..5e208e8623d 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -133,13 +133,10 @@ struct bld_context { static INLINE ubyte bld_register_file(struct bld_context *bld, struct bld_register *reg) { - if (reg < &bld->avs[0][0]) return NV_FILE_GPR; - else - if (reg < &bld->pvs[0][0]) return NV_FILE_GPR; - else - if (reg < &bld->ovs[0][0]) return NV_FILE_PRED; - else - return NV_FILE_MEM_V; + if (reg >= &bld->pvs[0][0] && + reg < &bld->ovs[0][0]) + return NV_FILE_PRED; + return NV_FILE_GPR; } static INLINE struct nv_value * -- cgit v1.2.3 From a6ea37da4bd02241ce3bf522b93dd7ff0757f959 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 20 Feb 2011 17:57:47 +0100 Subject: nvc0: improve userspace fencing Before, there were 
situations in which we never checked the fences for completion (some loading screens for example) and thus never released memory. --- src/gallium/drivers/nvc0/nvc0_context.c | 22 +++++++++++++------- src/gallium/drivers/nvc0/nvc0_context.h | 2 ++ src/gallium/drivers/nvc0/nvc0_fence.c | 36 +++++++++++++++++++-------------- src/gallium/drivers/nvc0/nvc0_fence.h | 3 ++- src/gallium/drivers/nvc0/nvc0_screen.h | 3 ++- src/gallium/drivers/nvc0/nvc0_vbo.c | 6 ++++-- 6 files changed, 46 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 20c1a31b5b3..f02de4d044a 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -47,15 +47,12 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags, OUT_RING (chan, 0); } - if (fence) { - nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE); - } + if (fence) + nvc0_fence_reference((struct nvc0_fence **)fence, + nvc0->screen->fence.current); - if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) FIRE_RING(chan); - - nvc0_screen_fence_next(nvc0->screen); - } } static void @@ -71,6 +68,16 @@ nvc0_destroy(struct pipe_context *pipe) FREE(nvc0); } +void +nvc0_default_flush_notify(struct nouveau_channel *chan) +{ + struct nvc0_context *nvc0 = chan->user_private; + + nvc0_screen_fence_update(nvc0->screen, TRUE); + + nvc0_screen_fence_next(nvc0->screen); +} + struct pipe_context * nvc0_create(struct pipe_screen *pscreen, void *priv) { @@ -95,6 +102,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) nvc0->pipe.flush = nvc0_flush; screen->base.channel->user_private = nvc0; + screen->base.channel->flush_notify = nvc0_default_flush_notify; nvc0_init_query_functions(nvc0); nvc0_init_surface_functions(nvc0); diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h 
index a082ad4575c..3722f358d89 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -156,6 +156,8 @@ nvc0_surface(struct pipe_surface *ps) /* nvc0_context.c */ struct pipe_context *nvc0_create(struct pipe_screen *, void *); +void nvc0_default_flush_notify(struct nouveau_channel *); + void nvc0_bufctx_emit_relocs(struct nvc0_context *); void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, struct nvc0_resource *, uint32_t flags); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 3a3dd75c152..f2d4b1451bf 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -84,7 +84,8 @@ nvc0_fence_del(struct nvc0_fence *fence) struct nvc0_fence *it; struct nvc0_screen *screen = fence->screen; - if (fence->state == NVC0_FENCE_STATE_EMITTED) { + if (fence->state == NVC0_FENCE_STATE_EMITTED || + fence->state == NVC0_FENCE_STATE_FLUSHED) { if (fence == screen->fence.head) { screen->fence.head = fence->next; if (!screen->fence.head) @@ -119,8 +120,8 @@ nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence) fence->buffers = NULL; } -static void -nvc0_screen_fence_update(struct nvc0_screen *screen) +void +nvc0_screen_fence_update(struct nvc0_screen *screen, boolean flushed) { struct nvc0_fence *fence; struct nvc0_fence *next = NULL; @@ -147,38 +148,43 @@ nvc0_screen_fence_update(struct nvc0_screen *screen) screen->fence.head = next; if (!next) screen->fence.tail = NULL; -} -#define NVC0_FENCE_MAX_SPINS (1 << 17) + if (flushed) { + for (fence = next; fence; fence = fence->next) + fence->state = NVC0_FENCE_STATE_FLUSHED; + } +} boolean nvc0_fence_signalled(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; - if (fence->state == NVC0_FENCE_STATE_EMITTED) - nvc0_screen_fence_update(screen); + if (fence->state >= NVC0_FENCE_STATE_EMITTED) + nvc0_screen_fence_update(screen, FALSE); return fence->state == 
NVC0_FENCE_STATE_SIGNALLED; } +#define NVC0_FENCE_MAX_SPINS (1 << 31) + boolean nvc0_fence_wait(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; - int spins = 0; + uint32_t spins = 0; - if (fence->state == NVC0_FENCE_STATE_AVAILABLE) { + if (fence->state < NVC0_FENCE_STATE_EMITTED) { nvc0_fence_emit(fence); - FIRE_RING(screen->base.channel); - if (fence == screen->fence.current) nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); } + if (fence->state < NVC0_FENCE_STATE_FLUSHED) + FIRE_RING(screen->base.channel); do { - nvc0_screen_fence_update(screen); + nvc0_screen_fence_update(screen, FALSE); if (fence->state == NVC0_FENCE_STATE_SIGNALLED) return TRUE; @@ -189,8 +195,9 @@ nvc0_fence_wait(struct nvc0_fence *fence) #endif } while (spins < NVC0_FENCE_MAX_SPINS); - if (spins > 9000) - NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence); + debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n", + fence->sequence, + screen->fence.sequence_ack, screen->fence.sequence); return FALSE; } @@ -200,5 +207,4 @@ nvc0_screen_fence_next(struct nvc0_screen *screen) { nvc0_fence_emit(screen->fence.current); nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); - nvc0_screen_fence_update(screen); } diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h index e63c164bda4..3d8c3f8ba60 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.h +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -7,7 +7,8 @@ #define NVC0_FENCE_STATE_AVAILABLE 0 #define NVC0_FENCE_STATE_EMITTED 1 -#define NVC0_FENCE_STATE_SIGNALLED 2 +#define NVC0_FENCE_STATE_FLUSHED 2 +#define NVC0_FENCE_STATE_SIGNALLED 3 struct nvc0_mm_allocation; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 3b676fd21a1..5af96cbacea 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -138,9 +138,10 @@ nvc0_resource_validate(struct nvc0_resource 
*res, uint32_t flags) boolean nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); - void nvc0_screen_fence_next(struct nvc0_screen *); +void +nvc0_screen_fence_update(struct nvc0_screen *, boolean flushed); static INLINE boolean nvc0_screen_fence_emit(struct nvc0_screen *screen) diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 19fd85273c1..2db43d8704b 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -371,6 +371,8 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) { struct nvc0_context *nvc0 = chan->user_private; + nvc0_screen_fence_update(nvc0->screen, TRUE); + nvc0_bufctx_emit_relocs(nvc0); } @@ -398,7 +400,7 @@ nvc0_draw_arrays(struct nvc0_context *nvc0, prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - chan->flush_notify = NULL; + chan->flush_notify = nvc0_default_flush_notify; } static void @@ -568,7 +570,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, } } - chan->flush_notify = NULL; + chan->flush_notify = nvc0_default_flush_notify; } void -- cgit v1.2.3 From 67c7aefea33a7935e42ede30463eb7ca5009b068 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:04:49 +0100 Subject: nvc0: sync textures with render targets ourselves Fixes for example piglit/fbo-flushing and nexuiz' bloom effect. 
--- src/gallium/drivers/nvc0/nvc0_buffer.c | 6 +++--- src/gallium/drivers/nvc0/nvc0_resource.h | 5 +++-- src/gallium/drivers/nvc0/nvc0_screen.h | 5 +++++ src/gallium/drivers/nvc0/nvc0_state_validate.c | 16 ++++++++++++++++ src/gallium/drivers/nvc0/nvc0_tex.c | 7 +++++++ src/gallium/drivers/nvc0/nvc0_winsys.h | 2 +- 6 files changed, 35 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index f16671ac7ff..aa949bdfa36 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -117,7 +117,7 @@ nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf, memcpy(buf->data + start, bounce->map, size); nouveau_bo_unmap(bounce); - buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; nouveau_bo_ref(NULL, &bounce); if (mm) @@ -156,7 +156,7 @@ nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf, release_allocation(&mm, nvc0->screen->fence.current); if (start == 0 && size == buf->base.width0) - buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; return TRUE; } @@ -179,7 +179,7 @@ nvc0_buffer_transfer_get(struct pipe_context *pipe, if (buf->domain == NOUVEAU_BO_VRAM) { if (usage & PIPE_TRANSFER_READ) { - if (buf->status & NVC0_BUFFER_STATUS_DIRTY) + if (buf->status & NVC0_BUFFER_STATUS_GPU_WRITING) nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); } } diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 709e6157f55..599823c0dc9 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -24,7 +24,8 @@ struct nvc0_context; * USER_MEMORY: resource->data is a pointer to client memory and may change * between GL calls */ -#define NVC0_BUFFER_STATUS_DIRTY (1 << 0) +#define NVC0_BUFFER_STATUS_GPU_READING (1 << 0) 
+#define NVC0_BUFFER_STATUS_GPU_WRITING (1 << 1) #define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7) /* Resources, if mapped into the GPU's address space, are guaranteed to @@ -90,7 +91,7 @@ nvc0_resource_map_offset(struct nvc0_context *nvc0, nvc0_buffer_adjust_score(nvc0, res, -250); if ((res->domain == NOUVEAU_BO_VRAM) && - (res->status & NVC0_BUFFER_STATUS_DIRTY)) + (res->status & NVC0_BUFFER_STATUS_GPU_WRITING)) nvc0_buffer_download(nvc0, res, 0, res->base.width0); if ((res->domain != NOUVEAU_BO_GART) || diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 5af96cbacea..d952ff1f9b1 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -131,6 +131,11 @@ nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) if (likely(res->bo)) { nouveau_bo_validate(screen->base.channel, res->bo, flags); + if (flags & NOUVEAU_BO_WR) + res->status |= NVC0_BUFFER_STATUS_GPU_WRITING; + if (flags & NOUVEAU_BO_RD) + res->status |= NVC0_BUFFER_STATUS_GPU_READING; + nvc0_resource_fence(res, flags); } } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 96c1198d4cb..0cc0a0c6236 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -58,6 +58,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; unsigned i; + boolean serialize = FALSE; nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME); @@ -86,6 +87,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, sf->depth); OUT_RING (chan, mt->layer_stride >> 2); + if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, 
NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); } @@ -111,12 +117,22 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, sf->height); OUT_RING (chan, (unk << 16) | sf->depth); + if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); } else { BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 0); } + + if (serialize) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + } } static void diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index b219f82c903..968558a5869 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -196,9 +196,16 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) OUT_RINGp (chan, &tic->tic[3], 5); need_flush = TRUE; + } else + if (res->status & NVC0_BUFFER_STATUS_GPU_WRITING) { + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, (tic->id << 4) | 1); } nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + res->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; + res->status |= NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index 1544fb7a1de..45f71967eff 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -95,7 +95,7 @@ OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, unsigned delta, unsigned flags) { if (flags & NOUVEAU_BO_WR) - res->status |= NVC0_BUFFER_STATUS_DIRTY; + res->status |= NVC0_BUFFER_STATUS_GPU_WRITING; return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); } -- cgit v1.2.3 From 4377657f8e204fe2c7b6af194293dd3bea63fca8 Mon Sep 17 00:00:00 2001 
From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:08:23 +0100 Subject: nvc0: correct allocation of constrained registers In linear scan we can't allocate multiple values with different live ranges at the same time to assign them consecutive regs. Maybe we should just switch to graph coloring for all values ... --- src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 221 +++++++++++++++++++--------- 1 file changed, 154 insertions(+), 67 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index 718943bdbdf..d7213949483 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -39,6 +39,30 @@ struct register_set { struct nv_pc *pc; }; +/* aliasing is allowed */ +static void +intersect_register_sets(struct register_set *dst, + struct register_set *src1, struct register_set *src2) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0]; + dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1]; + } +} + +static void +mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + set->bits[i][0] = (set->bits[i][0] | mask) & umask; + set->bits[i][1] = (set->bits[i][1] | mask) & umask; + } +} + struct nv_pc_pass { struct nv_pc *pc; struct nv_instruction **insns; @@ -327,14 +351,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) assert(b->join == a->join); } -static INLINE void +static INLINE boolean try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) { if (!join_allowed(ctx, a, b)) { #ifdef NVC0_RA_DEBUG_JOIN debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); #endif - return; + return FALSE; } if (livei_have_overlap(a->join, b->join)) { #ifdef NVC0_RA_DEBUG_JOIN @@ -342,10 +366,27 @@ try_join_values(struct nv_pc_pass 
*ctx, struct nv_value *a, struct nv_value *b) livei_print(a); livei_print(b); #endif - return; + return FALSE; } do_join_values(ctx, a, b); + + return TRUE; +} + +static void +join_values_nofail(struct nv_pc_pass *ctx, + struct nv_value *a, struct nv_value *b, boolean type_only) +{ + if (type_only) { + assert(join_allowed(ctx, a, b)); + do_join_values(ctx, a, b); + } else { + boolean ok = try_join_values(ctx, a, b); + if (!ok) { + NOUVEAU_ERR("failed to coalesce values\n"); + } + } } static INLINE boolean @@ -474,8 +515,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) return 0; } +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_SELECT (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_BIND (1 << 3) + static int -pass_join_values(struct nv_pc_pass *ctx, int iter) +pass_join_values(struct nv_pc_pass *ctx, unsigned mask) { int c, n; @@ -484,29 +530,28 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) switch (i->opcode) { case NV_OP_PHI: - if (iter != 2) + if (!(mask & JOIN_MASK_PHI)) break; for (c = 0; c < 6 && i->src[c]; ++c) - try_join_values(ctx, i->def[0], i->src[c]->value); + join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); break; case NV_OP_MOV: - if ((iter == 2) && i->src[0]->value->insn && - !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) + if (!(mask & JOIN_MASK_MOV)) + break; + if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1]) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: - if (iter != 1) + if (!(mask & JOIN_MASK_SELECT)) break; - for (c = 0; c < 6 && i->src[c]; ++c) { - assert(join_allowed(ctx, i->def[0], i->src[c]->value)); - do_join_values(ctx, i->def[0], i->src[c]->value); - } + for (c = 0; c < 6 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); break; case NV_OP_BIND: - if (iter) + if (!(mask & JOIN_MASK_BIND)) break; for (c = 0; c < 4 && i->src[c]; ++c) - do_join_values(ctx, i->def[c], i->src[c]->value); + 
join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); break; case NV_OP_TEX: case NV_OP_TXB: @@ -743,21 +788,6 @@ nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) set->pc = pc; } -/* We allocate registers for all defs of a vector instruction at once. - * Since we'll encounter all of them in the allocation loop, do the allocation - * when we're at the one with the live range that starts latest. - */ -static boolean -is_best_representative(struct nv_value *val) -{ - struct nv_instruction *nvi = val->insn; - int i; - for (i = 0; i < 4 && val->insn->def[i]; ++i) - if (nvi->def[i]->livei && nvi->def[i]->livei->bgn > val->livei->bgn) - return FALSE; - return TRUE; -} - static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { @@ -774,42 +804,46 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) elem->next = nval; } -static int -pass_linear_scan(struct nv_pc_pass *ctx, int iter) +static void +collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, + boolean assigned_only) { - struct nv_instruction *i; - struct register_set f, free; + struct nv_value *val; int k, n; - struct nv_value *cur, *val, *tmp[2]; - struct nv_value active, inactive, handled, unhandled; - - make_empty_list(&active); - make_empty_list(&inactive); - make_empty_list(&handled); - make_empty_list(&unhandled); - nvc0_ctor_register_set(ctx->pc, &free); + make_empty_list(head); - /* joined values should have range = NULL and thus not be added; - * also, fixed memory values won't be added because they're not - * def'd, just used - */ for (n = 0; n < ctx->num_insns; ++n) { - i = ctx->insns[n]; + struct nv_instruction *i = ctx->insns[n]; + /* for joined values, only the representative will have livei != NULL */ for (k = 0; k < 5; ++k) { if (i->def[k] && i->def[k]->livei) - insert_ordered_tail(&unhandled, i->def[k]); - else - if (0 && i->def[k]) - debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); + if (!assigned_only 
|| i->def[k]->reg.id >= 0) + insert_ordered_tail(head, i->def[k]); } } - for (val = unhandled.next; val != unhandled.prev; val = val->next) { + for (val = head->next; val != head->prev; val = val->next) { assert(val->join == val); assert(val->livei->bgn <= val->next->livei->bgn); } +} + +static int +pass_linear_scan(struct nv_pc_pass *ctx) +{ + struct register_set f, free; + struct nv_value *cur, *val, *tmp[2]; + struct nv_value active, inactive, handled, unhandled; + + make_empty_list(&active); + make_empty_list(&inactive); + make_empty_list(&handled); + + nvc0_ctor_register_set(ctx->pc, &free); + + collect_register_values(ctx, &unhandled, FALSE); foreach_s(cur, tmp[0], &unhandled) { remove_from_list(cur); @@ -846,16 +880,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) reg_occupy(&f, val); if (cur->reg.id < 0) { - boolean mem = FALSE; - int v = nvi_vector_size(cur->insn); - - if (v > 1) { - if (is_best_representative(cur)) - mem = !reg_assign(&f, &cur->insn->def[0], v); - } else { - if (iter) - mem = !reg_assign(&f, &cur, 1); - } + boolean mem = !reg_assign(&f, &cur, 1); if (mem) { NOUVEAU_ERR("out of registers\n"); @@ -869,6 +894,68 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) return 0; } +/* Allocate values defined by instructions such as TEX, which have to be + * assigned to consecutive registers. + * Linear scan doesn't really work here since the values can have different + * live intervals. 
+ */ +static int +pass_allocate_constrained_values(struct nv_pc_pass *ctx) +{ + struct nv_value regvals, *val; + struct nv_instruction *i; + struct nv_value *defs[4]; + struct register_set regs[4]; + int n, vsize, c; + uint32_t mask; + boolean mem; + + collect_register_values(ctx, &regvals, TRUE); + + for (n = 0; n < ctx->num_insns; ++n) { + i = ctx->insns[n]; + vsize = nvi_vector_size(i); + if (!(vsize > 1)) + continue; + assert(vsize <= 4); + + for (c = 0; c < vsize; ++c) + defs[c] = i->def[c]->join; + + if (defs[0]->reg.id >= 0) { + for (c = 1; c < vsize; ++c) + assert(defs[c]->reg.id >= 0); + continue; + } + + for (c = 0; c < vsize; ++c) { + nvc0_ctor_register_set(ctx->pc, &regs[c]); + + foreach(val, &regvals) { + if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) + reg_occupy(&regs[c], val); + } + mask = 0x11111111; + if (vsize == 2) /* granularity is 2 and not 4 */ + mask |= 0x11111111 << 2; + mask_register_set(&regs[c], 0, mask << c); + + if (defs[c]->livei) + insert_ordered_tail(&regvals, defs[c]); + } + for (c = 1; c < vsize; ++c) + intersect_register_sets(&regs[0], &regs[0], &regs[c]); + + mem = !reg_assign(&regs[0], &defs[0], vsize); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + return 0; +} + static int nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) { @@ -922,19 +1009,19 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) livei_print(&pc->values[i]); #endif - ret = pass_join_values(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_PHI); if (ret) goto out; - ret = pass_linear_scan(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND); if (ret) goto out; - ret = pass_join_values(ctx, 1); + ret = pass_join_values(ctx, JOIN_MASK_MOV); if (ret) goto out; - ret = pass_join_values(ctx, 2); + ret = pass_allocate_constrained_values(ctx); if (ret) goto out; - ret = pass_linear_scan(ctx, 1); + ret = pass_linear_scan(ctx); if (ret) goto out; -- cgit v1.2.3 From cd47f10c901d7ac48843586432c2e592ed35eed3 Mon Sep 17 00:00:00
2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:22:15 +0100 Subject: nvc0: preemptively insert branch at ENDIF Might be necessary if a block sneaks in somewhere, like a common block for moves of phi sources after a loop break. This is harmless and normally will be removed before emission. --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 6 +++++- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 3d03400518b..8d4d0f3af60 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -142,9 +142,10 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) struct nv_instruction *nvi, *next; int j; + /* find first non-empty block emitted before b */ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); - if (j >= 0) { + for (; j >= 0; --j) { in = pc->bb_list[j]; /* check for no-op branches (BRA $PC+8) */ @@ -158,6 +159,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) nvc0_insn_delete(in->exit); } b->emit_pos = in->emit_pos + in->emit_size; + + if (in->emit_size) /* no more no-op branches to b */ + break; } pc->bb_list[pc->num_blocks++] = b; diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 5e208e8623d..fc19ef1eb19 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1634,6 +1634,10 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); + if (bld->pc->current_block->exit && + !bld->pc->current_block->exit->terminator) + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE); + --bld->cond_lvl; nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); -- cgit 
v1.2.3 From f01748355360ac98c772ce8b82ca0e6c2f94629a Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 22 Feb 2011 21:50:17 +0100 Subject: nvc0: kick out empty live ranges They affect overlap tests even though they're actually empty. --- src/gallium/drivers/nvc0/nvc0_pc_regalloc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index d7213949483..f4afe083e2d 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -87,6 +87,9 @@ add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) { struct nv_range *range, **nextp = &val->livei; + if (bgn == end) /* [a, a) is invalid / empty */ + return TRUE; + for (range = val->livei; range; range = range->next) { if (end < range->bgn) break; /* insert before */ -- cgit v1.2.3 From 96121399077787a9701c173dbb3ce0d1f30f00a9 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:23:23 +0100 Subject: nvc0: presin and preex2 can load from const space --- src/gallium/drivers/nvc0/nvc0_pc_print.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 7840078614f..4088a557231 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -353,8 +353,8 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, + { 
NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, -- cgit v1.2.3 From b5f04b20089c219f760fb6a369041bd782708247 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 20 Feb 2011 13:13:11 +0100 Subject: nvc0: don't fold loads from local memory --- src/gallium/drivers/nvc0/nvc0_pc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index 3a3a00f27be..f51d289e8cd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -44,6 +44,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s, if (ld->indirect >= 0) return FALSE; + /* a few ops can use g[] sources directly, but we don't support g[] yet */ + if (ld->src[0]->value->reg.file == NV_FILE_MEM_L || + ld->src[0]->value->reg.file == NV_FILE_MEM_G) + return FALSE; + for (i = 0; i < 3 && nvi->src[i]; ++i) if (nvi->src[i]->value->reg.file == NV_FILE_IMM) return FALSE; -- cgit v1.2.3 From 1a8297139396ec2a6415ef803a3901e1ecef485c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 17:26:44 +0100 Subject: nvc0: set local memory usage info in shader header Before this, l[] access was a no-op. 
--- src/gallium/drivers/nvc0/nvc0_context.h | 1 + src/gallium/drivers/nvc0/nvc0_program.c | 9 ++++++++- src/gallium/drivers/nvc0/nvc0_program.h | 1 + src/gallium/drivers/nvc0/nvc0_screen.c | 8 ++++++-- src/gallium/drivers/nvc0/nvc0_shader_state.c | 15 +++++++++++++++ src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 3 +++ 6 files changed, 34 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 3722f358d89..1ce5554f7b7 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -81,6 +81,7 @@ struct nvc0_context { uint8_t num_vtxelts; uint8_t num_textures[5]; uint8_t num_samplers[5]; + uint8_t tls_required; /* bitmask of shader types using l[] */ uint16_t scissor; uint32_t uniform_buffer_bound[5]; } state; diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index f7ea97ddb1d..0685a842304 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -301,9 +301,11 @@ prog_decl(struct nvc0_translation_info *ti, ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); assert(first == last); break; + case TGSI_FILE_TEMPORARY: + ti->temp128_nr = MAX2(ti->temp128_nr, last + 1); + break; case TGSI_FILE_NULL: case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: case TGSI_FILE_IMMEDIATE: @@ -644,6 +646,11 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) break; } + if (ti->require_stores) { + prog->hdr[0] |= 1 << 26; + prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */ + } + assert(!ret); return ret; } diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index 3450cec175d..f6fea29780b 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -76,6 +76,7 @@ struct nvc0_translation_info { 
uint32_t *immd32; ubyte *immd32_ty; unsigned immd32_nr; + unsigned temp128_nr; ubyte edgeflag_out; struct nvc0_subroutine *subr; unsigned num_subrs; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 321d86bdf1a..f7f1fd09a12 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -475,7 +475,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, (15 << 4) | 1); } - screen->tls_size = 4 * 4 * 32 * 128 * 4; + screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, screen->tls_size, &screen->tls); if (ret) @@ -489,6 +489,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RING (chan, screen->tls_size >> 32); OUT_RING (chan, screen->tls_size); + BEGIN_RING(chan, RING_3D_(0x07a0), 1); + OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1); OUT_RING (chan, 0); @@ -642,8 +644,10 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) nouveau_bo_validate(chan, screen->text, flags); nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); - nouveau_bo_validate(chan, screen->tls, flags); nouveau_bo_validate(chan, screen->mp_stack_bo, flags); + + if (screen->cur_ctx && screen->cur_ctx->state.tls_required) + nouveau_bo_validate(chan, screen->tls, flags); } int diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 633641713dc..357f8b80deb 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -27,6 +27,16 @@ #include "nvc0_context.h" +static INLINE void +nvc0_program_update_context_state(struct nvc0_context *nvc0, + struct nvc0_program *prog, int stage) +{ + if (prog->hdr[1]) + nvc0->state.tls_required |= 1 << stage; + else + 
nvc0->state.tls_required &= ~(1 << stage); +} + static boolean nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) { @@ -77,6 +87,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) if (!nvc0_program_validate(nvc0, vp)) return; + nvc0_program_update_context_state(nvc0, vp, 0); BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2); OUT_RING (chan, 0x11); @@ -98,6 +109,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) if (!nvc0_program_validate(nvc0, fp)) return; + nvc0_program_update_context_state(nvc0, fp, 4); BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1); OUT_RING (chan, fp->fp.early_z); @@ -127,6 +139,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, tp)) return; + nvc0_program_update_context_state(nvc0, tp, 1); BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2); OUT_RING (chan, 0x21); @@ -148,6 +161,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, tp)) return; + nvc0_program_update_context_state(nvc0, tp, 2); BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); OUT_RING (chan, 0x31); @@ -170,6 +184,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, gp)) return; + nvc0_program_update_context_state(nvc0, gp, 3); BEGIN_RING(chan, RING_3D(GP_SELECT), 1); OUT_RING (chan, 0x41); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index fc19ef1eb19..f7dff596c28 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -364,6 +364,9 @@ bld_loop_phi(struct bld_context *bld, struct bld_register *reg, struct nv_basic_block *bb = bld->pc->current_block; struct nv_value *val = NULL; + if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */ + return NULL; + if (bld->loop_lvl > 1) { --bld->loop_lvl; if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl))) -- cgit v1.2.3 From 9dd7d0803e5a881510d05a61908d6a7ffc04d16b Mon Sep 17 
00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Feb 2011 15:28:04 +0100 Subject: nvc0: fix new_value calls using type instead of size --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 8d4d0f3af60..8b56aa427fd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -592,7 +592,7 @@ constant_operand(struct nv_pc *pc, } else if (u.s32 > 0 && u.s32 == (1 << shift)) { nvi->opcode = NV_OP_SHL; - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift; nv_reference(pc, nvi, 0, nvi->src[t]->value); nv_reference(pc, nvi, 1, val); break; @@ -600,14 +600,14 @@ constant_operand(struct nv_pc *pc, break; case NV_OP_RCP: u.f32 = 1.0f / u.f32; - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; nvi->opcode = NV_OP_MOV; assert(s == 0); nv_reference(pc, nvi, 0, val); break; case NV_OP_RSQ: u.f32 = 1.0f / sqrtf(u.f32); - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; nvi->opcode = NV_OP_MOV; assert(s == 0); nv_reference(pc, nvi, 0, val); -- cgit v1.2.3 From b0bf4ee85f01e9cbe240e49e67a947d052daa3f3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 23 Feb 2011 15:00:26 +0100 Subject: nvc0: sprite coord enable is per GENERIC, not overall index --- src/gallium/drivers/nvc0/nvc0_state_validate.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 0cc0a0c6236..70c418fad9b 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c 
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -302,14 +302,12 @@ nvc0_validate_sprite_coords(struct nvc0_context *nvc0) if (nvc0->rast->pipe.point_quad_rasterization) { uint32_t en = nvc0->rast->pipe.sprite_coord_enable; - int i; - struct nvc0_program *prog = nvc0->fragprog; while (en) { - i = ffs(en) - 1; + int i = ffs(en) - 1; en &= ~(1 << i); - if (prog->fp.in_pos[i] >= 0xc0 && prog->fp.in_pos[i] < 0xe0) - reg |= 8 << ((prog->fp.in_pos[i] - 0xc0) / 4); + if (i >= 0 && i < 8) + reg |= 8 << i; } } -- cgit v1.2.3 From d0caaba370cb70f426180a46e5475bf8a05ac19b Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 23 Feb 2011 17:29:02 +0100 Subject: nvc0: change TGSI CMP translation to use slct Saves us the explicit compare instruction needed with selp. --- src/gallium/drivers/nvc0/nvc0_pc_optimize.c | 13 ++++++++++--- src/gallium/drivers/nvc0/nvc0_pc_print.c | 6 +++--- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 4 ++-- 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 8b56aa427fd..c5a7367a5fd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -245,7 +245,9 @@ check_swap_src_0_1(struct nv_instruction *nvi) struct nv_ref *src0 = nvi->src[0]; struct nv_ref *src1 = nvi->src[1]; - if (!nv_op_commutative(nvi->opcode) && NV_BASEOP(nvi->opcode) != NV_OP_SET) + if (!nv_op_commutative(nvi->opcode) && + NV_BASEOP(nvi->opcode) != NV_OP_SET && + NV_BASEOP(nvi->opcode) != NV_OP_SLCT) return; assert(src0 && src1 && src0->value && src1->value); @@ -266,8 +268,13 @@ check_swap_src_0_1(struct nv_instruction *nvi) } } - if (nvi->src[0] != src0 && NV_BASEOP(nvi->opcode) == NV_OP_SET) - nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; + if (nvi->src[0] != src0) { + if (NV_BASEOP(nvi->opcode) == NV_OP_SET) + nvi->set_cond = (nvi->set_cond & ~7) | 
cc_swapped[nvi->set_cond & 7]; + else + if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT) + nvi->set_cond = NV_CC_INVERSE(nvi->set_cond); + } } static void diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 4088a557231..90c669cc4b8 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -363,9 +363,9 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SLCT, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index f7dff596c28..a44d330c731 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1493,10 +1493,10 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { src0 = emit_fetch(bld, insn, 0, c); - src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero); src1 = emit_fetch(bld, insn, 1, c); src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0); + dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0); + dst0[c]->insn->set_cond = NV_CC_LT; } break; case TGSI_OPCODE_COS: -- cgit v1.2.3 From 11b9f4439c56045a8f718e483844135dd9fbcb58 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 23 Feb 2011 14:54:25 +0100 Subject: nvc0: fix PointCoord enable in FP header --- src/gallium/drivers/nvc0/nvc0_program.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) 
(limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 0685a842304..899fe147c6a 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -535,8 +535,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) a = ti->input_loc[i][c] / 2; if (ti->input_loc[i][c] >= 0x2c0) a -= 32; - if ((a & ~7) == 0x70/2) - fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ + if (ti->input_loc[i][0] == 0x70) + fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */ + else + if (ti->input_loc[i][0] == 0x2e0) + fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */ else fp->hdr[4 + a / 32] |= m << (a % 32); } -- cgit v1.2.3