| author    | Christian König <[email protected]> | 2011-02-24 22:02:42 +0100 |
| committer | Christian König <[email protected]> | 2011-02-24 22:02:42 +0100 |
| commit    | b922a0ce12916a91cfc3e56714913fcf63279ff2 (patch) | |
| tree      | e24fa039925220882d155ccb987f7914e83f4372 /src/gallium | |
| parent    | f013b4f8f1329982727691a55cc263e3011d02bf (diff) | |
| parent    | c0ad70ae31ee5501281b434d56e389fc92b13a3a (diff) | |
Merge remote branch 'origin/master' into pipe-video
Conflicts:
configure.ac
src/gallium/auxiliary/Makefile
src/gallium/auxiliary/SConscript
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_state_inlines.h
src/gallium/drivers/r600/r600_texture.c
Diffstat (limited to 'src/gallium')
332 files changed, 12934 insertions, 6947 deletions
diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 2265f1de46c..31580556f03 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -42,6 +42,7 @@ if env['drm']: # SConscript([ # 'drivers/nouveau/SConscript', # 'drivers/nv50/SConscript', + # 'drivers/nvc0/SConscript', # 'drivers/nvfx/SConscript', # ]) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 6cf1ddd43fe..e40f546929d 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -143,6 +143,7 @@ C_SOURCES = \ util/u_transfer.c \ util/u_resource.c \ util/u_upload_mgr.c \ + util/u_vbuf_mgr.c \ vl/vl_bitstream_parser.c \ vl/vl_mpeg12_mc_renderer.c \ vl/vl_compositor.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index e6806d9a723..11024d41923 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -190,11 +190,11 @@ source = [ 'util/u_tile.c', 'util/u_transfer.c', 'util/u_upload_mgr.c', + 'util/u_vbuf_mgr.c', 'vl/vl_bitstream_parser.c', 'vl/vl_mpeg12_mc_renderer.c', 'vl/vl_compositor.c', 'vl/vl_csc.c', - 'target-helpers/wrap_screen.c', ] if env['llvm']: diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 58b022d531d..fdd40fca126 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -47,41 +47,45 @@ #include "cso_cache/cso_hash.h" #include "cso_context.h" -struct cso_context { - struct pipe_context *pipe; - struct cso_cache *cache; +/** + * Info related to samplers and sampler views. + * We have one of these for fragment samplers and another for vertex samplers. + */ +struct sampler_info +{ struct { void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; - - void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; - unsigned nr_vertex_samplers; } hw; void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; - void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; - unsigned nr_vertex_samplers; - - unsigned nr_samplers_saved; void *samplers_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers_saved; + + struct pipe_sampler_view *views[PIPE_MAX_SAMPLERS]; + unsigned nr_views; + + struct pipe_sampler_view *views_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_views_saved; +}; + - unsigned nr_vertex_samplers_saved; - void *vertex_samplers_saved[PIPE_MAX_VERTEX_SAMPLERS]; - uint nr_fragment_sampler_views; - struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; +struct cso_context { + struct pipe_context *pipe; + struct cso_cache *cache; - uint nr_vertex_sampler_views; - struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; + struct sampler_info fragment_samplers; + struct sampler_info vertex_samplers; - uint nr_fragment_sampler_views_saved; - struct pipe_sampler_view *fragment_sampler_views_saved[PIPE_MAX_SAMPLERS]; + uint nr_vertex_buffers; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; - uint nr_vertex_sampler_views_saved; - struct pipe_sampler_view *vertex_sampler_views_saved[PIPE_MAX_VERTEX_SAMPLERS]; + uint nr_vertex_buffers_saved; + struct pipe_vertex_buffer vertex_buffers_saved[PIPE_MAX_ATTRIBS]; /** Current and saved state. * The saved state is used as a 1-deep stack. 
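The cso_context changes above also add vertex-buffer bindings with the same 1-deep save/restore semantics as the rest of the CSO state. Below is a minimal usage sketch of the new entry points (cso_set_vertex_buffers and friends are declared later in this commit in cso_context.h); the quad buffer and its vertex layout are hypothetical, not part of the patch:

```c
#include <string.h>
#include "pipe/p_state.h"
#include "cso_cache/cso_context.h"

/* Sketch: temporarily bind a private vertex buffer for a meta operation,
 * then restore whatever the state tracker had bound (1-deep save/restore). */
static void
draw_with_private_vbo(struct cso_context *cso, struct pipe_resource *quad_vbo)
{
   struct pipe_vertex_buffer vb;

   memset(&vb, 0, sizeof(vb));
   vb.buffer = quad_vbo;            /* hypothetical buffer holding the quad vertices */
   vb.stride = 4 * sizeof(float);   /* assumed layout: x, y, s, t per vertex */
   vb.buffer_offset = 0;

   cso_save_vertex_buffers(cso);          /* stash the current bindings */
   cso_set_vertex_buffers(cso, 1, &vb);   /* only re-binds if the state actually changed */

   /* ... set shaders, sampler views, etc. and issue the draw here ... */

   cso_restore_vertex_buffers(cso);       /* re-bind the saved buffers */
}
```
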
@@ -252,6 +256,8 @@ struct cso_context *cso_create_context( struct pipe_context *pipe ) if (ctx == NULL) goto out; + assert(PIPE_MAX_SAMPLERS == PIPE_MAX_VERTEX_SAMPLERS); + ctx->cache = cso_cache_create(); if (ctx->cache == NULL) goto out; @@ -278,7 +284,8 @@ out: void cso_release_all( struct cso_context *ctx ) { unsigned i; - + struct sampler_info *info; + if (ctx->pipe) { ctx->pipe->bind_blend_state( ctx->pipe, NULL ); ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); @@ -294,19 +301,30 @@ void cso_release_all( struct cso_context *ctx ) ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL); } + /* free fragment samplers, views */ + info = &ctx->fragment_samplers; for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - pipe_sampler_view_reference(&ctx->fragment_sampler_views[i], NULL); - pipe_sampler_view_reference(&ctx->fragment_sampler_views_saved[i], NULL); + pipe_sampler_view_reference(&info->views[i], NULL); + pipe_sampler_view_reference(&info->views_saved[i], NULL); } - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - pipe_sampler_view_reference(&ctx->vertex_sampler_views[i], NULL); - pipe_sampler_view_reference(&ctx->vertex_sampler_views_saved[i], NULL); + /* free vertex samplers, views */ + info = &ctx->vertex_samplers; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_sampler_view_reference(&info->views[i], NULL); + pipe_sampler_view_reference(&info->views_saved[i], NULL); } util_unreference_framebuffer_state(&ctx->fb); util_unreference_framebuffer_state(&ctx->fb_saved); + util_copy_vertex_buffers(ctx->vertex_buffers, + &ctx->nr_vertex_buffers, + NULL, 0); + util_copy_vertex_buffers(ctx->vertex_buffers_saved, + &ctx->nr_vertex_buffers_saved, + NULL, 0); + if (ctx->cache) { cso_cache_delete( ctx->cache ); ctx->cache = NULL; @@ -395,233 +413,6 @@ void cso_restore_blend(struct cso_context *ctx) -enum pipe_error cso_single_sampler(struct cso_context *ctx, - unsigned idx, - const struct pipe_sampler_state *templ) -{ - void *handle = NULL; - - if (templ != NULL) { - unsigned key_size = sizeof(struct pipe_sampler_state); - unsigned hash_key = cso_construct_key((void*)templ, key_size); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_SAMPLER, - (void*)templ, key_size); - - if (cso_hash_iter_is_null(iter)) { - struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); - if (!cso) - return PIPE_ERROR_OUT_OF_MEMORY; - - memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; - cso->context = ctx->pipe; - - iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); - if (cso_hash_iter_is_null(iter)) { - FREE(cso); - return PIPE_ERROR_OUT_OF_MEMORY; - } - - handle = cso->data; - } - else { - handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; - } - } - - ctx->samplers[idx] = handle; - return PIPE_OK; -} - -enum pipe_error -cso_single_vertex_sampler(struct cso_context *ctx, - unsigned idx, - const struct pipe_sampler_state *templ) -{ - void *handle = NULL; - - if (templ != NULL) { - unsigned key_size = sizeof(struct pipe_sampler_state); - unsigned hash_key = cso_construct_key((void*)templ, key_size); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_SAMPLER, - (void*)templ, key_size); - - if (cso_hash_iter_is_null(iter)) { - struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); - if (!cso) - return PIPE_ERROR_OUT_OF_MEMORY; - - memcpy(&cso->state, templ, sizeof(*templ)); 
- cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; - cso->context = ctx->pipe; - - iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); - if (cso_hash_iter_is_null(iter)) { - FREE(cso); - return PIPE_ERROR_OUT_OF_MEMORY; - } - - handle = cso->data; - } - else { - handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; - } - } - - ctx->vertex_samplers[idx] = handle; - return PIPE_OK; -} - -void cso_single_sampler_done( struct cso_context *ctx ) -{ - unsigned i; - - /* find highest non-null sampler */ - for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { - if (ctx->samplers[i - 1] != NULL) - break; - } - - ctx->nr_samplers = i; - - if (ctx->hw.nr_samplers != ctx->nr_samplers || - memcmp(ctx->hw.samplers, - ctx->samplers, - ctx->nr_samplers * sizeof(void *)) != 0) - { - memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); - ctx->hw.nr_samplers = ctx->nr_samplers; - - ctx->pipe->bind_fragment_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); - } -} - -void -cso_single_vertex_sampler_done(struct cso_context *ctx) -{ - unsigned i; - - /* find highest non-null sampler */ - for (i = PIPE_MAX_VERTEX_SAMPLERS; i > 0; i--) { - if (ctx->vertex_samplers[i - 1] != NULL) - break; - } - - ctx->nr_vertex_samplers = i; - - if (ctx->hw.nr_vertex_samplers != ctx->nr_vertex_samplers || - memcmp(ctx->hw.vertex_samplers, - ctx->vertex_samplers, - ctx->nr_vertex_samplers * sizeof(void *)) != 0) - { - memcpy(ctx->hw.vertex_samplers, - ctx->vertex_samplers, - ctx->nr_vertex_samplers * sizeof(void *)); - ctx->hw.nr_vertex_samplers = ctx->nr_vertex_samplers; - - ctx->pipe->bind_vertex_sampler_states(ctx->pipe, - ctx->nr_vertex_samplers, - ctx->vertex_samplers); - } -} - -/* - * If the function encouters any errors it will return the - * last one. Done to always try to set as many samplers - * as possible. - */ -enum pipe_error cso_set_samplers( struct cso_context *ctx, - unsigned nr, - const struct pipe_sampler_state **templates ) -{ - unsigned i; - enum pipe_error temp, error = PIPE_OK; - - /* TODO: fastpath - */ - - for (i = 0; i < nr; i++) { - temp = cso_single_sampler( ctx, i, templates[i] ); - if (temp != PIPE_OK) - error = temp; - } - - for ( ; i < ctx->nr_samplers; i++) { - temp = cso_single_sampler( ctx, i, NULL ); - if (temp != PIPE_OK) - error = temp; - } - - cso_single_sampler_done( ctx ); - - return error; -} - -void cso_save_samplers(struct cso_context *ctx) -{ - ctx->nr_samplers_saved = ctx->nr_samplers; - memcpy(ctx->samplers_saved, ctx->samplers, sizeof(ctx->samplers)); -} - -void cso_restore_samplers(struct cso_context *ctx) -{ - ctx->nr_samplers = ctx->nr_samplers_saved; - memcpy(ctx->samplers, ctx->samplers_saved, sizeof(ctx->samplers)); - cso_single_sampler_done( ctx ); -} - -/* - * If the function encouters any errors it will return the - * last one. Done to always try to set as many samplers - * as possible. 
- */ -enum pipe_error cso_set_vertex_samplers(struct cso_context *ctx, - unsigned nr, - const struct pipe_sampler_state **templates) -{ - unsigned i; - enum pipe_error temp, error = PIPE_OK; - - /* TODO: fastpath - */ - - for (i = 0; i < nr; i++) { - temp = cso_single_vertex_sampler( ctx, i, templates[i] ); - if (temp != PIPE_OK) - error = temp; - } - - for ( ; i < ctx->nr_samplers; i++) { - temp = cso_single_vertex_sampler( ctx, i, NULL ); - if (temp != PIPE_OK) - error = temp; - } - - cso_single_vertex_sampler_done( ctx ); - - return error; -} - -void -cso_save_vertex_samplers(struct cso_context *ctx) -{ - ctx->nr_vertex_samplers_saved = ctx->nr_vertex_samplers; - memcpy(ctx->vertex_samplers_saved, ctx->vertex_samplers, sizeof(ctx->vertex_samplers)); -} - -void -cso_restore_vertex_samplers(struct cso_context *ctx) -{ - ctx->nr_vertex_samplers = ctx->nr_vertex_samplers_saved; - memcpy(ctx->vertex_samplers, ctx->vertex_samplers_saved, sizeof(ctx->vertex_samplers)); - cso_single_vertex_sampler_done(ctx); -} - - enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { @@ -1143,121 +934,361 @@ void cso_restore_vertex_elements(struct cso_context *ctx) ctx->velements_saved = NULL; } -/* fragment sampler view state */ +/* vertex buffers */ -void -cso_set_fragment_sampler_views(struct cso_context *cso, - uint count, - struct pipe_sampler_view **views) +void cso_set_vertex_buffers(struct cso_context *ctx, + unsigned count, + const struct pipe_vertex_buffer *buffers) { - uint i; - - for (i = 0; i < count; i++) { - pipe_sampler_view_reference(&cso->fragment_sampler_views[i], views[i]); + if (count != ctx->nr_vertex_buffers || + memcmp(buffers, ctx->vertex_buffers, + sizeof(struct pipe_vertex_buffer) * count) != 0) { + util_copy_vertex_buffers(ctx->vertex_buffers, &ctx->nr_vertex_buffers, + buffers, count); + ctx->pipe->set_vertex_buffers(ctx->pipe, count, buffers); } - for (; i < cso->nr_fragment_sampler_views; i++) { - pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL); +} + +void cso_save_vertex_buffers(struct cso_context *ctx) +{ + util_copy_vertex_buffers(ctx->vertex_buffers_saved, + &ctx->nr_vertex_buffers_saved, + ctx->vertex_buffers, + ctx->nr_vertex_buffers); +} + +void cso_restore_vertex_buffers(struct cso_context *ctx) +{ + util_copy_vertex_buffers(ctx->vertex_buffers, + &ctx->nr_vertex_buffers, + ctx->vertex_buffers_saved, + ctx->nr_vertex_buffers_saved); + ctx->pipe->set_vertex_buffers(ctx->pipe, ctx->nr_vertex_buffers, + ctx->vertex_buffers); +} + + +/**************** fragment/vertex sampler view state *************************/ + +static enum pipe_error +single_sampler(struct cso_context *ctx, + struct sampler_info *info, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned key_size = sizeof(struct pipe_sampler_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); + struct cso_hash_iter iter = + cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void *) templ, key_size); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + if 
(cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } } - cso->pipe->set_fragment_sampler_views(cso->pipe, - MAX2(count, cso->nr_fragment_sampler_views), - cso->fragment_sampler_views); + info->samplers[idx] = handle; - cso->nr_fragment_sampler_views = count; + return PIPE_OK; } -void -cso_save_fragment_sampler_views(struct cso_context *cso) +enum pipe_error +cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) { - uint i; + return single_sampler(ctx, &ctx->fragment_samplers, idx, templ); +} + +enum pipe_error +cso_single_vertex_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + return single_sampler(ctx, &ctx->vertex_samplers, idx, templ); +} + - cso->nr_fragment_sampler_views_saved = cso->nr_fragment_sampler_views; - for (i = 0; i < cso->nr_fragment_sampler_views; i++) { - assert(!cso->fragment_sampler_views_saved[i]); +static void +single_sampler_done(struct cso_context *ctx, + struct sampler_info *info) +{ + unsigned i; - pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i], - cso->fragment_sampler_views[i]); + /* find highest non-null sampler */ + for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { + if (info->samplers[i - 1] != NULL) + break; + } + + info->nr_samplers = i; + + if (info->hw.nr_samplers != info->nr_samplers || + memcmp(info->hw.samplers, + info->samplers, + info->nr_samplers * sizeof(void *)) != 0) + { + memcpy(info->hw.samplers, + info->samplers, + info->nr_samplers * sizeof(void *)); + info->hw.nr_samplers = info->nr_samplers; + + if (info == &ctx->fragment_samplers) { + ctx->pipe->bind_fragment_sampler_states(ctx->pipe, + info->nr_samplers, + info->samplers); + } + else if (info == &ctx->vertex_samplers) { + ctx->pipe->bind_vertex_sampler_states(ctx->pipe, + info->nr_samplers, + info->samplers); + } + else { + assert(0); + } } } void -cso_restore_fragment_sampler_views(struct cso_context *cso) +cso_single_sampler_done( struct cso_context *ctx ) { - uint i; + single_sampler_done(ctx, &ctx->fragment_samplers); +} + +void +cso_single_vertex_sampler_done(struct cso_context *ctx) +{ + single_sampler_done(ctx, &ctx->vertex_samplers); +} - for (i = 0; i < cso->nr_fragment_sampler_views_saved; i++) { - pipe_sampler_view_reference(&cso->fragment_sampler_views[i], cso->fragment_sampler_views_saved[i]); - pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i], NULL); + +/* + * If the function encouters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. 
+ */ +static enum pipe_error +set_samplers(struct cso_context *ctx, + struct sampler_info *info, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + unsigned i; + enum pipe_error temp, error = PIPE_OK; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) { + temp = single_sampler(ctx, info, i, templates[i]); + if (temp != PIPE_OK) + error = temp; } - for (; i < cso->nr_fragment_sampler_views; i++) { - pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL); + + for ( ; i < info->nr_samplers; i++) { + temp = single_sampler(ctx, info, i, NULL); + if (temp != PIPE_OK) + error = temp; } - cso->pipe->set_fragment_sampler_views(cso->pipe, - MAX2(cso->nr_fragment_sampler_views, cso->nr_fragment_sampler_views_saved), - cso->fragment_sampler_views); + single_sampler_done(ctx, info); + + return error; +} + +enum pipe_error +cso_set_samplers(struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + return set_samplers(ctx, &ctx->fragment_samplers, nr, templates); +} - cso->nr_fragment_sampler_views = cso->nr_fragment_sampler_views_saved; - cso->nr_fragment_sampler_views_saved = 0; +enum pipe_error +cso_set_vertex_samplers(struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + return set_samplers(ctx, &ctx->vertex_samplers, nr, templates); } -/* vertex sampler view state */ + +static void +save_samplers(struct cso_context *ctx, struct sampler_info *info) +{ + info->nr_samplers_saved = info->nr_samplers; + memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers)); +} void -cso_set_vertex_sampler_views(struct cso_context *cso, - uint count, - struct pipe_sampler_view **views) +cso_save_samplers(struct cso_context *ctx) +{ + save_samplers(ctx, &ctx->fragment_samplers); +} + +void +cso_save_vertex_samplers(struct cso_context *ctx) +{ + save_samplers(ctx, &ctx->vertex_samplers); +} + + + +static void +restore_samplers(struct cso_context *ctx, struct sampler_info *info) +{ + info->nr_samplers = info->nr_samplers_saved; + memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers)); + single_sampler_done(ctx, info); +} + +void +cso_restore_samplers(struct cso_context *ctx) +{ + restore_samplers(ctx, &ctx->fragment_samplers); +} + +void +cso_restore_vertex_samplers(struct cso_context *ctx) +{ + restore_samplers(ctx, &ctx->vertex_samplers); +} + + + +static void +set_sampler_views(struct cso_context *ctx, + struct sampler_info *info, + void (*set_views)(struct pipe_context *, + unsigned num_views, + struct pipe_sampler_view **), + uint count, + struct pipe_sampler_view **views) { uint i; + /* reference new views */ for (i = 0; i < count; i++) { - pipe_sampler_view_reference(&cso->vertex_sampler_views[i], views[i]); + pipe_sampler_view_reference(&info->views[i], views[i]); } - for (; i < cso->nr_vertex_sampler_views; i++) { - pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL); + /* unref extra old views, if any */ + for (; i < info->nr_views; i++) { + pipe_sampler_view_reference(&info->views[i], NULL); } - cso->pipe->set_vertex_sampler_views(cso->pipe, - MAX2(count, cso->nr_vertex_sampler_views), - cso->vertex_sampler_views); + info->nr_views = count; - cso->nr_vertex_sampler_views = count; + /* bind the new sampler views */ + set_views(ctx->pipe, count, info->views); } void -cso_save_vertex_sampler_views(struct cso_context *cso) +cso_set_fragment_sampler_views(struct cso_context *ctx, + uint count, + struct pipe_sampler_view **views) { - uint i; + set_sampler_views(ctx, 
&ctx->fragment_samplers, + ctx->pipe->set_fragment_sampler_views, + count, views); +} - cso->nr_vertex_sampler_views_saved = cso->nr_vertex_sampler_views; +void +cso_set_vertex_sampler_views(struct cso_context *ctx, + uint count, + struct pipe_sampler_view **views) +{ + set_sampler_views(ctx, &ctx->vertex_samplers, + ctx->pipe->set_vertex_sampler_views, + count, views); +} - for (i = 0; i < cso->nr_vertex_sampler_views; i++) { - assert(!cso->vertex_sampler_views_saved[i]); - pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i], - cso->vertex_sampler_views[i]); + +static void +save_sampler_views(struct cso_context *ctx, + struct sampler_info *info) +{ + uint i; + + info->nr_views_saved = info->nr_views; + + for (i = 0; i < info->nr_views; i++) { + assert(!info->views_saved[i]); + pipe_sampler_view_reference(&info->views_saved[i], info->views[i]); } } void -cso_restore_vertex_sampler_views(struct cso_context *cso) +cso_save_fragment_sampler_views(struct cso_context *ctx) +{ + save_sampler_views(ctx, &ctx->fragment_samplers); +} + +void +cso_save_vertex_sampler_views(struct cso_context *ctx) +{ + save_sampler_views(ctx, &ctx->vertex_samplers); +} + + +static void +restore_sampler_views(struct cso_context *ctx, + struct sampler_info *info, + void (*set_views)(struct pipe_context *, + unsigned num_views, + struct pipe_sampler_view **)) { uint i; - for (i = 0; i < cso->nr_vertex_sampler_views_saved; i++) { - pipe_sampler_view_reference(&cso->vertex_sampler_views[i], cso->vertex_sampler_views_saved[i]); - pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i], NULL); + for (i = 0; i < info->nr_views_saved; i++) { + pipe_sampler_view_reference(&info->views[i], info->views_saved[i]); + pipe_sampler_view_reference(&info->views_saved[i], NULL); } - for (; i < cso->nr_vertex_sampler_views; i++) { - pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL); + for (; i < info->nr_views; i++) { + pipe_sampler_view_reference(&info->views[i], NULL); } - cso->pipe->set_vertex_sampler_views(cso->pipe, - MAX2(cso->nr_vertex_sampler_views, cso->nr_vertex_sampler_views_saved), - cso->vertex_sampler_views); + /* bind the old/saved sampler views */ + set_views(ctx->pipe, info->nr_views_saved, info->views); + + info->nr_views = info->nr_views_saved; + info->nr_views_saved = 0; +} + +void +cso_restore_fragment_sampler_views(struct cso_context *ctx) +{ + restore_sampler_views(ctx, &ctx->fragment_samplers, + ctx->pipe->set_fragment_sampler_views); +} - cso->nr_vertex_sampler_views = cso->nr_vertex_sampler_views_saved; - cso->nr_vertex_sampler_views_saved = 0; +void +cso_restore_vertex_sampler_views(struct cso_context *ctx) +{ + restore_sampler_views(ctx, &ctx->vertex_samplers, + ctx->pipe->set_vertex_sampler_views); } diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index f0b07f73765..00edc9f8dd4 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -110,6 +110,13 @@ void cso_save_vertex_elements(struct cso_context *ctx); void cso_restore_vertex_elements(struct cso_context *ctx); +void cso_set_vertex_buffers(struct cso_context *ctx, + unsigned count, + const struct pipe_vertex_buffer *buffers); +void cso_save_vertex_buffers(struct cso_context *ctx); +void cso_restore_vertex_buffers(struct cso_context *ctx); + + /* These aren't really sensible -- most of the time the api provides * object semantics for shaders anyway, and the cases where it doesn't * (eg mesa's 
internall-generated texenv programs), it will be up to diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index e045313b94f..95d96719873 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -88,8 +88,14 @@ draw_create_gallivm(struct pipe_context *pipe, struct gallivm_state *gallivm) goto fail; #if HAVE_LLVM - if (draw_get_option_use_llvm() && gallivm) { - draw->llvm = draw_llvm_create(draw, gallivm); + if (draw_get_option_use_llvm()) { + if (!gallivm) { + gallivm = gallivm_create(); + draw->own_gallivm = gallivm; + } + + if (gallivm) + draw->llvm = draw_llvm_create(draw, gallivm); } #endif @@ -180,8 +186,11 @@ void draw_destroy( struct draw_context *draw ) draw_vs_destroy( draw ); draw_gs_destroy( draw ); #ifdef HAVE_LLVM - if(draw->llvm) + if (draw->llvm) draw_llvm_destroy( draw->llvm ); + + if (draw->own_gallivm) + gallivm_destroy(draw->own_gallivm); #endif FREE( draw ); @@ -401,7 +410,7 @@ void draw_wide_line_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->pipeline.wide_line_threshold = threshold; + draw->pipeline.wide_line_threshold = roundf(threshold); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 0c51aa85b37..a5217c1d4ec 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -214,13 +214,12 @@ static LLVMTypeRef create_jit_vertex_buffer_type(struct gallivm_state *gallivm) { LLVMTargetDataRef target = gallivm->target; - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[3]; LLVMTypeRef vb_type; elem_types[0] = - elem_types[1] = - elem_types[2] = LLVMInt32TypeInContext(gallivm->context); - elem_types[3] = LLVMPointerType(LLVMOpaqueTypeInContext(gallivm->context), 0); /* vs_constants */ + elem_types[1] = LLVMInt32TypeInContext(gallivm->context); + elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); @@ -229,10 +228,8 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm) LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, max_index, - target, vb_type, 1); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, - target, vb_type, 2); + target, vb_type, 1); LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); @@ -513,9 +510,7 @@ generate_fetch(struct gallivm_state *gallivm, LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf); - LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(gallivm, vbuf); LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf); - LLVMValueRef cond; LLVMValueRef stride; if (velem->instance_divisor) { @@ -525,10 +520,6 @@ generate_fetch(struct gallivm_state *gallivm, "instance_divisor"); } - /* limit index to min(index, vb_max_index) */ - cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); - index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); - stride = LLVMBuildMul(builder, vb_stride, index, ""); vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 9f038f1f04d..e8623e7bcdc 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h 
+++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -133,11 +133,8 @@ struct draw_jit_context #define draw_jit_vbuffer_stride(_gallivm, _ptr) \ lp_build_struct_get(_gallivm, _ptr, 0, "stride") -#define draw_jit_vbuffer_max_index(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 1, "max_index") - #define draw_jit_vbuffer_offset(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 2, "buffer_offset") + lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset") typedef int diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 6206197dae9..f1b0171f520 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -64,8 +64,8 @@ boolean draw_pipeline_init( struct draw_context *draw ) return FALSE; /* these defaults are oriented toward the needs of softpipe */ - draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */ - draw->pipeline.wide_line_threshold = 1.0; + draw->pipeline.wide_point_threshold = 1000000.0f; /* infinity */ + draw->pipeline.wide_line_threshold = 1.0f; draw->pipeline.wide_point_sprites = FALSE; draw->pipeline.line_stipple = TRUE; draw->pipeline.point_sprite = TRUE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 0851b9acc0d..32af29ae144 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -55,9 +55,16 @@ /** + * Size for the alpha texture used for antialiasing + */ +#define TEXTURE_SIZE_LOG2 5 /* 32 x 32 */ + +/** * Max texture level for the alpha texture used for antialiasing + * + * Don't use the 1x1 and 2x2 mipmap levels. */ -#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ +#define MAX_TEXTURE_LEVEL (TEXTURE_SIZE_LOG2 - 2) /** @@ -403,8 +410,8 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = MAX_TEXTURE_LEVEL; - texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; - texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.width0 = 1 << TEXTURE_SIZE_LOG2; + texTemp.height0 = 1 << TEXTURE_SIZE_LOG2; texTemp.depth0 = 1; texTemp.array_size = 1; texTemp.bind = PIPE_BIND_SAMPLER_VIEW; @@ -461,7 +468,7 @@ aaline_create_texture(struct aaline_stage *aaline) d = 200; /* tuneable */ } else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { - d = 0; + d = 35; /* edge texel */ } else { d = 255; @@ -498,8 +505,7 @@ aaline_create_sampler(struct aaline_stage *aaline) sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; sampler.normalized_coords = 1; sampler.min_lod = 0.0f; - /* avoid using the 1x1 and 2x2 mipmap levels */ - sampler.max_lod = MAX_TEXTURE_LEVEL - 2; + sampler.max_lod = MAX_TEXTURE_LEVEL; aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); if (aaline->sampler_cso == NULL) diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index f5515c1df76..ea62c9739fd 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -27,8 +27,9 @@ /** * Polygon stipple stage: implement polygon stipple with texture map and - * fragment program. The fragment program samples the texture and does - * a fragment kill for the stipple-failing fragments. + * fragment program. The fragment program samples the texture using the + * fragment window coordinate register and does a fragment kill for the + * stipple-failing fragments. 
* * Authors: Brian Paul */ @@ -114,7 +115,8 @@ struct pstip_stage /** * Subclass of tgsi_transform_context, used for transforming the - * user's fragment shader to add the special AA instructions. + * user's fragment shader to add the extra texture sample and fragment kill + * instructions. */ struct pstip_transform_context { struct tgsi_transform_context base; @@ -658,16 +660,16 @@ pstip_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *fs) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + struct pstip_fragment_shader *pstipfs = CALLOC_STRUCT(pstip_fragment_shader); - if (aafs) { - aafs->state = *fs; + if (pstipfs) { + pstipfs->state = *fs; /* pass-through */ - aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + pstipfs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); } - return aafs; + return pstipfs; } @@ -675,12 +677,12 @@ static void pstip_bind_fs_state(struct pipe_context *pipe, void *fs) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + struct pstip_fragment_shader *pstipfs = (struct pstip_fragment_shader *) fs; /* save current */ - pstip->fs = aafs; + pstip->fs = pstipfs; /* pass-through */ pstip->driver_bind_fs_state(pstip->pipe, - (aafs ? aafs->driver_fs : NULL)); + (pstipfs ? pstipfs->driver_fs : NULL)); } @@ -688,14 +690,14 @@ static void pstip_delete_fs_state(struct pipe_context *pipe, void *fs) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + struct pstip_fragment_shader *pstipfs = (struct pstip_fragment_shader *) fs; /* pass-through */ - pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + pstip->driver_delete_fs_state(pstip->pipe, pstipfs->driver_fs); - if (aafs->pstip_fs) - pstip->driver_delete_fs_state(pstip->pipe, aafs->pstip_fs); + if (pstipfs->pstip_fs) + pstip->driver_delete_fs_state(pstip->pipe, pstipfs->pstip_fs); - FREE(aafs); + FREE(pstipfs); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index c575a8ac7ca..27afba5af3d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -29,6 +29,7 @@ */ #include "util/u_memory.h" +#include "util/u_math.h" #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" @@ -86,7 +87,7 @@ draw_need_pipeline(const struct draw_context *draw, return TRUE; /* wide lines */ - if (rasterizer->line_width > draw->pipeline.wide_line_threshold) + if (roundf(rasterizer->line_width) > draw->pipeline.wide_line_threshold) return TRUE; /* AA lines */ @@ -169,7 +170,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) stage->next = next; /* drawing wide lines? */ - wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold + wide_lines = (roundf(rast->line_width) > draw->pipeline.wide_line_threshold && !rast->line_smooth); /* drawing large/sprite points (but not AA points)? 
*/ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 06ed4d60ef2..db2e3c5410d 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -286,6 +286,7 @@ struct draw_context #ifdef HAVE_LLVM struct draw_llvm *llvm; + struct gallivm_state *own_gallivm; #endif struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 4078b2a07d0..c3d7e871f7a 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -459,10 +459,9 @@ draw_vbo(struct draw_context *draw, } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n", + debug_printf(" %u: stride=%u offset=%u ptr=%p\n", i, draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index ae12ee24bdc..4fa3b265e10 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -139,7 +139,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } translate->run_elts( translate, @@ -166,7 +166,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e706b7796f8..51043102a61 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -186,7 +186,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index c98fb3d5205..1e926fb028e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -169,7 +169,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index ); + draw->pt.user.max_index ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index c43ee8ac638..c261d761161 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -231,44 +231,53 @@ lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm, assert(dst_type.floating); - /* Special-case int8->float, though most cases could be handled - * this way: - */ - if (src_width == 8) { - scale = 1.0/255.0; + mantissa = 
lp_mantissa(dst_type); + + if (src_width <= (mantissa + 1)) { + /* + * The source width matches fits what can be represented in floating + * point (i.e., mantissa + 1 bits). So do a straight multiplication + * followed by casting. No further rounding is necessary. + */ + + scale = 1.0/(double)((1ULL << src_width) - 1); res = LLVMBuildSIToFP(builder, src, vec_type, ""); res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); return res; } + else { + /* + * The source width exceeds what can be represented in floating + * point. So truncate the incoming values. + */ - mantissa = lp_mantissa(dst_type); - - n = MIN2(mantissa, src_width); + n = MIN2(mantissa, src_width); - ubound = ((unsigned long long)1 << n); - mask = ubound - 1; - scale = (double)ubound/mask; - bias = (double)((unsigned long long)1 << (mantissa - n)); + ubound = ((unsigned long long)1 << n); + mask = ubound - 1; + scale = (double)ubound/mask; + bias = (double)((unsigned long long)1 << (mantissa - n)); - res = src; + res = src; - if(src_width > mantissa) { - int shift = src_width - mantissa; - res = LLVMBuildLShr(builder, res, - lp_build_const_int_vec(gallivm, dst_type, shift), ""); - } + if (src_width > mantissa) { + int shift = src_width - mantissa; + res = LLVMBuildLShr(builder, res, + lp_build_const_int_vec(gallivm, dst_type, shift), ""); + } - bias_ = lp_build_const_vec(gallivm, dst_type, bias); + bias_ = lp_build_const_vec(gallivm, dst_type, bias); - res = LLVMBuildOr(builder, - res, - LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); + res = LLVMBuildOr(builder, + res, + LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); - res = LLVMBuildBitCast(builder, res, vec_type, ""); + res = LLVMBuildBitCast(builder, res, vec_type, ""); - res = LLVMBuildFSub(builder, res, bias_, ""); - res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); + res = LLVMBuildFSub(builder, res, bias_, ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); + } return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 7504cb5cf2f..45addee8fab 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -222,11 +222,12 @@ free_gallivm_state(struct gallivm_state *gallivm) static boolean init_gallivm_state(struct gallivm_state *gallivm) { - assert(gallivm_initialized); assert(!gallivm->context); assert(!gallivm->module); assert(!gallivm->provider); + lp_build_init(); + gallivm->context = LLVMContextCreate(); if (!gallivm->context) goto fail; @@ -387,6 +388,9 @@ gallivm_garbage_collect(struct gallivm_state *gallivm) void lp_build_init(void) { + if (gallivm_initialized) + return; + #ifdef DEBUG gallivm_debug = debug_get_option_gallivm_debug(); #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 2ef02160f23..960068c494d 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -82,6 +82,10 @@ struct pb_manager */ void (*flush)( struct pb_manager *mgr ); + + boolean + (*is_buffer_busy)( struct pb_manager *mgr, + struct pb_buffer *buf ); }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index a6eb4039621..25accefa8d6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -227,8 +227,6 
@@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, pb_size size, const struct pb_desc *desc) { - void *map; - if(buf->base.base.size < size) return 0; @@ -242,13 +240,18 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, if(!pb_check_usage(desc->usage, buf->base.base.usage)) return 0; - map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL); - if (!map) { - return -1; + if (buf->mgr->provider->is_buffer_busy) { + if (buf->mgr->provider->is_buffer_busy(buf->mgr->provider, buf->buffer)) + return -1; + } else { + void *ptr = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL); + + if (!ptr) + return -1; + + pb_unmap(buf->buffer); } - pb_unmap(buf->buffer); - return 1; } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 75b0f6a68ea..b03dd3a0cf8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -2132,7 +2132,8 @@ struct x86_reg x86_fn_arg( struct x86_function *p, return x86_make_disp(x86_make_reg(file_REG32, reg_SP), p->stack_offset + arg * 4); /* ??? */ default: - abort(); + assert(0 && "Unexpected x86 target ABI in x86_fn_arg"); + return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */ } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 16a205f2068..d2cb98e84af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "util/u_debug.h" +#include "pipe/p_format.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" @@ -226,6 +227,45 @@ tgsi_build_declaration_semantic( return ds; } + +static struct tgsi_declaration_resource +tgsi_default_declaration_resource(void) +{ + struct tgsi_declaration_resource declaration_resource; + + declaration_resource.Resource = TGSI_TEXTURE_UNKNOWN; + declaration_resource.ReturnTypeX = PIPE_TYPE_UNORM; + declaration_resource.ReturnTypeY = PIPE_TYPE_UNORM; + declaration_resource.ReturnTypeZ = PIPE_TYPE_UNORM; + declaration_resource.ReturnTypeW = PIPE_TYPE_UNORM; + + return declaration_resource; +} + +static struct tgsi_declaration_resource +tgsi_build_declaration_resource(unsigned texture, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w, + struct tgsi_declaration *declaration, + struct tgsi_header *header) +{ + struct tgsi_declaration_resource declaration_resource; + + declaration_resource = tgsi_default_declaration_resource(); + declaration_resource.Resource = texture; + declaration_resource.ReturnTypeX = return_type_x; + declaration_resource.ReturnTypeY = return_type_y; + declaration_resource.ReturnTypeZ = return_type_z; + declaration_resource.ReturnTypeW = return_type_w; + + declaration_grow(declaration, header); + + return declaration_resource; +} + + struct tgsi_full_declaration tgsi_default_full_declaration( void ) { @@ -235,6 +275,7 @@ tgsi_default_full_declaration( void ) full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); full_declaration.ImmediateData.u = NULL; + full_declaration.Resource = tgsi_default_declaration_resource(); return full_declaration; } @@ -324,6 +365,24 @@ tgsi_build_full_declaration( } } + if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) { + struct tgsi_declaration_resource *dr; + + if (maxsize <= size) { + return 0; + } + dr = (struct 
tgsi_declaration_resource *)&tokens[size]; + size++; + + *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource, + full_decl->Resource.ReturnTypeX, + full_decl->Resource.ReturnTypeY, + full_decl->Resource.ReturnTypeZ, + full_decl->Resource.ReturnTypeW, + declaration, + header); + } + return size; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 82cd8eaa969..c12662076b1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -104,7 +104,8 @@ tgsi_file_names[TGSI_FILE_COUNT] = "PRED", "SV", "IMMX", - "TEMPX" + "TEMPX", + "RES" }; static const char *interpolate_names[] = @@ -157,7 +158,9 @@ tgsi_texture_names[TGSI_TEXTURE_COUNT] = "RECT", "SHADOW1D", "SHADOW2D", - "SHADOWRECT" + "SHADOWRECT", + "1DARRAY", + "2DARRAY" }; const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = @@ -170,6 +173,15 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = "FS_COLOR0_WRITES_ALL_CBUFS", }; +static const char *tgsi_type_names[] = +{ + "UNORM", + "SNORM", + "SINT", + "UINT", + "FLOAT" +}; + const char *tgsi_primitive_names[PIPE_PRIM_MAX] = { "POINTS", @@ -393,6 +405,26 @@ iter_declaration( } } + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + TXT(", "); + ENM(decl->Resource.Resource, tgsi_texture_names); + TXT(", "); + if ((decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeY) && + (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeZ) && + (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeW)) { + ENM(decl->Resource.ReturnTypeX, tgsi_type_names); + } else { + ENM(decl->Resource.ReturnTypeX, tgsi_type_names); + TXT(", "); + ENM(decl->Resource.ReturnTypeY, tgsi_type_names); + TXT(", "); + ENM(decl->Resource.ReturnTypeZ, tgsi_type_names); + TXT(", "); + ENM(decl->Resource.ReturnTypeW, tgsi_type_names); + } + + } + if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && decl->Declaration.File == TGSI_FILE_INPUT) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 35b27423513..cbb090b2803 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1750,6 +1750,36 @@ exec_tex(struct tgsi_exec_machine *mach, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; + case TGSI_TEXTURE_1D_ARRAY: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + } + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_2D_ARRAY: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + } + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: FETCH(&r[0], 0, CHAN_X); @@ -1843,6 +1873,164 @@ exec_txd(struct tgsi_exec_machine *mach, } + +static void +exec_sample(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + uint modifier) +{ + const uint resource_unit = inst->Src[1].Register.Index; + const uint sampler_unit = inst->Src[2].Register.Index; + union tgsi_exec_channel r[4]; + const union tgsi_exec_channel *lod = &ZeroVec; + enum tgsi_sampler_control control; + uint chan; + + if (modifier != TEX_MODIFIER_NONE) { + if 
(modifier == TEX_MODIFIER_LOD_BIAS) + FETCH(&r[3], 3, CHAN_X); + else /*TEX_MODIFIER_LOD*/ + FETCH(&r[3], 0, CHAN_W); + + if (modifier != TEX_MODIFIER_PROJECTED) { + lod = &r[3]; + } + } + + if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + control = tgsi_sampler_lod_explicit; + } else { + control = tgsi_sampler_lod_bias; + } + + switch (mach->Resources[resource_unit].Resource) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + FETCH(&r[0], 0, CHAN_X); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], lod, + control, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_sample_d(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint resource_unit = inst->Src[1].Register.Index; + const uint sampler_unit = inst->Src[2].Register.Index; + union tgsi_exec_channel r[4]; + uint chan; + /* + * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet. + */ + + switch (mach->Resources[resource_unit].Resource) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + + FETCH(&r[0], 0, CHAN_X); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], &ZeroVec, + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + + /** * Evaluate a constant-valued coefficient at the position of the * current quad. 
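The interpreter additions above (exec_sample, exec_sample_d) back the new opcodes that split the texture resource from the sampler state; the matching declaration and emission helpers (ureg_DECL_resource, the OP13-generated ureg_SAMPLE) appear further down in the tgsi_ureg.c/tgsi_opcode_tmp.h hunks. A rough sketch of building a shader against that model follows; it is illustrative only, and PIPE_TYPE_FLOAT is assumed to come from the pipe_type enum in p_format.h that the patch starts referencing:

```c
#include "pipe/p_format.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_ureg.h"

/* Sketch: a trivial fragment shader written against the new split
 * resource/sampler model, built with ureg. */
static void *
build_sample_fs(struct pipe_context *pipe)
{
   struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   struct ureg_src coord, res, samp;
   struct ureg_dst color;

   coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
                              TGSI_INTERPOLATE_PERSPECTIVE);
   /* DCL RES[0]: a 2D texture returning floats in all four channels */
   res = ureg_DECL_resource(ureg, 0, TGSI_TEXTURE_2D,
                            PIPE_TYPE_FLOAT, PIPE_TYPE_FLOAT,
                            PIPE_TYPE_FLOAT, PIPE_TYPE_FLOAT);
   samp = ureg_DECL_sampler(ureg, 0);   /* sampler state is now a separate operand */
   color = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);

   /* SAMPLE dst, coord, resource, sampler */
   ureg_SAMPLE(ureg, color, coord, res, samp);
   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}
```
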
@@ -1914,6 +2102,11 @@ static void exec_declaration(struct tgsi_exec_machine *mach, const struct tgsi_full_declaration *decl) { + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + mach->Resources[decl->Range.First] = decl->Resource; + return; + } + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { if (decl->Declaration.File == TGSI_FILE_INPUT) { uint first, last, mask; @@ -3688,6 +3881,54 @@ exec_instruction( exec_endswitch(mach); break; + case TGSI_OPCODE_LOAD: + assert(0); + break; + + case TGSI_OPCODE_LOAD_MS: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE: + exec_sample(mach, inst, TEX_MODIFIER_NONE); + break; + + case TGSI_OPCODE_SAMPLE_B: + exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS); + break; + + case TGSI_OPCODE_SAMPLE_C: + exec_sample(mach, inst, TEX_MODIFIER_NONE); + break; + + case TGSI_OPCODE_SAMPLE_C_LZ: + exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS); + break; + + case TGSI_OPCODE_SAMPLE_D: + exec_sample_d(mach, inst); + break; + + case TGSI_OPCODE_SAMPLE_L: + exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); + break; + + case TGSI_OPCODE_GATHER4: + assert(0); + break; + + case TGSI_OPCODE_RESINFO: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE_POS: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE_INFO: + assert(0); + break; + default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 6c204c73714..4a423b5bb4e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -308,6 +308,7 @@ struct tgsi_exec_machine struct tgsi_full_declaration *Declarations; uint NumDeclarations; + struct tgsi_declaration_resource Resources[PIPE_MAX_SHADER_RESOURCES]; }; struct tgsi_exec_machine * diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index e59e964ffa7..14ed56a1f6a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -175,7 +175,20 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT }, - { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH } + { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }, + + { 1, 2, 0, 0, 0, 0, "LOAD", TGSI_OPCODE_LOAD }, + { 1, 2, 0, 0, 0, 0, "LOAD_MS", TGSI_OPCODE_LOAD_MS }, + { 1, 3, 0, 0, 0, 0, "SAMPLE", TGSI_OPCODE_SAMPLE }, + { 1, 4, 0, 0, 0, 0, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B }, + { 1, 4, 0, 0, 0, 0, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C }, + { 1, 4, 0, 0, 0, 0, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ }, + { 1, 5, 0, 0, 0, 0, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D }, + { 1, 3, 0, 0, 0, 0, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L }, + { 1, 3, 0, 0, 0, 0, "GATHER4", TGSI_OPCODE_GATHER4 }, + { 1, 2, 0, 0, 0, 0, "RESINFO", TGSI_OPCODE_RESINFO }, + { 1, 2, 0, 0, 0, 0, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS }, + { 1, 2, 0, 0, 0, 0, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO }, }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b3123ed016d..b5d4504425b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -168,6 +168,19 @@ OP01(CASE) OP00(DEFAULT) OP00(ENDSWITCH) +OP12(LOAD) +OP12(LOAD_MS) +OP13(SAMPLE) +OP14(SAMPLE_B) +OP14(SAMPLE_C) +OP14(SAMPLE_C_LZ) +OP15(SAMPLE_D) +OP13(SAMPLE_L) +OP13(GATHER4) +OP12(RESINFO) +OP13(SAMPLE_POS) +OP12(SAMPLE_INFO) + 
#undef OP00 #undef OP01 @@ -180,6 +193,10 @@ OP00(ENDSWITCH) #undef OP14 #endif +#ifdef OP15 +#undef OP15 +#endif + #undef OP00_LBL #undef OP01_LBL diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 1891203abe1..fb36f9de32d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -128,6 +128,10 @@ tgsi_parse_token( } } + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + next_token(ctx, &decl->Resource); + } + break; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 2aafa2a6e83..b7a3c9bc0e6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -69,6 +69,7 @@ struct tgsi_full_declaration struct tgsi_declaration_dimension Dim; struct tgsi_declaration_semantic Semantic; struct tgsi_immediate_array_data ImmediateData; + struct tgsi_declaration_resource Resource; }; struct tgsi_full_immediate @@ -84,7 +85,7 @@ struct tgsi_full_property }; #define TGSI_FULL_MAX_DST_REGISTERS 2 -#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */ +#define TGSI_FULL_MAX_SRC_REGISTERS 5 /* SAMPLE_D has 5 */ struct tgsi_full_instruction { diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index acbff103efe..509c5346837 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -258,7 +258,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "PRED", "SV", "IMMX", - "TEMPX" + "TEMPX", + "RES" }; static boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 3f2cda860e0..92ba8b8f033 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1514,6 +1514,7 @@ emit_tex( struct x86_function *func, break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_1D_ARRAY: count = 2; break; case TGSI_TEXTURE_SHADOW1D: @@ -1521,6 +1522,7 @@ emit_tex( struct x86_function *func, case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: + case TGSI_TEXTURE_2D_ARRAY: count = 3; break; default: diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 819b0725a1f..1eac762e6e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -283,7 +283,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "PRED", "SV", "IMMX", - "TEMPX" + "TEMPX", + "RES" }; static boolean @@ -828,6 +829,15 @@ static const char *texture_names[TGSI_TEXTURE_COUNT] = "SHADOWRECT" }; +static const char *type_names[] = +{ + "UNORM", + "SNORM", + "SINT", + "UINT", + "FLOAT" +}; + static boolean match_inst_mnemonic(const char **pcur, const struct tgsi_opcode_info *info) @@ -1104,42 +1114,113 @@ static boolean parse_declaration( struct translate_ctx *ctx ) cur = ctx->cur; eat_opt_white( &cur ); if (*cur == ',' && !is_vs_input) { - uint i; + uint i, j; cur++; eat_opt_white( &cur ); - for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { - if (str_match_no_case( &cur, semantic_names[i] )) { - const char *cur2 = cur; - uint index; - - if (is_digit_alpha_underscore( cur )) - continue; - eat_opt_white( &cur2 ); - if (*cur2 == '[') { - cur2++; - eat_opt_white( &cur2 ); - if (!parse_uint( &cur2, &index )) { - report_error( ctx, "Expected literal integer" ); + if (file == TGSI_FILE_RESOURCE) { + for (i = 0; i < TGSI_TEXTURE_COUNT; i++) { + if (str_match_no_case(&cur, texture_names[i])) { + if 
(!is_digit_alpha_underscore(cur)) { + decl.Resource.Resource = i; + break; + } + } + } + if (i == TGSI_TEXTURE_COUNT) { + report_error(ctx, "Expected texture target"); + return FALSE; + } + eat_opt_white( &cur ); + if (*cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ++cur; + eat_opt_white( &cur ); + for (j = 0; j < 4; ++j) { + for (i = 0; i < PIPE_TYPE_COUNT; ++i) { + if (str_match_no_case(&cur, type_names[i])) { + if (!is_digit_alpha_underscore(cur)) { + switch (j) { + case 0: + decl.Resource.ReturnTypeX = i; + break; + case 1: + decl.Resource.ReturnTypeY = i; + break; + case 2: + decl.Resource.ReturnTypeZ = i; + break; + case 3: + decl.Resource.ReturnTypeW = i; + break; + default: + assert(0); + } + break; + } + } + } + if (i == PIPE_TYPE_COUNT) { + if (j == 0 || j > 2) { + report_error(ctx, "Expected type name"); return FALSE; } + break; + } else { + const char *cur2 = cur; eat_opt_white( &cur2 ); - if (*cur2 != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; + if (*cur2 == ',') { + cur2++; + eat_opt_white( &cur2 ); + cur = cur2; + continue; + } else + break; + } + } + if (j < 4) { + decl.Resource.ReturnTypeY = + decl.Resource.ReturnTypeZ = + decl.Resource.ReturnTypeW = + decl.Resource.ReturnTypeX; + } + ctx->cur = cur; + } else { + for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { + if (str_match_no_case( &cur, semantic_names[i] )) { + const char *cur2 = cur; + uint index; + + if (is_digit_alpha_underscore( cur )) + continue; + eat_opt_white( &cur2 ); + if (*cur2 == '[') { + cur2++; + eat_opt_white( &cur2 ); + if (!parse_uint( &cur2, &index )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &cur2 ); + if (*cur2 != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + cur2++; + + decl.Semantic.Index = index; + + cur = cur2; } - cur2++; - decl.Semantic.Index = index; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = i; - cur = cur2; + ctx->cur = cur; + break; } - - decl.Declaration.Semantic = 1; - decl.Semantic.Name = i; - - ctx->cur = cur; - break; } } } else if (is_imm_array) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 02de12d77d5..4564ab81f99 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -47,6 +47,7 @@ union tgsi_any_token { struct tgsi_declaration_range decl_range; struct tgsi_declaration_dimension decl_dim; struct tgsi_declaration_semantic decl_semantic; + struct tgsi_declaration_resource decl_resource; struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; @@ -137,6 +138,16 @@ struct ureg_program struct ureg_src sampler[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; + struct { + unsigned index; + unsigned target; + unsigned return_type_x; + unsigned return_type_y; + unsigned return_type_z; + unsigned return_type_w; + } resource[PIPE_MAX_SHADER_RESOURCES]; + unsigned nr_resources; + unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; @@ -578,6 +589,41 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, return ureg->sampler[0]; } +/* + * Allocate a new shader resource. 
+ */ +struct ureg_src +ureg_DECL_resource(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_RESOURCE, index); + uint i; + + for (i = 0; i < ureg->nr_resources; i++) { + if (ureg->resource[i].index == index) { + return reg; + } + } + + if (i < PIPE_MAX_SHADER_RESOURCES) { + ureg->resource[i].index = index; + ureg->resource[i].target = target; + ureg->resource[i].return_type_x = return_type_x; + ureg->resource[i].return_type_y = return_type_y; + ureg->resource[i].return_type_z = return_type_z; + ureg->resource[i].return_type_w = return_type_w; + ureg->nr_resources++; + return reg; + } + + assert(0); + return reg; +} static int match_or_expand_immediate( const unsigned *v, @@ -821,9 +867,10 @@ ureg_emit_dst( struct ureg_program *ureg, assert(dst.File != TGSI_FILE_CONSTANT); assert(dst.File != TGSI_FILE_INPUT); assert(dst.File != TGSI_FILE_SAMPLER); + assert(dst.File != TGSI_FILE_RESOURCE); assert(dst.File != TGSI_FILE_IMMEDIATE); assert(dst.File < TGSI_FILE_COUNT); - + out[n].value = 0; out[n].dst.File = dst.File; out[n].dst.WriteMask = dst.WriteMask; @@ -1206,6 +1253,36 @@ emit_decl_range2D(struct ureg_program *ureg, } static void +emit_decl_resource(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w ) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = TGSI_FILE_RESOURCE; + out[0].decl.UsageMask = 0xf; + out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; + + out[2].value = 0; + out[2].decl_resource.Resource = target; + out[2].decl_resource.ReturnTypeX = return_type_x; + out[2].decl_resource.ReturnTypeY = return_type_y; + out[2].decl_resource.ReturnTypeZ = return_type_z; + out[2].decl_resource.ReturnTypeW = return_type_w; +} + +static void emit_immediate( struct ureg_program *ureg, const unsigned *v, unsigned type ) @@ -1341,6 +1418,16 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler[i].Index, 1 ); } + for (i = 0; i < ureg->nr_resources; i++) { + emit_decl_resource(ureg, + ureg->resource[i].index, + ureg->resource[i].target, + ureg->resource[i].return_type_x, + ureg->resource[i].return_type_y, + ureg->resource[i].return_type_z, + ureg->resource[i].return_type_w); + } + if (ureg->const_decls.nr_constant_ranges) { for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { emit_decl_range(ureg, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 807128a5e52..b8d193f3f89 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -270,6 +270,15 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *, unsigned index ); +struct ureg_src +ureg_DECL_resource(struct ureg_program *, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w ); + static INLINE struct ureg_src ureg_imm4f( struct ureg_program *ureg, @@ -733,6 +742,66 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ } +#define OP14( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + 
struct ureg_src src0, \ + struct ureg_src src1, \ + struct ureg_src src2, \ + struct ureg_src src3 ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 4).insn_token; \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src0 ); \ + ureg_emit_src( ureg, src1 ); \ + ureg_emit_src( ureg, src2 ); \ + ureg_emit_src( ureg, src3 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + + +#define OP15( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + struct ureg_src src0, \ + struct ureg_src src1, \ + struct ureg_src src2, \ + struct ureg_src src3, \ + struct ureg_src src4 ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 5).insn_token; \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src0 ); \ + ureg_emit_src( ureg, src1 ); \ + ureg_emit_src( ureg, src2 ); \ + ureg_emit_src( ureg, src3 ); \ + ureg_emit_src( ureg, src4 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + + /* Use a template include to generate a correctly-typed ureg_OP() * function for each TGSI opcode: */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 08e7e89bd67..aa9a886d4a9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -280,7 +280,12 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_TEXTURE_CUBE: read_mask = TGSI_WRITEMASK_XYZ; break; - + case TGSI_TEXTURE_1D_ARRAY: + read_mask = TGSI_WRITEMASK_XY; + break; + case TGSI_TEXTURE_2D_ARRAY: + read_mask = TGSI_WRITEMASK_XYZ; + break; default: assert(0); read_mask = 0; diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c index d8069a149cf..3f1ecb630f2 100644 --- a/src/gallium/auxiliary/translate/translate_cache.c +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -40,6 +40,10 @@ struct translate_cache { struct translate_cache * translate_cache_create( void ) { struct translate_cache *cache = MALLOC_STRUCT(translate_cache); + if (cache == NULL) { + return NULL; + } + cache->hash = cso_hash_create(); return cache; } diff --git a/src/gallium/auxiliary/util/dbghelp.h b/src/gallium/auxiliary/util/dbghelp.h new file mode 100644 index 00000000000..bc7c53cf9f2 --- /dev/null +++ b/src/gallium/auxiliary/util/dbghelp.h @@ -0,0 +1,1265 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the w64 mingw-runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. 
+ */ +#ifndef _DBGHELP_ +#define _DBGHELP_ + +#ifdef _WIN64 +#ifndef _IMAGEHLP64 +#define _IMAGEHLP64 +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define IMAGEAPI DECLSPEC_IMPORT WINAPI +#define DBHLP_DEPRECIATED __declspec(deprecated) + +#define DBHLPAPI IMAGEAPI + +#define IMAGE_SEPARATION (64*1024) + + typedef struct _LOADED_IMAGE { + PSTR ModuleName; + HANDLE hFile; + PUCHAR MappedAddress; +#ifdef _IMAGEHLP64 + PIMAGE_NT_HEADERS64 FileHeader; +#else + PIMAGE_NT_HEADERS32 FileHeader; +#endif + PIMAGE_SECTION_HEADER LastRvaSection; + ULONG NumberOfSections; + PIMAGE_SECTION_HEADER Sections; + ULONG Characteristics; + BOOLEAN fSystemImage; + BOOLEAN fDOSImage; + LIST_ENTRY Links; + ULONG SizeOfImage; + } LOADED_IMAGE,*PLOADED_IMAGE; + +#define MAX_SYM_NAME 2000 + + typedef BOOL (CALLBACK *PFIND_DEBUG_FILE_CALLBACK)(HANDLE FileHandle,PSTR FileName,PVOID CallerData); + typedef BOOL (CALLBACK *PFINDFILEINPATHCALLBACK)(PSTR filename,PVOID context); + typedef BOOL (CALLBACK *PFIND_EXE_FILE_CALLBACK)(HANDLE FileHandle,PSTR FileName,PVOID CallerData); + + typedef BOOL (WINAPI *PSYMBOLSERVERPROC)(LPCSTR,LPCSTR,PVOID,DWORD,DWORD,LPSTR); + typedef BOOL (WINAPI *PSYMBOLSERVEROPENPROC)(VOID); + typedef BOOL (WINAPI *PSYMBOLSERVERCLOSEPROC)(VOID); + typedef BOOL (WINAPI *PSYMBOLSERVERSETOPTIONSPROC)(UINT_PTR,ULONG64); + typedef BOOL (CALLBACK WINAPI *PSYMBOLSERVERCALLBACKPROC)(UINT_PTR action,ULONG64 data,ULONG64 context); + typedef UINT_PTR (WINAPI *PSYMBOLSERVERGETOPTIONSPROC)(); + typedef BOOL (WINAPI *PSYMBOLSERVERPINGPROC)(LPCSTR); + + HANDLE IMAGEAPI FindDebugInfoFile(PSTR FileName,PSTR SymbolPath,PSTR DebugFilePath); + HANDLE IMAGEAPI FindDebugInfoFileEx(PSTR FileName,PSTR SymbolPath,PSTR DebugFilePath,PFIND_DEBUG_FILE_CALLBACK Callback,PVOID CallerData); + BOOL IMAGEAPI SymFindFileInPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,PVOID id,DWORD two,DWORD three,DWORD flags,LPSTR FoundFile,PFINDFILEINPATHCALLBACK callback,PVOID context); + HANDLE IMAGEAPI FindExecutableImage(PSTR FileName,PSTR SymbolPath,PSTR ImageFilePath); + HANDLE IMAGEAPI FindExecutableImageEx(PSTR FileName,PSTR SymbolPath,PSTR ImageFilePath,PFIND_EXE_FILE_CALLBACK Callback,PVOID CallerData); + PIMAGE_NT_HEADERS IMAGEAPI ImageNtHeader(PVOID Base); + PVOID IMAGEAPI ImageDirectoryEntryToDataEx(PVOID Base,BOOLEAN MappedAsImage,USHORT DirectoryEntry,PULONG Size,PIMAGE_SECTION_HEADER *FoundHeader); + PVOID IMAGEAPI ImageDirectoryEntryToData(PVOID Base,BOOLEAN MappedAsImage,USHORT DirectoryEntry,PULONG Size); + PIMAGE_SECTION_HEADER IMAGEAPI ImageRvaToSection(PIMAGE_NT_HEADERS NtHeaders,PVOID Base,ULONG Rva); + PVOID IMAGEAPI ImageRvaToVa(PIMAGE_NT_HEADERS NtHeaders,PVOID Base,ULONG Rva,PIMAGE_SECTION_HEADER *LastRvaSection); + +#define SSRVOPT_CALLBACK 0x0001 +#define SSRVOPT_DWORD 0x0002 +#define SSRVOPT_DWORDPTR 0x0004 +#define SSRVOPT_GUIDPTR 0x0008 +#define SSRVOPT_OLDGUIDPTR 0x0010 +#define SSRVOPT_UNATTENDED 0x0020 +#define SSRVOPT_NOCOPY 0x0040 +#define SSRVOPT_PARENTWIN 0x0080 +#define SSRVOPT_PARAMTYPE 0x0100 +#define SSRVOPT_SECURE 0x0200 +#define SSRVOPT_TRACE 0x0400 +#define SSRVOPT_SETCONTEXT 0x0800 +#define SSRVOPT_PROXY 0x1000 +#define SSRVOPT_DOWNSTREAM_STORE 0x2000 +#define SSRVOPT_RESET ((ULONG_PTR)-1) + +#define SSRVACTION_TRACE 1 +#define SSRVACTION_QUERYCANCEL 2 +#define SSRVACTION_EVENT 3 + +#ifndef _WIN64 + + typedef struct _IMAGE_DEBUG_INFORMATION { + LIST_ENTRY List; + DWORD ReservedSize; + PVOID ReservedMappedBase; + USHORT ReservedMachine; + USHORT ReservedCharacteristics; + 
DWORD ReservedCheckSum; + DWORD ImageBase; + DWORD SizeOfImage; + DWORD ReservedNumberOfSections; + PIMAGE_SECTION_HEADER ReservedSections; + DWORD ReservedExportedNamesSize; + PSTR ReservedExportedNames; + DWORD ReservedNumberOfFunctionTableEntries; + PIMAGE_FUNCTION_ENTRY ReservedFunctionTableEntries; + DWORD ReservedLowestFunctionStartingAddress; + DWORD ReservedHighestFunctionEndingAddress; + DWORD ReservedNumberOfFpoTableEntries; + PFPO_DATA ReservedFpoTableEntries; + DWORD SizeOfCoffSymbols; + PIMAGE_COFF_SYMBOLS_HEADER CoffSymbols; + DWORD ReservedSizeOfCodeViewSymbols; + PVOID ReservedCodeViewSymbols; + PSTR ImageFilePath; + PSTR ImageFileName; + PSTR ReservedDebugFilePath; + DWORD ReservedTimeDateStamp; + BOOL ReservedRomImage; + PIMAGE_DEBUG_DIRECTORY ReservedDebugDirectory; + DWORD ReservedNumberOfDebugDirectories; + DWORD ReservedOriginalFunctionTableBaseAddress; + DWORD Reserved[2 ]; + } IMAGE_DEBUG_INFORMATION,*PIMAGE_DEBUG_INFORMATION; + + PIMAGE_DEBUG_INFORMATION IMAGEAPI MapDebugInformation(HANDLE FileHandle,PSTR FileName,PSTR SymbolPath,DWORD ImageBase); + BOOL IMAGEAPI UnmapDebugInformation(PIMAGE_DEBUG_INFORMATION DebugInfo); +#endif + + typedef BOOL (CALLBACK *PENUMDIRTREE_CALLBACK)(LPCSTR FilePath,PVOID CallerData); + + BOOL IMAGEAPI SearchTreeForFile(PSTR RootPath,PSTR InputPathName,PSTR OutputPathBuffer); + BOOL IMAGEAPI EnumDirTree(HANDLE hProcess,PSTR RootPath,PSTR InputPathName,PSTR OutputPathBuffer,PENUMDIRTREE_CALLBACK Callback,PVOID CallbackData); + BOOL IMAGEAPI MakeSureDirectoryPathExists(PCSTR DirPath); + +#define UNDNAME_COMPLETE (0x0000) +#define UNDNAME_NO_LEADING_UNDERSCORES (0x0001) +#define UNDNAME_NO_MS_KEYWORDS (0x0002) +#define UNDNAME_NO_FUNCTION_RETURNS (0x0004) +#define UNDNAME_NO_ALLOCATION_MODEL (0x0008) +#define UNDNAME_NO_ALLOCATION_LANGUAGE (0x0010) +#define UNDNAME_NO_MS_THISTYPE (0x0020) +#define UNDNAME_NO_CV_THISTYPE (0x0040) +#define UNDNAME_NO_THISTYPE (0x0060) +#define UNDNAME_NO_ACCESS_SPECIFIERS (0x0080) +#define UNDNAME_NO_THROW_SIGNATURES (0x0100) +#define UNDNAME_NO_MEMBER_TYPE (0x0200) +#define UNDNAME_NO_RETURN_UDT_MODEL (0x0400) +#define UNDNAME_32_BIT_DECODE (0x0800) +#define UNDNAME_NAME_ONLY (0x1000) +#define UNDNAME_NO_ARGUMENTS (0x2000) +#define UNDNAME_NO_SPECIAL_SYMS (0x4000) + + DWORD IMAGEAPI WINAPI UnDecorateSymbolName(PCSTR DecoratedName,PSTR UnDecoratedName,DWORD UndecoratedLength,DWORD Flags); + +#define DBHHEADER_DEBUGDIRS 0x1 + + typedef struct _MODLOAD_DATA { + DWORD ssize; + DWORD ssig; + PVOID data; + DWORD size; + DWORD flags; + } MODLOAD_DATA,*PMODLOAD_DATA; + + typedef enum { + AddrMode1616,AddrMode1632,AddrModeReal,AddrModeFlat + } ADDRESS_MODE; + + typedef struct _tagADDRESS64 { + DWORD64 Offset; + WORD Segment; + ADDRESS_MODE Mode; + } ADDRESS64,*LPADDRESS64; + +#ifdef _IMAGEHLP64 +#define ADDRESS ADDRESS64 +#define LPADDRESS LPADDRESS64 +#else + typedef struct _tagADDRESS { + DWORD Offset; + WORD Segment; + ADDRESS_MODE Mode; + } ADDRESS,*LPADDRESS; + + static __inline void Address32To64(LPADDRESS a32,LPADDRESS64 a64) { + a64->Offset = (ULONG64)(LONG64)(LONG)a32->Offset; + a64->Segment = a32->Segment; + a64->Mode = a32->Mode; + } + + static __inline void Address64To32(LPADDRESS64 a64,LPADDRESS a32) { + a32->Offset = (ULONG)a64->Offset; + a32->Segment = a64->Segment; + a32->Mode = a64->Mode; + } +#endif + + typedef struct _KDHELP64 { + DWORD64 Thread; + DWORD ThCallbackStack; + DWORD ThCallbackBStore; + DWORD NextCallback; + DWORD FramePointer; + DWORD64 KiCallUserMode; + DWORD64 
KeUserCallbackDispatcher; + DWORD64 SystemRangeStart; + DWORD64 Reserved[8]; + } KDHELP64,*PKDHELP64; + +#ifdef _IMAGEHLP64 +#define KDHELP KDHELP64 +#define PKDHELP PKDHELP64 +#else + typedef struct _KDHELP { + DWORD Thread; + DWORD ThCallbackStack; + DWORD NextCallback; + DWORD FramePointer; + DWORD KiCallUserMode; + DWORD KeUserCallbackDispatcher; + DWORD SystemRangeStart; + DWORD ThCallbackBStore; + DWORD Reserved[8]; + } KDHELP,*PKDHELP; + + static __inline void KdHelp32To64(PKDHELP p32,PKDHELP64 p64) { + p64->Thread = p32->Thread; + p64->ThCallbackStack = p32->ThCallbackStack; + p64->NextCallback = p32->NextCallback; + p64->FramePointer = p32->FramePointer; + p64->KiCallUserMode = p32->KiCallUserMode; + p64->KeUserCallbackDispatcher = p32->KeUserCallbackDispatcher; + p64->SystemRangeStart = p32->SystemRangeStart; + } +#endif + + typedef struct _tagSTACKFRAME64 { + ADDRESS64 AddrPC; + ADDRESS64 AddrReturn; + ADDRESS64 AddrFrame; + ADDRESS64 AddrStack; + ADDRESS64 AddrBStore; + PVOID FuncTableEntry; + DWORD64 Params[4]; + BOOL Far; + BOOL Virtual; + DWORD64 Reserved[3]; + KDHELP64 KdHelp; + } STACKFRAME64,*LPSTACKFRAME64; + +#ifdef _IMAGEHLP64 +#define STACKFRAME STACKFRAME64 +#define LPSTACKFRAME LPSTACKFRAME64 +#else + typedef struct _tagSTACKFRAME { + ADDRESS AddrPC; + ADDRESS AddrReturn; + ADDRESS AddrFrame; + ADDRESS AddrStack; + PVOID FuncTableEntry; + DWORD Params[4]; + BOOL Far; + BOOL Virtual; + DWORD Reserved[3]; + KDHELP KdHelp; + ADDRESS AddrBStore; + } STACKFRAME,*LPSTACKFRAME; +#endif + + typedef BOOL (WINAPI *PREAD_PROCESS_MEMORY_ROUTINE64)(HANDLE hProcess,DWORD64 qwBaseAddress,PVOID lpBuffer,DWORD nSize,LPDWORD lpNumberOfBytesRead); + typedef PVOID (WINAPI *PFUNCTION_TABLE_ACCESS_ROUTINE64)(HANDLE hProcess,DWORD64 AddrBase); + typedef DWORD64 (WINAPI *PGET_MODULE_BASE_ROUTINE64)(HANDLE hProcess,DWORD64 Address); + typedef DWORD64 (WINAPI *PTRANSLATE_ADDRESS_ROUTINE64)(HANDLE hProcess,HANDLE hThread,LPADDRESS64 lpaddr); + + BOOL IMAGEAPI StackWalk64(DWORD MachineType,HANDLE hProcess,HANDLE hThread,LPSTACKFRAME64 StackFrame,PVOID ContextRecord,PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress); + +#ifdef _IMAGEHLP64 +#define PREAD_PROCESS_MEMORY_ROUTINE PREAD_PROCESS_MEMORY_ROUTINE64 +#define PFUNCTION_TABLE_ACCESS_ROUTINE PFUNCTION_TABLE_ACCESS_ROUTINE64 +#define PGET_MODULE_BASE_ROUTINE PGET_MODULE_BASE_ROUTINE64 +#define PTRANSLATE_ADDRESS_ROUTINE PTRANSLATE_ADDRESS_ROUTINE64 +#define StackWalk StackWalk64 +#else + typedef BOOL (WINAPI *PREAD_PROCESS_MEMORY_ROUTINE)(HANDLE hProcess,DWORD lpBaseAddress,PVOID lpBuffer,DWORD nSize,PDWORD lpNumberOfBytesRead); + typedef PVOID (WINAPI *PFUNCTION_TABLE_ACCESS_ROUTINE)(HANDLE hProcess,DWORD AddrBase); + typedef DWORD (WINAPI *PGET_MODULE_BASE_ROUTINE)(HANDLE hProcess,DWORD Address); + typedef DWORD (WINAPI *PTRANSLATE_ADDRESS_ROUTINE)(HANDLE hProcess,HANDLE hThread,LPADDRESS lpaddr); + + BOOL IMAGEAPI StackWalk(DWORD MachineType,HANDLE hProcess,HANDLE hThread,LPSTACKFRAME StackFrame,PVOID ContextRecord,PREAD_PROCESS_MEMORY_ROUTINE ReadMemoryRoutine,PFUNCTION_TABLE_ACCESS_ROUTINE FunctionTableAccessRoutine,PGET_MODULE_BASE_ROUTINE GetModuleBaseRoutine,PTRANSLATE_ADDRESS_ROUTINE TranslateAddress); +#endif + +#define API_VERSION_NUMBER 9 + + typedef struct API_VERSION { + USHORT MajorVersion; + USHORT MinorVersion; + USHORT Revision; + USHORT Reserved; + } 
API_VERSION,*LPAPI_VERSION; + + LPAPI_VERSION IMAGEAPI ImagehlpApiVersion(VOID); + LPAPI_VERSION IMAGEAPI ImagehlpApiVersionEx(LPAPI_VERSION AppVersion); + DWORD IMAGEAPI GetTimestampForLoadedLibrary(HMODULE Module); + + typedef BOOL (CALLBACK *PSYM_ENUMMODULES_CALLBACK64)(PSTR ModuleName,DWORD64 BaseOfDll,PVOID UserContext); + typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK64)(PSTR SymbolName,DWORD64 SymbolAddress,ULONG SymbolSize,PVOID UserContext); + typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK64W)(PWSTR SymbolName,DWORD64 SymbolAddress,ULONG SymbolSize,PVOID UserContext); + typedef BOOL (CALLBACK *PENUMLOADED_MODULES_CALLBACK64)(PSTR ModuleName,DWORD64 ModuleBase,ULONG ModuleSize,PVOID UserContext); + typedef BOOL (CALLBACK *PSYMBOL_REGISTERED_CALLBACK64)(HANDLE hProcess,ULONG ActionCode,ULONG64 CallbackData,ULONG64 UserContext); + typedef PVOID (CALLBACK *PSYMBOL_FUNCENTRY_CALLBACK)(HANDLE hProcess,DWORD AddrBase,PVOID UserContext); + typedef PVOID (CALLBACK *PSYMBOL_FUNCENTRY_CALLBACK64)(HANDLE hProcess,ULONG64 AddrBase,ULONG64 UserContext); + +#ifdef _IMAGEHLP64 +#define PSYM_ENUMMODULES_CALLBACK PSYM_ENUMMODULES_CALLBACK64 +#define PSYM_ENUMSYMBOLS_CALLBACK PSYM_ENUMSYMBOLS_CALLBACK64 +#define PSYM_ENUMSYMBOLS_CALLBACKW PSYM_ENUMSYMBOLS_CALLBACK64W +#define PENUMLOADED_MODULES_CALLBACK PENUMLOADED_MODULES_CALLBACK64 +#define PSYMBOL_REGISTERED_CALLBACK PSYMBOL_REGISTERED_CALLBACK64 +#define PSYMBOL_FUNCENTRY_CALLBACK PSYMBOL_FUNCENTRY_CALLBACK64 +#else + typedef BOOL (CALLBACK *PSYM_ENUMMODULES_CALLBACK)(PSTR ModuleName,ULONG BaseOfDll,PVOID UserContext); + typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK)(PSTR SymbolName,ULONG SymbolAddress,ULONG SymbolSize,PVOID UserContext); + typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACKW)(PWSTR SymbolName,ULONG SymbolAddress,ULONG SymbolSize,PVOID UserContext); + typedef BOOL (CALLBACK *PENUMLOADED_MODULES_CALLBACK)(PSTR ModuleName,ULONG ModuleBase,ULONG ModuleSize,PVOID UserContext); + typedef BOOL (CALLBACK *PSYMBOL_REGISTERED_CALLBACK)(HANDLE hProcess,ULONG ActionCode,PVOID CallbackData,PVOID UserContext); +#endif + +#define SYMFLAG_VALUEPRESENT 0x00000001 +#define SYMFLAG_REGISTER 0x00000008 +#define SYMFLAG_REGREL 0x00000010 +#define SYMFLAG_FRAMEREL 0x00000020 +#define SYMFLAG_PARAMETER 0x00000040 +#define SYMFLAG_LOCAL 0x00000080 +#define SYMFLAG_CONSTANT 0x00000100 +#define SYMFLAG_EXPORT 0x00000200 +#define SYMFLAG_FORWARDER 0x00000400 +#define SYMFLAG_FUNCTION 0x00000800 +#define SYMFLAG_VIRTUAL 0x00001000 +#define SYMFLAG_THUNK 0x00002000 +#define SYMFLAG_TLSREL 0x00004000 + + typedef enum { + SymNone = 0,SymCoff,SymCv,SymPdb,SymExport,SymDeferred,SymSym,SymDia,SymVirtual,NumSymTypes + } SYM_TYPE; + + typedef struct _IMAGEHLP_SYMBOL64 { + DWORD SizeOfStruct; + DWORD64 Address; + DWORD Size; + DWORD Flags; + DWORD MaxNameLength; + CHAR Name[1]; + } IMAGEHLP_SYMBOL64,*PIMAGEHLP_SYMBOL64; + + typedef struct _IMAGEHLP_SYMBOL64_PACKAGE { + IMAGEHLP_SYMBOL64 sym; + CHAR name[MAX_SYM_NAME + 1]; + } IMAGEHLP_SYMBOL64_PACKAGE,*PIMAGEHLP_SYMBOL64_PACKAGE; + +#ifdef _IMAGEHLP64 + +#define IMAGEHLP_SYMBOL IMAGEHLP_SYMBOL64 +#define PIMAGEHLP_SYMBOL PIMAGEHLP_SYMBOL64 +#define IMAGEHLP_SYMBOL_PACKAGE IMAGEHLP_SYMBOL64_PACKAGE +#define PIMAGEHLP_SYMBOL_PACKAGE PIMAGEHLP_SYMBOL64_PACKAGE +#else + + typedef struct _IMAGEHLP_SYMBOL { + DWORD SizeOfStruct; + DWORD Address; + DWORD Size; + DWORD Flags; + DWORD MaxNameLength; + CHAR Name[1]; + } IMAGEHLP_SYMBOL,*PIMAGEHLP_SYMBOL; + + typedef struct _IMAGEHLP_SYMBOL_PACKAGE { + 
IMAGEHLP_SYMBOL sym; + CHAR name[MAX_SYM_NAME + 1]; + } IMAGEHLP_SYMBOL_PACKAGE,*PIMAGEHLP_SYMBOL_PACKAGE; +#endif + + typedef struct _IMAGEHLP_MODULE64 { + DWORD SizeOfStruct; + DWORD64 BaseOfImage; + DWORD ImageSize; + DWORD TimeDateStamp; + DWORD CheckSum; + DWORD NumSyms; + SYM_TYPE SymType; + CHAR ModuleName[32]; + CHAR ImageName[256]; + CHAR LoadedImageName[256]; + CHAR LoadedPdbName[256]; + DWORD CVSig; + CHAR CVData[MAX_PATH*3]; + DWORD PdbSig; + GUID PdbSig70; + DWORD PdbAge; + BOOL PdbUnmatched; + BOOL DbgUnmatched; + BOOL LineNumbers; + BOOL GlobalSymbols; + BOOL TypeInfo; + } IMAGEHLP_MODULE64,*PIMAGEHLP_MODULE64; + + typedef struct _IMAGEHLP_MODULE64W { + DWORD SizeOfStruct; + DWORD64 BaseOfImage; + DWORD ImageSize; + DWORD TimeDateStamp; + DWORD CheckSum; + DWORD NumSyms; + SYM_TYPE SymType; + WCHAR ModuleName[32]; + WCHAR ImageName[256]; + WCHAR LoadedImageName[256]; + WCHAR LoadedPdbName[256]; + DWORD CVSig; + WCHAR CVData[MAX_PATH*3]; + DWORD PdbSig; + GUID PdbSig70; + DWORD PdbAge; + BOOL PdbUnmatched; + BOOL DbgUnmatched; + BOOL LineNumbers; + BOOL GlobalSymbols; + BOOL TypeInfo; + } IMAGEHLP_MODULEW64,*PIMAGEHLP_MODULEW64; + +#ifdef _IMAGEHLP64 +#define IMAGEHLP_MODULE IMAGEHLP_MODULE64 +#define PIMAGEHLP_MODULE PIMAGEHLP_MODULE64 +#define IMAGEHLP_MODULEW IMAGEHLP_MODULEW64 +#define PIMAGEHLP_MODULEW PIMAGEHLP_MODULEW64 +#else + typedef struct _IMAGEHLP_MODULE { + DWORD SizeOfStruct; + DWORD BaseOfImage; + DWORD ImageSize; + DWORD TimeDateStamp; + DWORD CheckSum; + DWORD NumSyms; + SYM_TYPE SymType; + CHAR ModuleName[32]; + CHAR ImageName[256]; + CHAR LoadedImageName[256]; + } IMAGEHLP_MODULE,*PIMAGEHLP_MODULE; + + typedef struct _IMAGEHLP_MODULEW { + DWORD SizeOfStruct; + DWORD BaseOfImage; + DWORD ImageSize; + DWORD TimeDateStamp; + DWORD CheckSum; + DWORD NumSyms; + SYM_TYPE SymType; + WCHAR ModuleName[32]; + WCHAR ImageName[256]; + WCHAR LoadedImageName[256]; + } IMAGEHLP_MODULEW,*PIMAGEHLP_MODULEW; +#endif + + typedef struct _IMAGEHLP_LINE64 { + DWORD SizeOfStruct; + PVOID Key; + DWORD LineNumber; + PCHAR FileName; + DWORD64 Address; + } IMAGEHLP_LINE64,*PIMAGEHLP_LINE64; + +#ifdef _IMAGEHLP64 +#define IMAGEHLP_LINE IMAGEHLP_LINE64 +#define PIMAGEHLP_LINE PIMAGEHLP_LINE64 +#else + typedef struct _IMAGEHLP_LINE { + DWORD SizeOfStruct; + PVOID Key; + DWORD LineNumber; + PCHAR FileName; + DWORD Address; + } IMAGEHLP_LINE,*PIMAGEHLP_LINE; +#endif + + typedef struct _SOURCEFILE { + DWORD64 ModBase; + PCHAR FileName; + } SOURCEFILE,*PSOURCEFILE; + +#define CBA_DEFERRED_SYMBOL_LOAD_START 0x00000001 +#define CBA_DEFERRED_SYMBOL_LOAD_COMPLETE 0x00000002 +#define CBA_DEFERRED_SYMBOL_LOAD_FAILURE 0x00000003 +#define CBA_SYMBOLS_UNLOADED 0x00000004 +#define CBA_DUPLICATE_SYMBOL 0x00000005 +#define CBA_READ_MEMORY 0x00000006 +#define CBA_DEFERRED_SYMBOL_LOAD_CANCEL 0x00000007 +#define CBA_SET_OPTIONS 0x00000008 +#define CBA_EVENT 0x00000010 +#define CBA_DEFERRED_SYMBOL_LOAD_PARTIAL 0x00000020 +#define CBA_DEBUG_INFO 0x10000000 + + typedef struct _IMAGEHLP_CBA_READ_MEMORY { + DWORD64 addr; + PVOID buf; + DWORD bytes; + DWORD *bytesread; + } IMAGEHLP_CBA_READ_MEMORY,*PIMAGEHLP_CBA_READ_MEMORY; + + enum { + sevInfo = 0,sevProblem,sevAttn,sevFatal,sevMax + }; + + typedef struct _IMAGEHLP_CBA_EVENT { + DWORD severity; + DWORD code; + PCHAR desc; + PVOID object; + } IMAGEHLP_CBA_EVENT,*PIMAGEHLP_CBA_EVENT; + + typedef struct _IMAGEHLP_DEFERRED_SYMBOL_LOAD64 { + DWORD SizeOfStruct; + DWORD64 BaseOfImage; + DWORD CheckSum; + DWORD TimeDateStamp; + CHAR FileName[MAX_PATH]; + BOOLEAN 
Reparse; + HANDLE hFile; + DWORD Flags; + } IMAGEHLP_DEFERRED_SYMBOL_LOAD64,*PIMAGEHLP_DEFERRED_SYMBOL_LOAD64; + +#define DSLFLAG_MISMATCHED_PDB 0x1 +#define DSLFLAG_MISMATCHED_DBG 0x2 + +#ifdef _IMAGEHLP64 +#define IMAGEHLP_DEFERRED_SYMBOL_LOAD IMAGEHLP_DEFERRED_SYMBOL_LOAD64 +#define PIMAGEHLP_DEFERRED_SYMBOL_LOAD PIMAGEHLP_DEFERRED_SYMBOL_LOAD64 +#else + typedef struct _IMAGEHLP_DEFERRED_SYMBOL_LOAD { + DWORD SizeOfStruct; + DWORD BaseOfImage; + DWORD CheckSum; + DWORD TimeDateStamp; + CHAR FileName[MAX_PATH]; + BOOLEAN Reparse; + HANDLE hFile; + } IMAGEHLP_DEFERRED_SYMBOL_LOAD,*PIMAGEHLP_DEFERRED_SYMBOL_LOAD; +#endif + + typedef struct _IMAGEHLP_DUPLICATE_SYMBOL64 { + DWORD SizeOfStruct; + DWORD NumberOfDups; + PIMAGEHLP_SYMBOL64 Symbol; + DWORD SelectedSymbol; + } IMAGEHLP_DUPLICATE_SYMBOL64,*PIMAGEHLP_DUPLICATE_SYMBOL64; + +#ifdef _IMAGEHLP64 +#define IMAGEHLP_DUPLICATE_SYMBOL IMAGEHLP_DUPLICATE_SYMBOL64 +#define PIMAGEHLP_DUPLICATE_SYMBOL PIMAGEHLP_DUPLICATE_SYMBOL64 +#else + typedef struct _IMAGEHLP_DUPLICATE_SYMBOL { + DWORD SizeOfStruct; + DWORD NumberOfDups; + PIMAGEHLP_SYMBOL Symbol; + DWORD SelectedSymbol; + } IMAGEHLP_DUPLICATE_SYMBOL,*PIMAGEHLP_DUPLICATE_SYMBOL; +#endif + + BOOL IMAGEAPI SymSetParentWindow(HWND hwnd); + PCHAR IMAGEAPI SymSetHomeDirectory(PCSTR dir); + PCHAR IMAGEAPI SymGetHomeDirectory(DWORD type,PSTR dir,size_t size); + + enum { + hdBase = 0,hdSym,hdSrc,hdMax + }; + +#define SYMOPT_CASE_INSENSITIVE 0x00000001 +#define SYMOPT_UNDNAME 0x00000002 +#define SYMOPT_DEFERRED_LOADS 0x00000004 +#define SYMOPT_NO_CPP 0x00000008 +#define SYMOPT_LOAD_LINES 0x00000010 +#define SYMOPT_OMAP_FIND_NEAREST 0x00000020 +#define SYMOPT_LOAD_ANYTHING 0x00000040 +#define SYMOPT_IGNORE_CVREC 0x00000080 +#define SYMOPT_NO_UNQUALIFIED_LOADS 0x00000100 +#define SYMOPT_FAIL_CRITICAL_ERRORS 0x00000200 +#define SYMOPT_EXACT_SYMBOLS 0x00000400 +#define SYMOPT_ALLOW_ABSOLUTE_SYMBOLS 0x00000800 +#define SYMOPT_IGNORE_NT_SYMPATH 0x00001000 +#define SYMOPT_INCLUDE_32BIT_MODULES 0x00002000 +#define SYMOPT_PUBLICS_ONLY 0x00004000 +#define SYMOPT_NO_PUBLICS 0x00008000 +#define SYMOPT_AUTO_PUBLICS 0x00010000 +#define SYMOPT_NO_IMAGE_SEARCH 0x00020000 +#define SYMOPT_SECURE 0x00040000 +#define SYMOPT_NO_PROMPTS 0x00080000 + +#define SYMOPT_DEBUG 0x80000000 + + DWORD IMAGEAPI SymSetOptions(DWORD SymOptions); + DWORD IMAGEAPI SymGetOptions(VOID); + BOOL IMAGEAPI SymCleanup(HANDLE hProcess); + BOOL IMAGEAPI SymMatchString(LPSTR string,LPSTR expression,BOOL fCase); + + typedef BOOL (CALLBACK *PSYM_ENUMSOURCFILES_CALLBACK)(PSOURCEFILE pSourceFile,PVOID UserContext); + + BOOL IMAGEAPI SymEnumSourceFiles(HANDLE hProcess,ULONG64 ModBase,LPSTR Mask,PSYM_ENUMSOURCFILES_CALLBACK cbSrcFiles,PVOID UserContext); + BOOL IMAGEAPI SymEnumerateModules64(HANDLE hProcess,PSYM_ENUMMODULES_CALLBACK64 EnumModulesCallback,PVOID UserContext); + +#ifdef _IMAGEHLP64 +#define SymEnumerateModules SymEnumerateModules64 +#else + BOOL IMAGEAPI SymEnumerateModules(HANDLE hProcess,PSYM_ENUMMODULES_CALLBACK EnumModulesCallback,PVOID UserContext); +#endif + + BOOL IMAGEAPI SymEnumerateSymbols64(HANDLE hProcess,DWORD64 BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK64 EnumSymbolsCallback,PVOID UserContext); + BOOL IMAGEAPI SymEnumerateSymbolsW64(HANDLE hProcess,DWORD64 BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK64W EnumSymbolsCallback,PVOID UserContext); + +#ifdef _IMAGEHLP64 +#define SymEnumerateSymbols SymEnumerateSymbols64 +#define SymEnumerateSymbolsW SymEnumerateSymbolsW64 +#else + BOOL IMAGEAPI SymEnumerateSymbols(HANDLE hProcess,DWORD 
BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + BOOL IMAGEAPI SymEnumerateSymbolsW(HANDLE hProcess,DWORD BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACKW EnumSymbolsCallback,PVOID UserContext); +#endif + + BOOL IMAGEAPI EnumerateLoadedModules64(HANDLE hProcess,PENUMLOADED_MODULES_CALLBACK64 EnumLoadedModulesCallback,PVOID UserContext); +#ifdef _IMAGEHLP64 +#define EnumerateLoadedModules EnumerateLoadedModules64 +#else + BOOL IMAGEAPI EnumerateLoadedModules(HANDLE hProcess,PENUMLOADED_MODULES_CALLBACK EnumLoadedModulesCallback,PVOID UserContext); +#endif + + PVOID IMAGEAPI SymFunctionTableAccess64(HANDLE hProcess,DWORD64 AddrBase); + +#ifdef _IMAGEHLP64 +#define SymFunctionTableAccess SymFunctionTableAccess64 +#else + PVOID IMAGEAPI SymFunctionTableAccess(HANDLE hProcess,DWORD AddrBase); +#endif + + BOOL IMAGEAPI SymGetModuleInfo64(HANDLE hProcess,DWORD64 qwAddr,PIMAGEHLP_MODULE64 ModuleInfo); + BOOL IMAGEAPI SymGetModuleInfoW64(HANDLE hProcess,DWORD64 qwAddr,PIMAGEHLP_MODULEW64 ModuleInfo); + +#ifdef _IMAGEHLP64 +#define SymGetModuleInfo SymGetModuleInfo64 +#define SymGetModuleInfoW SymGetModuleInfoW64 +#else + BOOL IMAGEAPI SymGetModuleInfo(HANDLE hProcess,DWORD dwAddr,PIMAGEHLP_MODULE ModuleInfo); + BOOL IMAGEAPI SymGetModuleInfoW(HANDLE hProcess,DWORD dwAddr,PIMAGEHLP_MODULEW ModuleInfo); +#endif + + DWORD64 IMAGEAPI SymGetModuleBase64(HANDLE hProcess,DWORD64 qwAddr); + +#ifdef _IMAGEHLP64 +#define SymGetModuleBase SymGetModuleBase64 +#else + DWORD IMAGEAPI SymGetModuleBase(HANDLE hProcess,DWORD dwAddr); +#endif + + BOOL IMAGEAPI SymGetSymNext64(HANDLE hProcess,PIMAGEHLP_SYMBOL64 Symbol); + +#ifdef _IMAGEHLP64 +#define SymGetSymNext SymGetSymNext64 +#else + BOOL IMAGEAPI SymGetSymNext(HANDLE hProcess,PIMAGEHLP_SYMBOL Symbol); +#endif + + BOOL IMAGEAPI SymGetSymPrev64(HANDLE hProcess,PIMAGEHLP_SYMBOL64 Symbol); + +#ifdef _IMAGEHLP64 +#define SymGetSymPrev SymGetSymPrev64 +#else + BOOL IMAGEAPI SymGetSymPrev(HANDLE hProcess,PIMAGEHLP_SYMBOL Symbol); +#endif + + typedef struct _SRCCODEINFO { + DWORD SizeOfStruct; + PVOID Key; + DWORD64 ModBase; + CHAR Obj[MAX_PATH + 1]; + CHAR FileName[MAX_PATH + 1]; + DWORD LineNumber; + DWORD64 Address; + } SRCCODEINFO,*PSRCCODEINFO; + + typedef BOOL (CALLBACK *PSYM_ENUMLINES_CALLBACK)(PSRCCODEINFO LineInfo,PVOID UserContext); + + BOOL IMAGEAPI SymEnumLines(HANDLE hProcess,ULONG64 Base,PCSTR Obj,PCSTR File,PSYM_ENUMLINES_CALLBACK EnumLinesCallback,PVOID UserContext); + BOOL IMAGEAPI SymGetLineFromAddr64(HANDLE hProcess,DWORD64 qwAddr,PDWORD pdwDisplacement,PIMAGEHLP_LINE64 Line64); + +#ifdef _IMAGEHLP64 +#define SymGetLineFromAddr SymGetLineFromAddr64 +#else + BOOL IMAGEAPI SymGetLineFromAddr(HANDLE hProcess,DWORD dwAddr,PDWORD pdwDisplacement,PIMAGEHLP_LINE Line); +#endif + + BOOL IMAGEAPI SymGetLineFromName64(HANDLE hProcess,PSTR ModuleName,PSTR FileName,DWORD dwLineNumber,PLONG plDisplacement,PIMAGEHLP_LINE64 Line); + +#ifdef _IMAGEHLP64 +#define SymGetLineFromName SymGetLineFromName64 +#else + BOOL IMAGEAPI SymGetLineFromName(HANDLE hProcess,PSTR ModuleName,PSTR FileName,DWORD dwLineNumber,PLONG plDisplacement,PIMAGEHLP_LINE Line); +#endif + + BOOL IMAGEAPI SymGetLineNext64(HANDLE hProcess,PIMAGEHLP_LINE64 Line); + +#ifdef _IMAGEHLP64 +#define SymGetLineNext SymGetLineNext64 +#else + BOOL IMAGEAPI SymGetLineNext(HANDLE hProcess,PIMAGEHLP_LINE Line); +#endif + + BOOL IMAGEAPI SymGetLinePrev64(HANDLE hProcess,PIMAGEHLP_LINE64 Line); + +#ifdef _IMAGEHLP64 +#define SymGetLinePrev SymGetLinePrev64 +#else + BOOL IMAGEAPI 
SymGetLinePrev(HANDLE hProcess,PIMAGEHLP_LINE Line); +#endif + + BOOL IMAGEAPI SymMatchFileName(PSTR FileName,PSTR Match,PSTR *FileNameStop,PSTR *MatchStop); + BOOL IMAGEAPI SymInitialize(HANDLE hProcess,PSTR UserSearchPath,BOOL fInvadeProcess); + BOOL IMAGEAPI SymGetSearchPath(HANDLE hProcess,PSTR SearchPath,DWORD SearchPathLength); + BOOL IMAGEAPI SymSetSearchPath(HANDLE hProcess,PSTR SearchPath); + DWORD64 IMAGEAPI SymLoadModule64(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD64 BaseOfDll,DWORD SizeOfDll); + +#define SLMFLAG_VIRTUAL 0x1 + + DWORD64 IMAGEAPI SymLoadModuleEx(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD64 BaseOfDll,DWORD DllSize,PMODLOAD_DATA Data,DWORD Flags); + +#ifdef _IMAGEHLP64 +#define SymLoadModule SymLoadModule64 +#else + DWORD IMAGEAPI SymLoadModule(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD BaseOfDll,DWORD SizeOfDll); +#endif + + BOOL IMAGEAPI SymUnloadModule64(HANDLE hProcess,DWORD64 BaseOfDll); + +#ifdef _IMAGEHLP64 +#define SymUnloadModule SymUnloadModule64 +#else + BOOL IMAGEAPI SymUnloadModule(HANDLE hProcess,DWORD BaseOfDll); +#endif + + BOOL IMAGEAPI SymUnDName64(PIMAGEHLP_SYMBOL64 sym,PSTR UnDecName,DWORD UnDecNameLength); + +#ifdef _IMAGEHLP64 +#define SymUnDName SymUnDName64 +#else + BOOL IMAGEAPI SymUnDName(PIMAGEHLP_SYMBOL sym,PSTR UnDecName,DWORD UnDecNameLength); +#endif + + BOOL IMAGEAPI SymRegisterCallback64(HANDLE hProcess,PSYMBOL_REGISTERED_CALLBACK64 CallbackFunction,ULONG64 UserContext); + + BOOL IMAGEAPI SymRegisterFunctionEntryCallback64(HANDLE hProcess,PSYMBOL_FUNCENTRY_CALLBACK64 CallbackFunction,ULONG64 UserContext); + +#ifdef _IMAGEHLP64 +#define SymRegisterCallback SymRegisterCallback64 +#define SymRegisterFunctionEntryCallback SymRegisterFunctionEntryCallback64 +#else + BOOL IMAGEAPI SymRegisterCallback(HANDLE hProcess,PSYMBOL_REGISTERED_CALLBACK CallbackFunction,PVOID UserContext); + BOOL IMAGEAPI SymRegisterFunctionEntryCallback(HANDLE hProcess,PSYMBOL_FUNCENTRY_CALLBACK CallbackFunction,PVOID UserContext); +#endif + + typedef struct _IMAGEHLP_SYMBOL_SRC { + DWORD sizeofstruct; + DWORD type; + char file[MAX_PATH]; + } IMAGEHLP_SYMBOL_SRC,*PIMAGEHLP_SYMBOL_SRC; + + typedef struct _MODULE_TYPE_INFO { + USHORT dataLength; + USHORT leaf; + BYTE data[1]; + } MODULE_TYPE_INFO,*PMODULE_TYPE_INFO; + + typedef struct _SYMBOL_INFO { + ULONG SizeOfStruct; + ULONG TypeIndex; + ULONG64 Reserved[2]; + ULONG info; + ULONG Size; + ULONG64 ModBase; + ULONG Flags; + ULONG64 Value; + ULONG64 Address; + ULONG Register; + ULONG Scope; + ULONG Tag; + ULONG NameLen; + ULONG MaxNameLen; + CHAR Name[1]; + } SYMBOL_INFO,*PSYMBOL_INFO; + + typedef struct _SYMBOL_INFO_PACKAGE { + SYMBOL_INFO si; + CHAR name[MAX_SYM_NAME + 1]; + } SYMBOL_INFO_PACKAGE,*PSYMBOL_INFO_PACKAGE; + + typedef struct _IMAGEHLP_STACK_FRAME + { + ULONG64 InstructionOffset; + ULONG64 ReturnOffset; + ULONG64 FrameOffset; + ULONG64 StackOffset; + ULONG64 BackingStoreOffset; + ULONG64 FuncTableEntry; + ULONG64 Params[4]; + ULONG64 Reserved[5]; + BOOL Virtual; + ULONG Reserved2; + } IMAGEHLP_STACK_FRAME,*PIMAGEHLP_STACK_FRAME; + + typedef VOID IMAGEHLP_CONTEXT,*PIMAGEHLP_CONTEXT; + + BOOL IMAGEAPI SymSetContext(HANDLE hProcess,PIMAGEHLP_STACK_FRAME StackFrame,PIMAGEHLP_CONTEXT Context); + BOOL IMAGEAPI SymFromAddr(HANDLE hProcess,DWORD64 Address,PDWORD64 Displacement,PSYMBOL_INFO Symbol); + BOOL IMAGEAPI SymFromToken(HANDLE hProcess,DWORD64 Base,DWORD Token,PSYMBOL_INFO Symbol); + BOOL IMAGEAPI SymFromName(HANDLE hProcess,LPSTR 
Name,PSYMBOL_INFO Symbol); + + typedef BOOL (CALLBACK *PSYM_ENUMERATESYMBOLS_CALLBACK)(PSYMBOL_INFO pSymInfo,ULONG SymbolSize,PVOID UserContext); + + BOOL IMAGEAPI SymEnumSymbols(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Mask,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + BOOL IMAGEAPI SymEnumSymbolsForAddr(HANDLE hProcess,DWORD64 Address,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + +#define SYMENUMFLAG_FULLSRCH 1 +#define SYMENUMFLAG_SPEEDSRCH 2 + + typedef enum _IMAGEHLP_SYMBOL_TYPE_INFO { + TI_GET_SYMTAG,TI_GET_SYMNAME,TI_GET_LENGTH,TI_GET_TYPE,TI_GET_TYPEID,TI_GET_BASETYPE,TI_GET_ARRAYINDEXTYPEID,TI_FINDCHILDREN, + TI_GET_DATAKIND,TI_GET_ADDRESSOFFSET,TI_GET_OFFSET,TI_GET_VALUE,TI_GET_COUNT,TI_GET_CHILDRENCOUNT,TI_GET_BITPOSITION,TI_GET_VIRTUALBASECLASS, + TI_GET_VIRTUALTABLESHAPEID,TI_GET_VIRTUALBASEPOINTEROFFSET,TI_GET_CLASSPARENTID,TI_GET_NESTED,TI_GET_SYMINDEX,TI_GET_LEXICALPARENT, + TI_GET_ADDRESS,TI_GET_THISADJUST,TI_GET_UDTKIND,TI_IS_EQUIV_TO,TI_GET_CALLING_CONVENTION + } IMAGEHLP_SYMBOL_TYPE_INFO; + + typedef struct _TI_FINDCHILDREN_PARAMS { + ULONG Count; + ULONG Start; + ULONG ChildId[1]; + } TI_FINDCHILDREN_PARAMS; + + BOOL IMAGEAPI SymGetTypeInfo(HANDLE hProcess,DWORD64 ModBase,ULONG TypeId,IMAGEHLP_SYMBOL_TYPE_INFO GetType,PVOID pInfo); + BOOL IMAGEAPI SymEnumTypes(HANDLE hProcess,ULONG64 BaseOfDll,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + BOOL IMAGEAPI SymGetTypeFromName(HANDLE hProcess,ULONG64 BaseOfDll,LPSTR Name,PSYMBOL_INFO Symbol); + BOOL IMAGEAPI SymAddSymbol(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Name,DWORD64 Address,DWORD Size,DWORD Flags); + BOOL IMAGEAPI SymDeleteSymbol(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Name,DWORD64 Address,DWORD Flags); + + typedef BOOL (WINAPI *PDBGHELP_CREATE_USER_DUMP_CALLBACK)(DWORD DataType,PVOID *Data,LPDWORD DataLength,PVOID UserData); + + BOOL WINAPI DbgHelpCreateUserDump(LPSTR FileName,PDBGHELP_CREATE_USER_DUMP_CALLBACK Callback,PVOID UserData); + BOOL WINAPI DbgHelpCreateUserDumpW(LPWSTR FileName,PDBGHELP_CREATE_USER_DUMP_CALLBACK Callback,PVOID UserData); + BOOL IMAGEAPI SymGetSymFromAddr64(HANDLE hProcess,DWORD64 qwAddr,PDWORD64 pdwDisplacement,PIMAGEHLP_SYMBOL64 Symbol); + +#ifdef _IMAGEHLP64 +#define SymGetSymFromAddr SymGetSymFromAddr64 +#else + BOOL IMAGEAPI SymGetSymFromAddr(HANDLE hProcess,DWORD dwAddr,PDWORD pdwDisplacement,PIMAGEHLP_SYMBOL Symbol); +#endif + + BOOL IMAGEAPI SymGetSymFromName64(HANDLE hProcess,PSTR Name,PIMAGEHLP_SYMBOL64 Symbol); + +#ifdef _IMAGEHLP64 +#define SymGetSymFromName SymGetSymFromName64 +#else + BOOL IMAGEAPI SymGetSymFromName(HANDLE hProcess,PSTR Name,PIMAGEHLP_SYMBOL Symbol); +#endif + + DBHLP_DEPRECIATED BOOL IMAGEAPI FindFileInPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,PVOID id,DWORD two,DWORD three,DWORD flags,LPSTR FilePath); + DBHLP_DEPRECIATED BOOL IMAGEAPI FindFileInSearchPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,DWORD one,DWORD two,DWORD three,LPSTR FilePath); + DBHLP_DEPRECIATED BOOL IMAGEAPI SymEnumSym(HANDLE hProcess,ULONG64 BaseOfDll,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + +#define SYMF_OMAP_GENERATED 0x00000001 +#define SYMF_OMAP_MODIFIED 0x00000002 +#define SYMF_REGISTER 0x00000008 +#define SYMF_REGREL 0x00000010 +#define SYMF_FRAMEREL 0x00000020 +#define SYMF_PARAMETER 0x00000040 +#define SYMF_LOCAL 0x00000080 +#define SYMF_CONSTANT 0x00000100 +#define SYMF_EXPORT 0x00000200 +#define SYMF_FORWARDER 0x00000400 +#define SYMF_FUNCTION 
0x00000800 +#define SYMF_VIRTUAL 0x00001000 +#define SYMF_THUNK 0x00002000 +#define SYMF_TLSREL 0x00004000 + +#define IMAGEHLP_SYMBOL_INFO_VALUEPRESENT 1 +#define IMAGEHLP_SYMBOL_INFO_REGISTER SYMF_REGISTER +#define IMAGEHLP_SYMBOL_INFO_REGRELATIVE SYMF_REGREL +#define IMAGEHLP_SYMBOL_INFO_FRAMERELATIVE SYMF_FRAMEREL +#define IMAGEHLP_SYMBOL_INFO_PARAMETER SYMF_PARAMETER +#define IMAGEHLP_SYMBOL_INFO_LOCAL SYMF_LOCAL +#define IMAGEHLP_SYMBOL_INFO_CONSTANT SYMF_CONSTANT +#define IMAGEHLP_SYMBOL_FUNCTION SYMF_FUNCTION +#define IMAGEHLP_SYMBOL_VIRTUAL SYMF_VIRTUAL +#define IMAGEHLP_SYMBOL_THUNK SYMF_THUNK +#define IMAGEHLP_SYMBOL_INFO_TLSRELATIVE SYMF_TLSREL + +#include <pshpack4.h> + +#define MINIDUMP_SIGNATURE ('PMDM') +#define MINIDUMP_VERSION (42899) + typedef DWORD RVA; + typedef ULONG64 RVA64; + + typedef struct _MINIDUMP_LOCATION_DESCRIPTOR { + ULONG32 DataSize; + RVA Rva; + } MINIDUMP_LOCATION_DESCRIPTOR; + + typedef struct _MINIDUMP_LOCATION_DESCRIPTOR64 { + ULONG64 DataSize; + RVA64 Rva; + } MINIDUMP_LOCATION_DESCRIPTOR64; + + typedef struct _MINIDUMP_MEMORY_DESCRIPTOR { + ULONG64 StartOfMemoryRange; + MINIDUMP_LOCATION_DESCRIPTOR Memory; + } MINIDUMP_MEMORY_DESCRIPTOR,*PMINIDUMP_MEMORY_DESCRIPTOR; + + typedef struct _MINIDUMP_MEMORY_DESCRIPTOR64 { + ULONG64 StartOfMemoryRange; + ULONG64 DataSize; + } MINIDUMP_MEMORY_DESCRIPTOR64,*PMINIDUMP_MEMORY_DESCRIPTOR64; + + typedef struct _MINIDUMP_HEADER { + ULONG32 Signature; + ULONG32 Version; + ULONG32 NumberOfStreams; + RVA StreamDirectoryRva; + ULONG32 CheckSum; + union { + ULONG32 Reserved; + ULONG32 TimeDateStamp; + } DUMMYUNIONNAME; + ULONG64 Flags; + } MINIDUMP_HEADER,*PMINIDUMP_HEADER; + + typedef struct _MINIDUMP_DIRECTORY { + ULONG32 StreamType; + MINIDUMP_LOCATION_DESCRIPTOR Location; + } MINIDUMP_DIRECTORY,*PMINIDUMP_DIRECTORY; + + typedef struct _MINIDUMP_STRING { + ULONG32 Length; + WCHAR Buffer [0]; + } MINIDUMP_STRING,*PMINIDUMP_STRING; + + typedef enum _MINIDUMP_STREAM_TYPE { + UnusedStream = 0,ReservedStream0 = 1,ReservedStream1 = 2,ThreadListStream = 3,ModuleListStream = 4,MemoryListStream = 5, + ExceptionStream = 6,SystemInfoStream = 7,ThreadExListStream = 8,Memory64ListStream = 9,CommentStreamA = 10,CommentStreamW = 11, + HandleDataStream = 12,FunctionTableStream = 13,UnloadedModuleListStream = 14,MiscInfoStream = 15,LastReservedStream = 0xffff + } MINIDUMP_STREAM_TYPE; + + typedef union _CPU_INFORMATION { + struct { + ULONG32 VendorId [3 ]; + ULONG32 VersionInformation; + ULONG32 FeatureInformation; + ULONG32 AMDExtendedCpuFeatures; + } X86CpuInfo; + struct { + ULONG64 ProcessorFeatures [2 ]; + } OtherCpuInfo; + } CPU_INFORMATION,*PCPU_INFORMATION; + + typedef struct _MINIDUMP_SYSTEM_INFO { + USHORT ProcessorArchitecture; + USHORT ProcessorLevel; + USHORT ProcessorRevision; + union { + USHORT Reserved0; + struct { + UCHAR NumberOfProcessors; + UCHAR ProductType; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + ULONG32 MajorVersion; + ULONG32 MinorVersion; + ULONG32 BuildNumber; + ULONG32 PlatformId; + RVA CSDVersionRva; + union { + ULONG32 Reserved1; + struct { + USHORT SuiteMask; + USHORT Reserved2; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME1; + CPU_INFORMATION Cpu; + } MINIDUMP_SYSTEM_INFO,*PMINIDUMP_SYSTEM_INFO; + + C_ASSERT (sizeof (((PPROCESS_INFORMATION)0)->dwThreadId)==4); + + typedef struct _MINIDUMP_THREAD { + ULONG32 ThreadId; + ULONG32 SuspendCount; + ULONG32 PriorityClass; + ULONG32 Priority; + ULONG64 Teb; + MINIDUMP_MEMORY_DESCRIPTOR Stack; + MINIDUMP_LOCATION_DESCRIPTOR ThreadContext; + } 
MINIDUMP_THREAD,*PMINIDUMP_THREAD; + + typedef struct _MINIDUMP_THREAD_LIST { + ULONG32 NumberOfThreads; + MINIDUMP_THREAD Threads [0]; + } MINIDUMP_THREAD_LIST,*PMINIDUMP_THREAD_LIST; + + typedef struct _MINIDUMP_THREAD_EX { + ULONG32 ThreadId; + ULONG32 SuspendCount; + ULONG32 PriorityClass; + ULONG32 Priority; + ULONG64 Teb; + MINIDUMP_MEMORY_DESCRIPTOR Stack; + MINIDUMP_LOCATION_DESCRIPTOR ThreadContext; + MINIDUMP_MEMORY_DESCRIPTOR BackingStore; + } MINIDUMP_THREAD_EX,*PMINIDUMP_THREAD_EX; + + typedef struct _MINIDUMP_THREAD_EX_LIST { + ULONG32 NumberOfThreads; + MINIDUMP_THREAD_EX Threads [0]; + } MINIDUMP_THREAD_EX_LIST,*PMINIDUMP_THREAD_EX_LIST; + + typedef struct _MINIDUMP_EXCEPTION { + ULONG32 ExceptionCode; + ULONG32 ExceptionFlags; + ULONG64 ExceptionRecord; + ULONG64 ExceptionAddress; + ULONG32 NumberParameters; + ULONG32 __unusedAlignment; + ULONG64 ExceptionInformation [EXCEPTION_MAXIMUM_PARAMETERS ]; + } MINIDUMP_EXCEPTION,*PMINIDUMP_EXCEPTION; + + typedef struct MINIDUMP_EXCEPTION_STREAM { + ULONG32 ThreadId; + ULONG32 __alignment; + MINIDUMP_EXCEPTION ExceptionRecord; + MINIDUMP_LOCATION_DESCRIPTOR ThreadContext; + } MINIDUMP_EXCEPTION_STREAM,*PMINIDUMP_EXCEPTION_STREAM; + + typedef struct _MINIDUMP_MODULE { + ULONG64 BaseOfImage; + ULONG32 SizeOfImage; + ULONG32 CheckSum; + ULONG32 TimeDateStamp; + RVA ModuleNameRva; + VS_FIXEDFILEINFO VersionInfo; + MINIDUMP_LOCATION_DESCRIPTOR CvRecord; + MINIDUMP_LOCATION_DESCRIPTOR MiscRecord; + ULONG64 Reserved0; + ULONG64 Reserved1; + } MINIDUMP_MODULE,*PMINIDUMP_MODULE; + + typedef struct _MINIDUMP_MODULE_LIST { + ULONG32 NumberOfModules; + MINIDUMP_MODULE Modules [0 ]; + } MINIDUMP_MODULE_LIST,*PMINIDUMP_MODULE_LIST; + + typedef struct _MINIDUMP_MEMORY_LIST { + ULONG32 NumberOfMemoryRanges; + MINIDUMP_MEMORY_DESCRIPTOR MemoryRanges [0]; + } MINIDUMP_MEMORY_LIST,*PMINIDUMP_MEMORY_LIST; + + typedef struct _MINIDUMP_MEMORY64_LIST { + ULONG64 NumberOfMemoryRanges; + RVA64 BaseRva; + MINIDUMP_MEMORY_DESCRIPTOR64 MemoryRanges [0]; + } MINIDUMP_MEMORY64_LIST,*PMINIDUMP_MEMORY64_LIST; + + typedef struct _MINIDUMP_EXCEPTION_INFORMATION { + DWORD ThreadId; + PEXCEPTION_POINTERS ExceptionPointers; + BOOL ClientPointers; + } MINIDUMP_EXCEPTION_INFORMATION,*PMINIDUMP_EXCEPTION_INFORMATION; + + typedef struct _MINIDUMP_EXCEPTION_INFORMATION64 { + DWORD ThreadId; + ULONG64 ExceptionRecord; + ULONG64 ContextRecord; + BOOL ClientPointers; + } MINIDUMP_EXCEPTION_INFORMATION64,*PMINIDUMP_EXCEPTION_INFORMATION64; + + typedef struct _MINIDUMP_HANDLE_DESCRIPTOR { + ULONG64 Handle; + RVA TypeNameRva; + RVA ObjectNameRva; + ULONG32 Attributes; + ULONG32 GrantedAccess; + ULONG32 HandleCount; + ULONG32 PointerCount; + } MINIDUMP_HANDLE_DESCRIPTOR,*PMINIDUMP_HANDLE_DESCRIPTOR; + + typedef struct _MINIDUMP_HANDLE_DATA_STREAM { + ULONG32 SizeOfHeader; + ULONG32 SizeOfDescriptor; + ULONG32 NumberOfDescriptors; + ULONG32 Reserved; + } MINIDUMP_HANDLE_DATA_STREAM,*PMINIDUMP_HANDLE_DATA_STREAM; + + typedef struct _MINIDUMP_FUNCTION_TABLE_DESCRIPTOR { + ULONG64 MinimumAddress; + ULONG64 MaximumAddress; + ULONG64 BaseAddress; + ULONG32 EntryCount; + ULONG32 SizeOfAlignPad; + } MINIDUMP_FUNCTION_TABLE_DESCRIPTOR,*PMINIDUMP_FUNCTION_TABLE_DESCRIPTOR; + + typedef struct _MINIDUMP_FUNCTION_TABLE_STREAM { + ULONG32 SizeOfHeader; + ULONG32 SizeOfDescriptor; + ULONG32 SizeOfNativeDescriptor; + ULONG32 SizeOfFunctionEntry; + ULONG32 NumberOfDescriptors; + ULONG32 SizeOfAlignPad; + } MINIDUMP_FUNCTION_TABLE_STREAM,*PMINIDUMP_FUNCTION_TABLE_STREAM; + + typedef struct 
_MINIDUMP_UNLOADED_MODULE { + ULONG64 BaseOfImage; + ULONG32 SizeOfImage; + ULONG32 CheckSum; + ULONG32 TimeDateStamp; + RVA ModuleNameRva; + } MINIDUMP_UNLOADED_MODULE,*PMINIDUMP_UNLOADED_MODULE; + + typedef struct _MINIDUMP_UNLOADED_MODULE_LIST { + ULONG32 SizeOfHeader; + ULONG32 SizeOfEntry; + ULONG32 NumberOfEntries; + } MINIDUMP_UNLOADED_MODULE_LIST,*PMINIDUMP_UNLOADED_MODULE_LIST; + +#define MINIDUMP_MISC1_PROCESS_ID 0x00000001 +#define MINIDUMP_MISC1_PROCESS_TIMES 0x00000002 + + typedef struct _MINIDUMP_MISC_INFO { + ULONG32 SizeOfInfo; + ULONG32 Flags1; + ULONG32 ProcessId; + ULONG32 ProcessCreateTime; + ULONG32 ProcessUserTime; + ULONG32 ProcessKernelTime; + } MINIDUMP_MISC_INFO,*PMINIDUMP_MISC_INFO; + + typedef struct _MINIDUMP_USER_RECORD { + ULONG32 Type; + MINIDUMP_LOCATION_DESCRIPTOR Memory; + } MINIDUMP_USER_RECORD,*PMINIDUMP_USER_RECORD; + + typedef struct _MINIDUMP_USER_STREAM { + ULONG32 Type; + ULONG BufferSize; + PVOID Buffer; + } MINIDUMP_USER_STREAM,*PMINIDUMP_USER_STREAM; + + typedef struct _MINIDUMP_USER_STREAM_INFORMATION { + ULONG UserStreamCount; + PMINIDUMP_USER_STREAM UserStreamArray; + } MINIDUMP_USER_STREAM_INFORMATION,*PMINIDUMP_USER_STREAM_INFORMATION; + + typedef enum _MINIDUMP_CALLBACK_TYPE { + ModuleCallback,ThreadCallback,ThreadExCallback,IncludeThreadCallback,IncludeModuleCallback,MemoryCallback + } MINIDUMP_CALLBACK_TYPE; + + typedef struct _MINIDUMP_THREAD_CALLBACK { + ULONG ThreadId; + HANDLE ThreadHandle; + CONTEXT Context; + ULONG SizeOfContext; + ULONG64 StackBase; + ULONG64 StackEnd; + } MINIDUMP_THREAD_CALLBACK,*PMINIDUMP_THREAD_CALLBACK; + + typedef struct _MINIDUMP_THREAD_EX_CALLBACK { + ULONG ThreadId; + HANDLE ThreadHandle; + CONTEXT Context; + ULONG SizeOfContext; + ULONG64 StackBase; + ULONG64 StackEnd; + ULONG64 BackingStoreBase; + ULONG64 BackingStoreEnd; + } MINIDUMP_THREAD_EX_CALLBACK,*PMINIDUMP_THREAD_EX_CALLBACK; + + typedef struct _MINIDUMP_INCLUDE_THREAD_CALLBACK { + ULONG ThreadId; + } MINIDUMP_INCLUDE_THREAD_CALLBACK,*PMINIDUMP_INCLUDE_THREAD_CALLBACK; + + typedef enum _THREAD_WRITE_FLAGS { + ThreadWriteThread = 0x0001,ThreadWriteStack = 0x0002,ThreadWriteContext = 0x0004,ThreadWriteBackingStore = 0x0008, + ThreadWriteInstructionWindow = 0x0010,ThreadWriteThreadData = 0x0020 + } THREAD_WRITE_FLAGS; + + typedef struct _MINIDUMP_MODULE_CALLBACK { + PWCHAR FullPath; + ULONG64 BaseOfImage; + ULONG SizeOfImage; + ULONG CheckSum; + ULONG TimeDateStamp; + VS_FIXEDFILEINFO VersionInfo; + PVOID CvRecord; + ULONG SizeOfCvRecord; + PVOID MiscRecord; + ULONG SizeOfMiscRecord; + } MINIDUMP_MODULE_CALLBACK,*PMINIDUMP_MODULE_CALLBACK; + + typedef struct _MINIDUMP_INCLUDE_MODULE_CALLBACK { + ULONG64 BaseOfImage; + } MINIDUMP_INCLUDE_MODULE_CALLBACK,*PMINIDUMP_INCLUDE_MODULE_CALLBACK; + + typedef enum _MODULE_WRITE_FLAGS { + ModuleWriteModule = 0x0001,ModuleWriteDataSeg = 0x0002,ModuleWriteMiscRecord = 0x0004,ModuleWriteCvRecord = 0x0008, + ModuleReferencedByMemory = 0x0010 + } MODULE_WRITE_FLAGS; + + typedef struct _MINIDUMP_CALLBACK_INPUT { + ULONG ProcessId; + HANDLE ProcessHandle; + ULONG CallbackType; + union { + MINIDUMP_THREAD_CALLBACK Thread; + MINIDUMP_THREAD_EX_CALLBACK ThreadEx; + MINIDUMP_MODULE_CALLBACK Module; + MINIDUMP_INCLUDE_THREAD_CALLBACK IncludeThread; + MINIDUMP_INCLUDE_MODULE_CALLBACK IncludeModule; + } DUMMYUNIONNAME; + } MINIDUMP_CALLBACK_INPUT,*PMINIDUMP_CALLBACK_INPUT; + + typedef struct _MINIDUMP_CALLBACK_OUTPUT { + union { + ULONG ModuleWriteFlags; + ULONG ThreadWriteFlags; + struct { + ULONG64 MemoryBase; + ULONG 
MemorySize; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + } MINIDUMP_CALLBACK_OUTPUT,*PMINIDUMP_CALLBACK_OUTPUT; + + typedef enum _MINIDUMP_TYPE { + MiniDumpNormal = 0x0000,MiniDumpWithDataSegs = 0x0001,MiniDumpWithFullMemory = 0x0002,MiniDumpWithHandleData = 0x0004, + MiniDumpFilterMemory = 0x0008,MiniDumpScanMemory = 0x0010,MiniDumpWithUnloadedModules = 0x0020,MiniDumpWithIndirectlyReferencedMemory = 0x0040, + MiniDumpFilterModulePaths = 0x0080,MiniDumpWithProcessThreadData = 0x0100,MiniDumpWithPrivateReadWriteMemory = 0x0200, + MiniDumpWithoutOptionalData = 0x0400 + } MINIDUMP_TYPE; + + typedef BOOL (WINAPI *MINIDUMP_CALLBACK_ROUTINE)(PVOID CallbackParam,CONST PMINIDUMP_CALLBACK_INPUT CallbackInput,PMINIDUMP_CALLBACK_OUTPUT CallbackOutput); + + typedef struct _MINIDUMP_CALLBACK_INFORMATION { + MINIDUMP_CALLBACK_ROUTINE CallbackRoutine; + PVOID CallbackParam; + } MINIDUMP_CALLBACK_INFORMATION,*PMINIDUMP_CALLBACK_INFORMATION; + +#define RVA_TO_ADDR(Mapping,Rva) ((PVOID)(((ULONG_PTR) (Mapping)) + (Rva))) + + BOOL WINAPI MiniDumpWriteDump(HANDLE hProcess,DWORD ProcessId,HANDLE hFile,MINIDUMP_TYPE DumpType,CONST PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam,CONST PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam,CONST PMINIDUMP_CALLBACK_INFORMATION CallbackParam); + BOOL WINAPI MiniDumpReadDumpStream(PVOID BaseOfDump,ULONG StreamNumber,PMINIDUMP_DIRECTORY *Dir,PVOID *StreamPointer,ULONG *StreamSize); + +#include <poppack.h> + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index c11f7d383db..76bd7ace526 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -199,6 +199,7 @@ get_next_slot( struct blit_state *ctx ) if (!ctx->vbuf) { ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, max_slots * sizeof ctx->vertices); } @@ -480,6 +481,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_save_vertex_shader(ctx->cso); cso_save_clip(ctx->cso); cso_save_vertex_elements(ctx->cso); + cso_save_vertex_buffers(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); @@ -554,7 +556,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, s1, t1, z); - util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, offset, + util_draw_vertex_buffer(ctx->pipe, ctx->cso, ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ 2); /* attribs/vert */ @@ -571,6 +573,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_restore_vertex_shader(ctx->cso); cso_restore_clip(ctx->cso); cso_restore_vertex_elements(ctx->cso); + cso_restore_vertex_buffers(ctx->cso); pipe_sampler_view_reference(&sampler_view, NULL); } @@ -672,6 +675,7 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_save_vertex_shader(ctx->cso); cso_save_clip(ctx->cso); cso_save_vertex_elements(ctx->cso); + cso_save_vertex_buffers(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); @@ -722,7 +726,7 @@ util_blit_pixels_tex(struct blit_state *ctx, s0, t0, s1, t1, z); - util_draw_vertex_buffer(ctx->pipe, + util_draw_vertex_buffer(ctx->pipe, ctx->cso, ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ @@ -740,4 +744,5 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_restore_vertex_shader(ctx->cso); cso_restore_clip(ctx->cso); cso_restore_vertex_elements(ctx->cso); + cso_restore_vertex_buffers(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 545021d2642..fd1c2b72d04 100644 
--- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -86,7 +86,6 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; - void *dsa_flush_depth_stencil; void *velem_state; @@ -156,10 +155,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); - dsa.depth.writemask = 1; - ctx->dsa_flush_depth_stencil = - pipe->create_depth_stencil_alpha_state(pipe, &dsa); - dsa.depth.enabled = 1; dsa.depth.writemask = 1; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -225,9 +220,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->vertices[i][0][3] = 1; /*v.w*/ /* create the vertex buffer */ - ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - sizeof(ctx->vertices)); + ctx->vbuf = pipe_user_buffer_create(ctx->base.pipe->screen, + ctx->vertices, + sizeof(ctx->vertices), + PIPE_BIND_VERTEX_BUFFER); return &ctx->base; } @@ -245,7 +241,6 @@ void util_blitter_destroy(struct blitter_context *blitter) ctx->dsa_write_depth_keep_stencil); pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil); - pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); pipe->delete_rasterizer_state(pipe, ctx->rs_state); pipe->delete_vs_state(pipe, ctx->vs); @@ -272,6 +267,12 @@ void util_blitter_destroy(struct blitter_context *blitter) static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) { + if (ctx->base.running) { + _debug_printf("u_blitter: Caught recursion on save. " + "This is a driver bug.\n"); + } + ctx->base.running = TRUE; + /* make sure these CSOs have been saved */ assert(ctx->base.saved_blend_state != INVALID_PTR && ctx->base.saved_dsa_state != INVALID_PTR && @@ -302,7 +303,6 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) ctx->base.saved_velem_state = INVALID_PTR; pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); - pipe->set_viewport_state(pipe, &ctx->base.saved_viewport); pipe->set_clip_state(pipe, &ctx->base.saved_clip); @@ -346,6 +346,12 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) } ctx->base.saved_num_vertex_buffers = ~0; } + + if (!ctx->base.running) { + _debug_printf("u_blitter: Caught recursion on restore. 
" + "This is a driver bug.\n"); + } + ctx->base.running = FALSE; } static void blitter_set_rectangle(struct blitter_context_priv *ctx, @@ -509,22 +515,6 @@ static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx, ctx->dst_height = height; } -static void blitter_draw_quad(struct blitter_context_priv *ctx) -{ - struct pipe_context *pipe = ctx->base.pipe; - struct pipe_box box; - - /* write vertices and draw them */ - u_box_1d(0, sizeof(ctx->vertices), &box); - pipe->transfer_inline_write(pipe, ctx->vbuf, 0, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &box, ctx->vertices, sizeof(ctx->vertices), 0); - - util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ -} - static INLINE void **blitter_get_sampler_state(struct blitter_context_priv *ctx, int miplevel, boolean normalized) @@ -649,15 +639,19 @@ static void blitter_draw_rectangle(struct blitter_context *blitter, } blitter_set_rectangle(ctx, x1, y1, x2, y2, depth); - blitter_draw_quad(ctx); + ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf, + 0, ctx->vbuf->width0); + util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0, + PIPE_PRIM_TRIANGLE_FAN, 4, 2); } -void util_blitter_clear(struct blitter_context *blitter, - unsigned width, unsigned height, - unsigned num_cbufs, - unsigned clear_buffers, - const float *rgba, - double depth, unsigned stencil) +static void util_blitter_clear_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + unsigned num_cbufs, + unsigned clear_buffers, + const float *rgba, + double depth, unsigned stencil, + void *custom_blend, void *custom_dsa) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; @@ -668,26 +662,28 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_check_saved_CSOs(ctx); /* bind CSOs */ - if (clear_buffers & PIPE_CLEAR_COLOR) + if (custom_blend) { + pipe->bind_blend_state(pipe, custom_blend); + } else if (clear_buffers & PIPE_CLEAR_COLOR) { pipe->bind_blend_state(pipe, ctx->blend_write_color); - else + } else { pipe->bind_blend_state(pipe, ctx->blend_keep_color); + } - if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { - sr.ref_value[0] = stencil & 0xff; + if (custom_dsa) { + pipe->bind_depth_stencil_alpha_state(pipe, custom_dsa); + } else if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); - pipe->set_stencil_ref(pipe, &sr); - } - else if (clear_buffers & PIPE_CLEAR_DEPTH) { + } else if (clear_buffers & PIPE_CLEAR_DEPTH) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil); - } - else if (clear_buffers & PIPE_CLEAR_STENCIL) { - sr.ref_value[0] = stencil & 0xff; + } else if (clear_buffers & PIPE_CLEAR_STENCIL) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil); - pipe->set_stencil_ref(pipe, &sr); - } - else + } else { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); + } + + sr.ref_value[0] = stencil & 0xff; + pipe->set_stencil_ref(pipe, &sr); pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); @@ -700,6 +696,27 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_restore_CSOs(ctx); } +void util_blitter_clear(struct blitter_context *blitter, + unsigned width, unsigned height, + unsigned num_cbufs, + unsigned clear_buffers, + const 
float *rgba, + double depth, unsigned stencil) +{ + util_blitter_clear_custom(blitter, width, height, num_cbufs, + clear_buffers, rgba, depth, stencil, + NULL, NULL); +} + +void util_blitter_clear_depth_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + double depth, void *custom_dsa) +{ + const float rgba[4] = {0, 0, 0, 0}; + util_blitter_clear_custom(blitter, width, height, 0, + 0, rgba, depth, 0, NULL, custom_dsa); +} + static boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2) @@ -737,9 +754,6 @@ void util_blitter_copy_region(struct blitter_context *blitter, if (dst == src) { assert(!is_overlap(srcbox->x, srcbox->x + width, srcbox->y, srcbox->y + height, dstx, dstx + width, dsty, dsty + height)); - } else { - assert(util_is_format_compatible(util_format_description(src->format), - util_format_description(dst->format))); } assert(src->target < PIPE_MAX_TEXTURE_TYPES); /* XXX should handle 3d regions */ @@ -761,8 +775,10 @@ void util_blitter_copy_region(struct blitter_context *blitter, dst->nr_samples, bind, 0) || !screen->is_format_supported(screen, src->format, src->target, src->nr_samples, PIPE_BIND_SAMPLER_VIEW, 0)) { + ctx->base.running = TRUE; util_resource_copy_region(pipe, dst, dstlevel, dstx, dsty, dstz, src, srclevel, srcbox); + ctx->base.running = FALSE; return; } @@ -853,7 +869,10 @@ void util_blitter_copy_region(struct blitter_context *blitter, /* Draw. */ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); - blitter_draw_quad(ctx); + ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf, + 0, ctx->vbuf->width0); + util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0, + PIPE_PRIM_TRIANGLE_FAN, 4, 2); break; default: @@ -1014,12 +1033,3 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } - -/* flush a region of a depth stencil surface for r300g */ -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf) -{ - struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - util_blitter_custom_depth_stencil(blitter, dstsurf, NULL, - ctx->dsa_flush_depth_stencil, 0.0f); -} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 922a8580ac1..41470d92bba 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -55,13 +55,13 @@ struct blitter_context * \param y1 A Y coordinate of the top-left corner. * \param x2 An X coordinate of the bottom-right corner. * \param y2 A Y coordinate of the bottom-right corner. - * \param depth A depth which the rectangle is rendered at. + * \param depth A depth which the rectangle is rendered at. * * \param type Semantics of the attributes "attrib". * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them. * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes - * make up a constant RGBA color, and should go to the COLOR0 - * varying slot of a fragment shader. + * make up a constant RGBA color, and should go + * to the GENERIC0 varying slot of a fragment shader. * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and * {a3, a4} specify top-left and bottom-right texture * coordinates of the rectangle, respectively, and should go @@ -79,6 +79,9 @@ struct blitter_context enum blitter_attrib_type type, const float attrib[4]); + /* Whether the blitter is running. 
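The flag is set while the blitter owns the pipe state, so a recursive save or restore (a driver bug) can be detected and reported.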
*/ + boolean running; + /* Private members, really. */ struct pipe_context *pipe; /**< pipe context */ @@ -141,6 +144,10 @@ void util_blitter_clear(struct blitter_context *blitter, const float *rgba, double depth, unsigned stencil); +void util_blitter_clear_depth_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + double depth, void *custom_dsa); + /** * Copy a block of pixels from one surface to another. * @@ -200,9 +207,6 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf); - void util_blitter_custom_depth_stencil(struct blitter_context *blitter, struct pipe_surface *zsurf, struct pipe_surface *cbsurf, diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index f4ad545bee7..36ce4b57713 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -44,6 +44,7 @@ #include "util/u_surface.h" #include <limits.h> /* CHAR_BIT */ +#include <ctype.h> /* isalnum */ void _debug_vprintf(const char *format, va_list ap) { @@ -180,6 +181,48 @@ debug_get_num_option(const char *name, long dfault) return result; } +static boolean str_has_option(const char *str, const char *name) +{ + /* Empty string. */ + if (!*str) { + return FALSE; + } + + /* OPTION=all */ + if (!util_strcmp(str, "all")) { + return TRUE; + } + + /* Find 'name' in 'str' surrounded by non-alphanumeric characters. */ + { + const char *start = str; + unsigned name_len = strlen(name); + + /* 'start' is the beginning of the currently-parsed word, + * we increment 'str' each iteration. + * if we find either the end of string or a non-alphanumeric character, + * we compare 'start' up to 'str-1' with 'name'. 
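For example, given the option string "tgsi,constbuf", the name "tgsi" matches, but a mere substring such as "gsi" does not; the old util_strstr() check would have accepted it.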
*/ + + while (1) { + if (!*str || !isalnum(*str)) { + if (str-start == name_len && + !memcmp(start, name, name_len)) { + return TRUE; + } + + if (!*str) { + return FALSE; + } + + start = str+1; + } + + str++; + } + } + + return FALSE; +} unsigned long debug_get_flags_option(const char *name, @@ -207,7 +250,7 @@ debug_get_flags_option(const char *name, else { result = 0; while( flags->name ) { - if (!util_strcmp(str, "all") || util_strstr(str, flags->name )) + if (str_has_option(str, flags->name)) result |= flags->value; ++flags; } @@ -359,6 +402,41 @@ const char *u_prim_name( unsigned prim ) +#ifdef DEBUG +int fl_indent = 0; +const char* fl_function[1024]; + +int debug_funclog_enter(const char* f, const int line, const char* file) +{ + int i; + + for (i = 0; i < fl_indent; i++) + debug_printf(" "); + debug_printf("%s\n", f); + + assert(fl_indent < 1023); + fl_function[fl_indent++] = f; + + return 0; +} + +void debug_funclog_exit(const char* f, const int line, const char* file) +{ + --fl_indent; + assert(fl_indent >= 0); + assert(fl_function[fl_indent] == f); +} + +void debug_funclog_enter_exit(const char* f, const int line, const char* file) +{ + int i; + for (i = 0; i < fl_indent; i++) + debug_printf(" "); + debug_printf("%s\n", f); +} +#endif + + #ifdef DEBUG /** diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 1c9624ea3ed..c47c13c64cf 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -280,6 +280,43 @@ debug_dump_flags(const struct debug_named_value *names, /** + * Function enter exit loggers + */ +#ifdef DEBUG +int debug_funclog_enter(const char* f, const int line, const char* file); +void debug_funclog_exit(const char* f, const int line, const char* file); +void debug_funclog_enter_exit(const char* f, const int line, const char* file); + +#define DEBUG_FUNCLOG_ENTER() \ + int __debug_decleration_work_around = \ + debug_funclog_enter(__FUNCTION__, __LINE__, __FILE__) +#define DEBUG_FUNCLOG_EXIT() \ + do { \ + (void)__debug_decleration_work_around; \ + debug_funclog_exit(__FUNCTION__, __LINE__, __FILE__); \ + return; \ + } while(0) +#define DEBUG_FUNCLOG_EXIT_RET(ret) \ + do { \ + (void)__debug_decleration_work_around; \ + debug_funclog_exit(__FUNCTION__, __LINE__, __FILE__); \ + return ret; \ + } while(0) +#define DEBUG_FUNCLOG_ENTER_EXIT() \ + debug_funclog_enter_exit(__FUNCTION__, __LINE__, __FILE__) + +#else +#define DEBUG_FUNCLOG_ENTER() \ + int __debug_decleration_work_around +#define DEBUG_FUNCLOG_EXIT() \ + do { (void)__debug_decleration_work_around; return; } while(0) +#define DEBUG_FUNCLOG_EXIT_RET(ret) \ + do { (void)__debug_decleration_work_around; return ret; } while(0) +#define DEBUG_FUNCLOG_ENTER_EXIT() +#endif + + +/** * Get option. * * It is an alias for getenv on Linux. diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c index 40a26c9c697..6f706a35fda 100644 --- a/src/gallium/auxiliary/util/u_debug_refcnt.c +++ b/src/gallium/auxiliary/util/u_debug_refcnt.c @@ -43,7 +43,8 @@ int debug_refcnt_state; struct os_stream* stream; /* TODO: maybe move this serial machinery to a stand-alone module and expose it? 
*/ -static pipe_mutex serials_mutex; +pipe_static_mutex(serials_mutex); + static struct util_hash_table* serials_hash; static unsigned serials_last; @@ -66,6 +67,15 @@ static boolean debug_serial(void* p, unsigned* pserial) { unsigned serial; boolean found = TRUE; +#ifdef PIPE_SUBSYSTEM_WINDOWS_USER + static boolean first = TRUE; + + if (first) { + pipe_mutex_init(serials_mutex); + first = FALSE; + } +#endif + pipe_mutex_lock(serials_mutex); if(!serials_hash) serials_hash = util_hash_table_create(hash_ptr, compare_ptr); diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c index 44d437747a1..bae9be87a26 100644 --- a/src/gallium/auxiliary/util/u_debug_symbol.c +++ b/src/gallium/auxiliary/util/u_debug_symbol.c @@ -40,20 +40,43 @@ #include "u_debug_symbol.h" #include "u_hash_table.h" -#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) +#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86) #include <windows.h> #include <stddef.h> -#include <imagehlp.h> -/* - * TODO: Cleanup code. - * TODO: Support x86_64 - */ +#include "dbghelp.h" + static BOOL bSymInitialized = FALSE; -static HMODULE hModule_Imagehlp = NULL; +static HMODULE hModule_Dbghelp = NULL; + + +static +FARPROC WINAPI __GetProcAddress(LPCSTR lpProcName) +{ +#ifdef PIPE_CC_GCC + if (!hModule_Dbghelp) { + /* + * bfdhelp.dll is a dbghelp.dll look-alike replacement, which is able to + * understand MinGW symbols using BFD library. It is available from + * http://people.freedesktop.org/~jrfonseca/bfdhelp/ for now. + */ + hModule_Dbghelp = LoadLibraryA("bfdhelp.dll"); + } +#endif + + if (!hModule_Dbghelp) { + hModule_Dbghelp = LoadLibraryA("dbghelp.dll"); + if (!hModule_Dbghelp) { + return NULL; + } + } + + return GetProcAddress(hModule_Dbghelp, lpProcName); +} + typedef BOOL (WINAPI *PFNSYMINITIALIZE)(HANDLE, LPSTR, BOOL); static PFNSYMINITIALIZE pfnSymInitialize = NULL; @@ -62,8 +85,7 @@ static BOOL WINAPI j_SymInitialize(HANDLE hProcess, PSTR UserSearchPath, BOOL fInvadeProcess) { if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymInitialize || (pfnSymInitialize = (PFNSYMINITIALIZE) GetProcAddress(hModule_Imagehlp, "SymInitialize"))) + (pfnSymInitialize || (pfnSymInitialize = (PFNSYMINITIALIZE) __GetProcAddress("SymInitialize"))) ) return pfnSymInitialize(hProcess, UserSearchPath, fInvadeProcess); else @@ -77,57 +99,41 @@ static DWORD WINAPI j_SymSetOptions(DWORD SymOptions) { if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymSetOptions || (pfnSymSetOptions = (PFNSYMSETOPTIONS) GetProcAddress(hModule_Imagehlp, "SymSetOptions"))) + (pfnSymSetOptions || (pfnSymSetOptions = (PFNSYMSETOPTIONS) __GetProcAddress("SymSetOptions"))) ) return pfnSymSetOptions(SymOptions); else return FALSE; } -typedef PGET_MODULE_BASE_ROUTINE PFNSYMGETMODULEBASE; -static PFNSYMGETMODULEBASE pfnSymGetModuleBase = NULL; +typedef BOOL (WINAPI *PFNSYMGETSYMFROMADDR)(HANDLE, DWORD64, PDWORD64, PSYMBOL_INFO); +static PFNSYMGETSYMFROMADDR pfnSymFromAddr = NULL; static -DWORD WINAPI j_SymGetModuleBase(HANDLE hProcess, DWORD dwAddr) +BOOL WINAPI j_SymFromAddr(HANDLE hProcess, DWORD64 Address, PDWORD64 Displacement, PSYMBOL_INFO Symbol) { if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymGetModuleBase || (pfnSymGetModuleBase = (PFNSYMGETMODULEBASE) GetProcAddress(hModule_Imagehlp, "SymGetModuleBase"))) + (pfnSymFromAddr || (pfnSymFromAddr = (PFNSYMGETSYMFROMADDR) __GetProcAddress("SymFromAddr"))) ) - return 
pfnSymGetModuleBase(hProcess, dwAddr); - else - return 0; -} - -typedef BOOL (WINAPI *PFNSYMGETSYMFROMADDR)(HANDLE, DWORD, LPDWORD, PIMAGEHLP_SYMBOL); -static PFNSYMGETSYMFROMADDR pfnSymGetSymFromAddr = NULL; - -static -BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacement, PIMAGEHLP_SYMBOL Symbol) -{ - if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymGetSymFromAddr || (pfnSymGetSymFromAddr = (PFNSYMGETSYMFROMADDR) GetProcAddress(hModule_Imagehlp, "SymGetSymFromAddr"))) - ) - return pfnSymGetSymFromAddr(hProcess, Address, Displacement, Symbol); + return pfnSymFromAddr(hProcess, Address, Displacement, Symbol); else return FALSE; } static INLINE void -debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size) +debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size) { HANDLE hProcess; BYTE symbolBuffer[1024]; - PIMAGEHLP_SYMBOL pSymbol = (PIMAGEHLP_SYMBOL) symbolBuffer; - DWORD dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */ + PSYMBOL_INFO pSymbol = (PSYMBOL_INFO) symbolBuffer; + DWORD64 dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */ hProcess = GetCurrentProcess(); + memset(pSymbol, 0, sizeof *pSymbol); pSymbol->SizeOfStruct = sizeof(symbolBuffer); - pSymbol->MaxNameLength = sizeof(symbolBuffer) - offsetof(IMAGEHLP_SYMBOL, Name); + pSymbol->MaxNameLen = sizeof(symbolBuffer) - offsetof(SYMBOL_INFO, Name); if(!bSymInitialized) { j_SymSetOptions(/* SYMOPT_UNDNAME | */ SYMOPT_LOAD_LINES); @@ -135,7 +141,7 @@ debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size) bSymInitialized = TRUE; } - if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol)) + if(!j_SymFromAddr(hProcess, (DWORD64)(uintptr_t)addr, &dwDisplacement, pSymbol)) buf[0] = 0; else { @@ -165,8 +171,8 @@ debug_symbol_name_glibc(const void *addr, char* buf, unsigned size) void debug_symbol_name(const void *addr, char* buf, unsigned size) { -#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) - debug_symbol_name_imagehlp(addr, buf, size); +#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86) + debug_symbol_name_dbghelp(addr, buf, size); if(buf[0]) return; #endif @@ -190,7 +196,7 @@ debug_symbol_print(const void *addr) } struct util_hash_table* symbols_hash; -pipe_mutex symbols_mutex; +pipe_static_mutex(symbols_mutex); static unsigned hash_ptr(void* p) { @@ -211,6 +217,15 @@ const char* debug_symbol_name_cached(const void *addr) { const char* name; +#ifdef PIPE_SUBSYSTEM_WINDOWS_USER + static boolean first = TRUE; + + if (first) { + pipe_mutex_init(symbols_mutex); + first = FALSE; + } +#endif + pipe_mutex_lock(symbols_mutex); if(!symbols_hash) symbols_hash = util_hash_table_create(hash_ptr, compare_ptr); diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 0b6dc5880f3..0defd919974 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -31,6 +31,7 @@ #include "util/u_inlines.h" #include "util/u_draw_quad.h" #include "util/u_memory.h" +#include "cso_cache/cso_context.h" /** @@ -39,6 +40,7 @@ */ void util_draw_vertex_buffer(struct pipe_context *pipe, + struct cso_context *cso, struct pipe_resource *vbuf, uint offset, uint prim_type, @@ -54,8 +56,12 @@ util_draw_vertex_buffer(struct pipe_context *pipe, vbuffer.buffer = vbuf; vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 
offset; - vbuffer.max_index = num_verts - 1; - pipe->set_vertex_buffers(pipe, 1, &vbuffer); + + if (cso) { + cso_set_vertex_buffers(cso, 1, &vbuffer); + } else { + pipe->set_vertex_buffers(pipe, 1, &vbuffer); + } /* note: vertex elements already set by caller */ @@ -70,7 +76,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, * Note: this isn't especially efficient. */ void -util_draw_texquad(struct pipe_context *pipe, +util_draw_texquad(struct pipe_context *pipe, struct cso_context *cso, float x0, float y0, float x1, float y1, float z) { uint numAttribs = 2, i, j; @@ -118,7 +124,7 @@ util_draw_texquad(struct pipe_context *pipe, if (!vbuf) goto out; - util_draw_vertex_buffer(pipe, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); + util_draw_vertex_buffer(pipe, cso, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); out: if (vbuf) diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 52994fe05c3..f1167786f0e 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -38,17 +38,18 @@ extern "C" { #endif struct pipe_resource; +struct cso_context; #include "util/u_draw.h" extern void -util_draw_vertex_buffer(struct pipe_context *pipe, +util_draw_vertex_buffer(struct pipe_context *pipe, struct cso_context *cso, struct pipe_resource *vbuf, uint offset, uint num_attribs, uint num_verts, uint prim_type); extern void -util_draw_texquad(struct pipe_context *pipe, +util_draw_texquad(struct pipe_context *pipe, struct cso_context *cso, float x0, float y0, float x1, float y1, float z); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index b471d59eebf..5ecf8cbb067 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -681,7 +681,6 @@ util_dump_vertex_buffer(struct os_stream *stream, const struct pipe_vertex_buffe util_dump_struct_begin(stream, "pipe_vertex_buffer"); util_dump_member(stream, uint, state, stride); - util_dump_member(stream, uint, state, max_index); util_dump_member(stream, uint, state, buffer_offset); util_dump_member(stream, ptr, state, buffer); diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 03b73c0e98f..7659a802a41 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -673,7 +673,8 @@ util_format_has_alpha(enum pipe_format format) } /** - * Return the matching SRGB format, or PIPE_FORMAT_NONE if none. + * Given a linear RGB colorspace format, return the corresponding SRGB + * format, or PIPE_FORMAT_NONE if none. */ static INLINE enum pipe_format util_format_srgb(enum pipe_format format) @@ -711,6 +712,45 @@ util_format_srgb(enum pipe_format format) } /** + * Given an sRGB format, return the corresponding linear colorspace format. + * For non sRGB formats, return the format unchanged. 
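For example, PIPE_FORMAT_B8G8R8A8_SRGB maps back to PIPE_FORMAT_B8G8R8A8_UNORM.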
+ */ +static INLINE enum pipe_format +util_format_linear(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_SRGB: + return PIPE_FORMAT_L8_UNORM; + case PIPE_FORMAT_L8A8_SRGB: + return PIPE_FORMAT_L8A8_UNORM; + case PIPE_FORMAT_R8G8B8_SRGB: + return PIPE_FORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_A8B8G8R8_SRGB: + return PIPE_FORMAT_A8B8G8R8_UNORM; + case PIPE_FORMAT_X8B8G8R8_SRGB: + return PIPE_FORMAT_X8B8G8R8_UNORM; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return PIPE_FORMAT_B8G8R8A8_UNORM; + case PIPE_FORMAT_B8G8R8X8_SRGB: + return PIPE_FORMAT_B8G8R8X8_UNORM; + case PIPE_FORMAT_A8R8G8B8_SRGB: + return PIPE_FORMAT_A8R8G8B8_UNORM; + case PIPE_FORMAT_X8R8G8B8_SRGB: + return PIPE_FORMAT_X8R8G8B8_UNORM; + case PIPE_FORMAT_DXT1_SRGB: + return PIPE_FORMAT_DXT1_RGB; + case PIPE_FORMAT_DXT1_SRGBA: + return PIPE_FORMAT_DXT1_RGBA; + case PIPE_FORMAT_DXT3_SRGBA: + return PIPE_FORMAT_DXT3_RGBA; + case PIPE_FORMAT_DXT5_SRGBA: + return PIPE_FORMAT_DXT5_RGBA; + default: + return format; + } +} + +/** * Return the number of components stored. * Formats with block size != 1x1 will always have 1 component (the block). */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index d22ae8b375b..3b6342ad8d1 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1350,6 +1350,7 @@ get_next_slot(struct gen_mipmap_state *ctx) if (!ctx->vbuf) { ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, max_slots * sizeof ctx->vertices); } @@ -1616,7 +1617,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, face, rcoord); - util_draw_vertex_buffer(ctx->pipe, + util_draw_vertex_buffer(ctx->pipe, + ctx->cso, ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c index f2c9db3caf1..d0a28b5fdfa 100644 --- a/src/gallium/auxiliary/util/u_index_modify.c +++ b/src/gallium/auxiliary/util/u_index_modify.c @@ -38,7 +38,10 @@ void util_shorten_ubyte_elts_to_userptr(struct pipe_context *context, unsigned short *out_map = out; unsigned i; - in_map = pipe_buffer_map(context, elts, PIPE_TRANSFER_READ, &src_transfer); + in_map = pipe_buffer_map(context, elts, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, + &src_transfer); in_map += start; for (i = 0; i < count; i++) { @@ -62,6 +65,7 @@ void util_shorten_ubyte_elts(struct pipe_context *context, new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, @@ -87,7 +91,10 @@ void util_rebuild_ushort_elts_to_userptr(struct pipe_context *context, unsigned short *out_map = out; unsigned i; - in_map = pipe_buffer_map(context, elts, PIPE_TRANSFER_READ, &in_transfer); + in_map = pipe_buffer_map(context, elts, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, + &in_transfer); in_map += start; for (i = 0; i < count; i++) { @@ -110,6 +117,7 @@ void util_rebuild_ushort_elts(struct pipe_context *context, new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); out_map = pipe_buffer_map(context, new_elts, @@ -135,7 +143,10 @@ void util_rebuild_uint_elts_to_userptr(struct pipe_context *context, unsigned int *out_map = out; unsigned i; - in_map = pipe_buffer_map(context, elts, PIPE_TRANSFER_READ, &in_transfer); + in_map = pipe_buffer_map(context, elts, + PIPE_TRANSFER_READ | + 
PIPE_TRANSFER_UNSYNCHRONIZED, + &in_transfer); in_map += start; for (i = 0; i < count; i++) { @@ -158,6 +169,7 @@ void util_rebuild_uint_elts(struct pipe_context *context, new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); out_map = pipe_buffer_map(context, new_elts, diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index b4870bce981..98889fb70ac 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -160,6 +160,21 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps, pipe_surface_reset(ctx, ps, pt, level, layer, flags); } +/* Return true if the surfaces are equal. */ +static INLINE boolean +pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2) +{ + return s1->texture == s2->texture && + s1->format == s2->format && + (s1->texture->target != PIPE_BUFFER || + (s1->u.buf.first_element == s2->u.buf.first_element && + s1->u.buf.last_element == s2->u.buf.last_element)) && + (s1->texture->target == PIPE_BUFFER || + (s1->u.tex.level == s2->u.tex.level && + s1->u.tex.first_layer == s2->u.tex.first_layer && + s1->u.tex.last_layer == s2->u.tex.last_layer)); +} + /* * Convenience wrappers for screen buffer functions. */ @@ -167,6 +182,7 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps, static INLINE struct pipe_resource * pipe_buffer_create( struct pipe_screen *screen, unsigned bind, + unsigned usage, unsigned size ) { struct pipe_resource buffer; @@ -174,7 +190,7 @@ pipe_buffer_create( struct pipe_screen *screen, buffer.target = PIPE_BUFFER; buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */ buffer.bind = bind; - buffer.usage = PIPE_USAGE_DEFAULT; + buffer.usage = usage; buffer.flags = 0; buffer.width0 = size; buffer.height0 = 1; @@ -220,6 +236,7 @@ pipe_buffer_map_range(struct pipe_context *pipe, map = pipe->transfer_map( pipe, *transfer ); if (map == NULL) { pipe->transfer_destroy( pipe, *transfer ); + *transfer = NULL; return NULL; } diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 37294b7203f..30555f92a6d 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -176,7 +176,7 @@ static INLINE float logf( float f ) #define isfinite(x) _finite((double)(x)) #define isnan(x) _isnan((double)(x)) -#endif +#endif /* _MSC_VER < 1400 && !defined(__cplusplus) */ static INLINE double log2( double x ) { @@ -184,6 +184,18 @@ static INLINE double log2( double x ) return log( x ) * invln2; } +static INLINE double +round(double x) +{ + return x >= 0.0 ? floor(x + 0.5) : ceil(x - 0.5); +} + +static INLINE float +roundf(float x) +{ + return x >= 0.0f ? 
floorf(x + 0.5f) : ceilf(x - 0.5f); +} + #endif /* _MSC_VER */ diff --git a/src/gallium/auxiliary/util/u_resource.c b/src/gallium/auxiliary/util/u_resource.c index 443c9f8067e..ea6896b430f 100644 --- a/src/gallium/auxiliary/util/u_resource.c +++ b/src/gallium/auxiliary/util/u_resource.c @@ -35,7 +35,7 @@ unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe, struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context, struct pipe_resource *resource, unsigned level, - enum pipe_transfer_usage usage, + unsigned usage, const struct pipe_box *box) { struct u_resource *ur = u_resource(resource); diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 44cadbfcdd0..e3c7085ba92 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -367,47 +367,19 @@ pipe_get_tile_rgba(struct pipe_context *pipe, uint x, uint y, uint w, uint h, float *p) { - unsigned dst_stride = w * 4; - void *packed; - enum pipe_format format = pt->resource->format; - - if (u_clip_tile(x, y, &w, &h, &pt->box)) - return; - - packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format)); - - if (!packed) - return; - - if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV) - assert((x & 1) == 0); - - pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0); - - pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); - - FREE(packed); + pipe_get_tile_rgba_format(pipe, pt, x, y, w, h, pt->resource->format, p); } void -pipe_get_tile_swizzle(struct pipe_context *pipe, - struct pipe_transfer *pt, - uint x, - uint y, - uint w, - uint h, - uint swizzle_r, - uint swizzle_g, - uint swizzle_b, - uint swizzle_a, - enum pipe_format format, - float *p) +pipe_get_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + float *p) { unsigned dst_stride = w * 4; void *packed; - uint iy; - float rgba01[6]; if (u_clip_tile(x, y, &w, &h, &pt->box)) { return; @@ -427,35 +399,6 @@ pipe_get_tile_swizzle(struct pipe_context *pipe, pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); FREE(packed); - - if (swizzle_r == PIPE_SWIZZLE_RED && - swizzle_g == PIPE_SWIZZLE_GREEN && - swizzle_b == PIPE_SWIZZLE_BLUE && - swizzle_a == PIPE_SWIZZLE_ALPHA) { - /* no-op, skip */ - return; - } - - rgba01[PIPE_SWIZZLE_ZERO] = 0.0f; - rgba01[PIPE_SWIZZLE_ONE] = 1.0f; - - for (iy = 0; iy < h; iy++) { - float *row = p; - uint ix; - - for (ix = 0; ix < w; ix++) { - rgba01[PIPE_SWIZZLE_RED] = row[0]; - rgba01[PIPE_SWIZZLE_GREEN] = row[1]; - rgba01[PIPE_SWIZZLE_BLUE] = row[2]; - rgba01[PIPE_SWIZZLE_ALPHA] = row[3]; - - *row++ = rgba01[swizzle_r]; - *row++ = rgba01[swizzle_g]; - *row++ = rgba01[swizzle_b]; - *row++ = rgba01[swizzle_a]; - } - p += dst_stride; - } } @@ -465,9 +408,19 @@ pipe_put_tile_rgba(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const float *p) { + pipe_put_tile_rgba_format(pipe, pt, x, y, w, h, pt->resource->format, p); +} + + +void +pipe_put_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + const float *p) +{ unsigned src_stride = w * 4; void *packed; - enum pipe_format format = pt->resource->format; if (u_clip_tile(x, y, &w, &h, &pt->box)) return; diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h index 558351d0ce5..0ba274308fe 100644 --- a/src/gallium/auxiliary/util/u_tile.h +++ 
b/src/gallium/auxiliary/util/u_tile.h @@ -80,18 +80,11 @@ pipe_get_tile_rgba(struct pipe_context *pipe, float *p); void -pipe_get_tile_swizzle(struct pipe_context *pipe, - struct pipe_transfer *pt, - uint x, - uint y, - uint w, - uint h, - uint swizzle_r, - uint swizzle_g, - uint swizzle_b, - uint swizzle_a, - enum pipe_format format, - float *p); +pipe_get_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + float *p); void pipe_put_tile_rgba(struct pipe_context *pipe, @@ -99,6 +92,13 @@ pipe_put_tile_rgba(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const float *p); +void +pipe_put_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + const float *p); + void pipe_get_tile_z(struct pipe_context *pipe, diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c index e2828cfd99e..b6c63d9642f 100644 --- a/src/gallium/auxiliary/util/u_transfer.c +++ b/src/gallium/auxiliary/util/u_transfer.c @@ -112,3 +112,10 @@ void u_default_transfer_destroy(struct pipe_context *pipe, FREE(transfer); } +void u_default_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size) +{ + resource->width0 = MAX2(resource->width0, offset + size); +} diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h index 3412e13c3cc..8cf9c418b04 100644 --- a/src/gallium/auxiliary/util/u_transfer.h +++ b/src/gallium/auxiliary/util/u_transfer.h @@ -93,7 +93,7 @@ struct u_resource_vtbl { struct u_resource { struct pipe_resource b; - struct u_resource_vtbl *vtbl; + const struct u_resource_vtbl *vtbl; }; @@ -136,11 +136,9 @@ void u_transfer_inline_write_vtbl( struct pipe_context *rm_ctx, unsigned stride, unsigned layer_stride); - - - - - - +void u_default_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size); #endif diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 3b3d5b418fe..dcf800a1e8e 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -85,7 +85,12 @@ void u_upload_flush( struct u_upload_mgr *upload ) { /* Unmap and unreference the upload buffer. */ if (upload->transfer) { + if (upload->size) { + pipe_buffer_flush_mapped_range(upload->pipe, upload->transfer, + 0, upload->size); + } pipe_transfer_unmap(upload->pipe, upload->transfer); + pipe_transfer_destroy(upload->pipe, upload->transfer); upload->transfer = NULL; } pipe_resource_reference( &upload->buffer, NULL ); @@ -116,13 +121,17 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload, upload->buffer = pipe_buffer_create( upload->pipe->screen, upload->bind, + PIPE_USAGE_STREAM, size ); if (upload->buffer == NULL) goto fail; /* Map the new buffer. 
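The mapping now uses PIPE_TRANSFER_FLUSH_EXPLICIT; u_upload_flush() flushes the mapped range with pipe_buffer_flush_mapped_range() before unmapping and destroying the transfer.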
*/ - upload->map = pipe_buffer_map(upload->pipe, upload->buffer, - PIPE_TRANSFER_WRITE, &upload->transfer); + upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer, + 0, size, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_FLUSH_EXPLICIT, + &upload->transfer); upload->size = size; diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c new file mode 100644 index 00000000000..dec8dd717e8 --- /dev/null +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -0,0 +1,601 @@ +/************************************************************************** + * + * Copyright 2011 Marek Olšák <[email protected]> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_vbuf_mgr.h" + +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + +/* Hardware vertex fetcher limitations can be described by this structure. */ +struct u_vbuf_caps { + /* Vertex format CAPs. */ + /* TRUE if hardware supports it. */ + unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */ + unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */ + unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */ + unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ + unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ + + /* Whether vertex fetches don't have to be dword-aligned. */ + /* TRUE if hardware supports it. */ + unsigned fetch_dword_unaligned:1; +}; + +struct u_vbuf_mgr_elements { + unsigned count; + struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; + + /* If (velem[i].src_format != real_format[i]), the vertex buffer + * referenced by the vertex element cannot be used for rendering and + * its vertex data must be translated to real_format[i]. */ + enum pipe_format native_format[PIPE_MAX_ATTRIBS]; + unsigned native_format_size[PIPE_MAX_ATTRIBS]; + + /* This might mean two things: + * - src_format != native_format, as discussed above. + * - src_offset % 4 != 0 (if the caps don't allow such an offset). 
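Either condition forces the affected vertex buffers through the translate fallback at draw time.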
*/ + boolean incompatible_layout; +}; + +struct u_vbuf_mgr_priv { + struct u_vbuf_mgr b; + struct u_vbuf_caps caps; + struct pipe_context *pipe; + + struct translate_cache *translate_cache; + unsigned translate_vb_slot; + + struct u_vbuf_mgr_elements *ve; + void *saved_ve, *fallback_ve; + boolean ve_binding_lock; + + boolean any_user_vbs; + boolean incompatible_vb_layout; +}; + +static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr) +{ + struct pipe_screen *screen = mgr->pipe->screen; + + mgr->caps.format_fixed32 = + screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER, 0); + + mgr->caps.format_float16 = + screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER, 0); + + mgr->caps.format_float64 = + screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER, 0); + + mgr->caps.format_norm32 = + screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER, 0) && + screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER, 0); + + mgr->caps.format_scaled32 = + screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER, 0) && + screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER, 0); +} + +struct u_vbuf_mgr * +u_vbuf_mgr_create(struct pipe_context *pipe, + unsigned upload_buffer_size, + unsigned upload_buffer_alignment, + unsigned upload_buffer_bind, + enum u_fetch_alignment fetch_alignment) +{ + struct u_vbuf_mgr_priv *mgr = CALLOC_STRUCT(u_vbuf_mgr_priv); + + mgr->pipe = pipe; + mgr->translate_cache = translate_cache_create(); + + mgr->b.uploader = u_upload_create(pipe, upload_buffer_size, + upload_buffer_alignment, + upload_buffer_bind); + + mgr->caps.fetch_dword_unaligned = + fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED; + + u_vbuf_mgr_init_format_caps(mgr); + + return &mgr->b; +} + +void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgrb) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + + for (i = 0; i < mgr->b.nr_real_vertex_buffers; i++) { + pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL); + pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + } + + translate_cache_destroy(mgr->translate_cache); + u_upload_destroy(mgr->b.uploader); + FREE(mgr); +} + + +static void u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr, + int min_index, int max_index, + boolean *upload_flushed) +{ + struct translate_key key; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS]; + struct translate *tr; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + uint8_t *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; + struct pipe_resource *out_buffer = NULL; + unsigned i, num_verts, out_offset; + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + memset(&key, 0, sizeof(key)); + memset(tr_elem_index, 0xff, sizeof(tr_elem_index)); + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + for (i = 0; i < mgr->ve->count; i++) { + struct pipe_vertex_buffer *vb = + &mgr->b.vertex_buffer[mgr->ve->ve[i].vertex_buffer_index]; + enum pipe_format output_format = mgr->ve->native_format[i]; + unsigned output_format_size = mgr->ve->native_format_size[i]; + + /* Check for support. 
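An element is left untouched if its source format is already supported and its offsets and stride are dword-aligned (or the hardware can fetch from unaligned addresses); everything else is added to the translate key below.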
*/ + if (mgr->ve->ve[i].src_format == mgr->ve->native_format[i] && + (mgr->caps.fetch_dword_unaligned || + (vb->buffer_offset % 4 == 0 && + vb->stride % 4 == 0 && + mgr->ve->ve[i].src_offset % 4 == 0))) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. */ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = mgr->ve->ve[i].vertex_buffer_index; + te->input_format = mgr->ve->ve[i].src_format; + te->input_offset = mgr->ve->ve[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[mgr->ve->ve[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(mgr->translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < mgr->b.nr_vertex_buffers; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(mgr->pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, + vb_map[i] + vb->buffer_offset + vb->stride * min_index, + vb->stride, ~0); + } + } + + /* Create and map the output buffer. */ + num_verts = max_index + 1 - min_index; + + u_upload_alloc(mgr->b.uploader, + key.output_stride * min_index, + key.output_stride * num_verts, + &out_offset, &out_buffer, upload_flushed, + (void**)&out_map); + + out_offset -= key.output_stride * min_index; + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < mgr->b.nr_vertex_buffers; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(mgr->pipe, vb_transfer[i]); + } + } + + /* Setup the new vertex buffer in the first free slot. */ + mgr->translate_vb_slot = ~0; + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (!mgr->b.vertex_buffer[i].buffer) { + mgr->translate_vb_slot = i; + + if (i >= mgr->b.nr_vertex_buffers) { + mgr->b.nr_real_vertex_buffers = i+1; + } + break; + } + } + + if (mgr->translate_vb_slot != ~0) { + /* Setup the new vertex buffer. */ + pipe_resource_reference( + &mgr->b.real_vertex_buffer[mgr->translate_vb_slot], out_buffer); + mgr->b.vertex_buffer[mgr->translate_vb_slot].buffer_offset = out_offset; + mgr->b.vertex_buffer[mgr->translate_vb_slot].stride = key.output_stride; + + /* Setup new vertex elements. 
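Translated elements are redirected to the single interleaved fallback buffer; elements that did not need translation keep their original definition.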
*/ + for (i = 0; i < mgr->ve->count; i++) { + if (tr_elem_index[i] < key.nr_elements) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = mgr->translate_vb_slot; + } else { + memcpy(&new_velems[i], &mgr->ve->ve[i], + sizeof(struct pipe_vertex_element)); + } + } + + mgr->fallback_ve = + mgr->pipe->create_vertex_elements_state(mgr->pipe, mgr->ve->count, + new_velems); + + /* Preserve saved_ve. */ + mgr->ve_binding_lock = TRUE; + mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->fallback_ve); + mgr->ve_binding_lock = FALSE; + } + + pipe_resource_reference(&out_buffer, NULL); +} + +static void u_vbuf_translate_end(struct u_vbuf_mgr_priv *mgr) +{ + if (mgr->fallback_ve == NULL) { + return; + } + + /* Restore vertex elements. */ + /* Note that saved_ve will be overwritten in bind_vertex_elements_state. */ + mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->saved_ve); + mgr->pipe->delete_vertex_elements_state(mgr->pipe, mgr->fallback_ve); + mgr->fallback_ve = NULL; + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&mgr->b.real_vertex_buffer[mgr->translate_vb_slot], + NULL); + mgr->b.nr_real_vertex_buffers = mgr->b.nr_vertex_buffers; +} + +#define FORMAT_REPLACE(what, withwhat) \ + case PIPE_FORMAT_##what: format = PIPE_FORMAT_##withwhat; break + +struct u_vbuf_mgr_elements * +u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, + unsigned count, + const struct pipe_vertex_element *attribs, + struct pipe_vertex_element *native_attribs) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + struct u_vbuf_mgr_elements *ve = CALLOC_STRUCT(u_vbuf_mgr_elements); + + ve->count = count; + + if (!count) { + return ve; + } + + memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count); + memcpy(native_attribs, attribs, sizeof(struct pipe_vertex_element) * count); + + /* Set the best native format in case the original format is not + * supported. */ + for (i = 0; i < count; i++) { + enum pipe_format format = ve->ve[i].src_format; + + /* Choose a native format. + * For now we don't care about the alignment, that's going to + * be sorted out later. 
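For instance, FIXED and 64-bit float attributes fall back to 32-bit floats when the hardware cannot fetch them directly.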
*/ + if (!mgr->caps.format_fixed32) { + switch (format) { + FORMAT_REPLACE(R32_FIXED, R32_FLOAT); + FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT); + default:; + } + } + if (!mgr->caps.format_float16) { + switch (format) { + FORMAT_REPLACE(R16_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R16G16_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R16G16B16_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R16G16B16A16_FLOAT, R32G32B32A32_FLOAT); + default:; + } + } + if (!mgr->caps.format_float64) { + switch (format) { + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + default:; + } + } + if (!mgr->caps.format_norm32) { + switch (format) { + FORMAT_REPLACE(R32_UNORM, R32_FLOAT); + FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); + FORMAT_REPLACE(R32_SNORM, R32_FLOAT); + FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); + default:; + } + } + if (!mgr->caps.format_scaled32) { + switch (format) { + FORMAT_REPLACE(R32_USCALED, R32_FLOAT); + FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); + FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); + FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); + default:; + } + } + + native_attribs[i].src_format = format; + ve->native_format[i] = format; + ve->native_format_size[i] = + util_format_get_blocksize(ve->native_format[i]); + + ve->incompatible_layout = + ve->incompatible_layout || + ve->ve[i].src_format != ve->native_format[i] || + (!mgr->caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0); + } + + /* Align the formats to the size of DWORD if needed. */ + if (!mgr->caps.fetch_dword_unaligned) { + for (i = 0; i < count; i++) { + ve->native_format_size[i] = align(ve->native_format_size[i], 4); + } + } + + return ve; +} + +void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgrb, + void *cso, + struct u_vbuf_mgr_elements *ve) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + + if (!cso) { + return; + } + + if (!mgr->ve_binding_lock) { + mgr->saved_ve = cso; + mgr->ve = ve; + } +} + +void u_vbuf_mgr_destroy_vertex_elements(struct u_vbuf_mgr *mgr, + struct u_vbuf_mgr_elements *ve) +{ + FREE(ve); +} + +void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, + unsigned count, + const struct pipe_vertex_buffer *bufs) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + + mgr->b.max_index = ~0; + mgr->any_user_vbs = FALSE; + mgr->incompatible_vb_layout = FALSE; + + if (!mgr->caps.fetch_dword_unaligned) { + /* Check if the strides and offsets are aligned to the size of DWORD. 
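If any bound buffer violates the alignment, incompatible_vb_layout is set and the translate fallback is used at draw time.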
*/ + for (i = 0; i < count; i++) { + if (bufs[i].buffer) { + if (bufs[i].stride % 4 != 0 || + bufs[i].buffer_offset % 4 != 0) { + mgr->incompatible_vb_layout = TRUE; + break; + } + } + } + } + + for (i = 0; i < count; i++) { + const struct pipe_vertex_buffer *vb = &bufs[i]; + + pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer); + pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + + if (u_vbuf_resource(vb->buffer)->user_ptr) { + mgr->any_user_vbs = TRUE; + continue; + } + + pipe_resource_reference(&mgr->b.real_vertex_buffer[i], vb->buffer); + + /* The stride of zero means we will be fetching only the first + * vertex, so don't care about max_index. */ + if (!vb->stride) { + continue; + } + + /* Update the maximum index. */ + mgr->b.max_index = + MIN2(mgr->b.max_index, + (vb->buffer->width0 - vb->buffer_offset) / vb->stride); + } + + for (; i < mgr->b.nr_real_vertex_buffers; i++) { + pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL); + pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + } + + memcpy(mgr->b.vertex_buffer, bufs, + sizeof(struct pipe_vertex_buffer) * count); + + mgr->b.nr_vertex_buffers = count; + mgr->b.nr_real_vertex_buffers = count; +} + +static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr, + int min_index, int max_index, + boolean *upload_flushed) +{ + int i, nr = mgr->ve->count; + unsigned count = max_index + 1 - min_index; + boolean uploaded[PIPE_MAX_ATTRIBS] = {0}; + + for (i = 0; i < nr; i++) { + unsigned index = mgr->ve->ve[i].vertex_buffer_index; + struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[index]; + + if (vb->buffer && + u_vbuf_resource(vb->buffer)->user_ptr && + !uploaded[index]) { + unsigned first, size; + boolean flushed; + + if (vb->stride) { + first = vb->stride * min_index; + size = vb->stride * count; + } else { + first = 0; + size = mgr->ve->native_format_size[i]; + } + + u_upload_data(mgr->b.uploader, first, size, + u_vbuf_resource(vb->buffer)->user_ptr + first, + &vb->buffer_offset, + &mgr->b.real_vertex_buffer[index], + &flushed); + + vb->buffer_offset -= first; + + uploaded[index] = TRUE; + *upload_flushed = *upload_flushed || flushed; + } else { + assert(mgr->b.real_vertex_buffer[index]); + } + } +} + +void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb, + const struct pipe_draw_info *info, + boolean *buffers_updated, + boolean *uploader_flushed) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + boolean bufs_updated = FALSE, upload_flushed = FALSE; + int min_index, max_index; + + min_index = info->min_index - info->index_bias; + max_index = info->max_index - info->index_bias; + + /* Translate vertices with non-native layouts or formats. */ + if (mgr->incompatible_vb_layout || mgr->ve->incompatible_layout) { + u_vbuf_translate_begin(mgr, min_index, max_index, &upload_flushed); + + if (mgr->fallback_ve) { + bufs_updated = TRUE; + } + } + + /* Upload user buffers. */ + if (mgr->any_user_vbs) { + u_vbuf_upload_buffers(mgr, min_index, max_index, &upload_flushed); + bufs_updated = TRUE; + } + + /* Set the return values. 
*/ + if (buffers_updated) { + *buffers_updated = bufs_updated; + } + if (uploader_flushed) { + *uploader_flushed = upload_flushed; + } +} + +void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + + if (mgr->fallback_ve) { + u_vbuf_translate_end(mgr); + } +} diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h new file mode 100644 index 00000000000..8b241854c83 --- /dev/null +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h @@ -0,0 +1,121 @@ +/************************************************************************** + * + * Copyright 2011 Marek Olšák <[email protected]> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_VBUF_MGR_H +#define U_VBUF_MGR_H + +/* This module builds upon u_upload_mgr and translate_cache and takes care of + * user buffer uploads and vertex format fallbacks. It's designed + * for the drivers which don't always use the Draw module. (e.g. for HWTCL) + */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_transfer.h" + +/* The manager. + * This structure should also be used to access vertex buffers + * from a driver. */ +struct u_vbuf_mgr { + /* This is what was set in set_vertex_buffers. + * May contain user buffers. */ + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + + /* Contains only real vertex buffers. + * Hardware drivers should use real_vertex_buffers[i] + * instead of vertex_buffers[i].buffer. */ + struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS]; + int nr_real_vertex_buffers; + + /* Precomputed max_index for hardware vertex buffers. */ + int max_index; + + /* This uploader can optionally be used by the driver. + * + * Allowed functions: + * - u_upload_alloc + * - u_upload_data + * - u_upload_buffer + * - u_upload_flush */ + struct u_upload_mgr *uploader; +}; + +struct u_vbuf_resource { + struct u_resource b; + uint8_t *user_ptr; +}; + +/* Opaque type containing information about vertex elements for the manager. 
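Instances are created with u_vbuf_mgr_create_vertex_elements() and bound with u_vbuf_mgr_bind_vertex_elements().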
*/ +struct u_vbuf_mgr_elements; + +enum u_fetch_alignment { + U_VERTEX_FETCH_BYTE_ALIGNED, + U_VERTEX_FETCH_DWORD_ALIGNED +}; + + +struct u_vbuf_mgr * +u_vbuf_mgr_create(struct pipe_context *pipe, + unsigned upload_buffer_size, + unsigned upload_buffer_alignment, + unsigned upload_buffer_bind, + enum u_fetch_alignment fetch_alignment); + +void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgr); + +struct u_vbuf_mgr_elements * +u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgr, + unsigned count, + const struct pipe_vertex_element *attrs, + struct pipe_vertex_element *native_attrs); + +void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgr, + void *cso, + struct u_vbuf_mgr_elements *ve); + +void u_vbuf_mgr_destroy_vertex_elements(struct u_vbuf_mgr *mgr, + struct u_vbuf_mgr_elements *ve); + +void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgr, + unsigned count, + const struct pipe_vertex_buffer *bufs); + +void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgr, + const struct pipe_draw_info *info, + boolean *buffers_updated, + boolean *uploader_flushed); + +void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgr); + + +static INLINE struct u_vbuf_resource *u_vbuf_resource(struct pipe_resource *r) +{ + return (struct u_vbuf_resource*)r; +} + +#endif diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index d7b29497ace..d1ba5faf788 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -247,13 +247,13 @@ init_buffers(struct vl_compositor *c) * Create our vertex buffer and vertex buffer elements */ c->vertex_buf.stride = sizeof(struct vertex4f); - c->vertex_buf.max_index = (VL_COMPOSITOR_MAX_LAYERS + 2) * 6 - 1; c->vertex_buf.buffer_offset = 0; /* XXX: Create with DYNAMIC or STREAM */ c->vertex_buf.buffer = pipe_buffer_create ( c->pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 2) * 6 ); @@ -276,6 +276,7 @@ init_buffers(struct vl_compositor *c) ( c->pipe->screen, PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, sizeof(struct fragment_shader_consts) ); diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index 5d472f93481..89463a5c75c 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -428,7 +428,6 @@ init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer) /* create textures */ memset(&template, 0, sizeof(struct pipe_resource)); template.last_level = 0; - template.depth0 = 1; template.bind = PIPE_BIND_SAMPLER_VIEW; template.flags = 0; @@ -437,6 +436,7 @@ init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer) template.width0 = idct->buffer_width / 4; template.height0 = idct->buffer_height; template.depth0 = 1; + template.array_size = 1; template.usage = PIPE_USAGE_STREAM; buffer->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template); @@ -478,7 +478,6 @@ init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) assert(idct && buffer); buffer->vertex_bufs.individual.quad.stride = idct->quad.stride; - buffer->vertex_bufs.individual.quad.max_index = idct->quad.max_index; buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset; pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer); @@ -526,6 +525,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe) template.width0 = 2; template.height0 = 8; template.depth0 = 1; + template.array_size = 1; 
template.usage = PIPE_USAGE_IMMUTABLE; template.bind = PIPE_BIND_SAMPLER_VIEW; template.flags = 0; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index de83b6a5338..484e781f0cb 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -888,6 +888,7 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1 template.width0 = renderer->buffer_width; template.height0 = renderer->buffer_height; template.depth0 = 1; + template.array_size = 1; template.usage = PIPE_USAGE_STATIC; template.bind = PIPE_BIND_SAMPLER_VIEW; template.flags = 0; @@ -928,7 +929,6 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1 } buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride; - buffer->vertex_bufs.individual.quad.max_index = renderer->quad.max_index; buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset; pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer); diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index 8599ed3533d..552a0451fef 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -53,12 +53,12 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks) /* create buffer */ quad.stride = sizeof(struct vertex2f); - quad.max_index = 4 * max_blocks - 1; quad.buffer_offset = 0; quad.buffer = pipe_buffer_create ( pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(struct vertex2f) * 4 * max_blocks ); @@ -100,7 +100,7 @@ unsigned vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements, unsigned vertex_buffer_index) { - unsigned i, size, offset = 0; + unsigned i, offset = 0; assert(elements && num_elements); @@ -126,12 +126,12 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, buffer->stride = stride; buf.stride = stride; - buf.max_index = 4 * max_blocks - 1; buf.buffer_offset = 0; buf.buffer = pipe_buffer_create ( pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, stride * 4 * max_blocks ); diff --git a/src/gallium/docs/d3d11ddi.txt b/src/gallium/docs/d3d11ddi.txt index 11e77190895..0a9e7e50f1d 100644 --- a/src/gallium/docs/d3d11ddi.txt +++ b/src/gallium/docs/d3d11ddi.txt @@ -337,7 +337,6 @@ IaSetTopology + Gallium supports line loops, triangle fans, quads, quad strips and polygons IaSetVertexBuffers -> set_vertex_buffers - + Gallium allows to specify a max_index here - Gallium only allows setting all vertex buffers at once, while D3D11 supports setting a subset OpenResource -> texture_from_handle diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 6760e7be4b9..c6812cd309e 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -3,8 +3,12 @@ Context ======= -The context object represents the purest, most directly accessible, abilities -of the device's 3D rendering pipeline. +A Gallium rendering context encapsulates the state which effects 3D +rendering such as blend state, depth/stencil state, texture samplers, +etc. + +Note that resource/texture allocation is not per-context but per-screen. + Methods ------- @@ -12,20 +16,23 @@ Methods CSO State ^^^^^^^^^ -All CSO state is created, bound, and destroyed, with triplets of methods that -all follow a specific naming scheme. 
For example, ``create_blend_state``, -``bind_blend_state``, and ``destroy_blend_state``. +All Constant State Object (CSO) state is created, bound, and destroyed, +with triplets of methods that all follow a specific naming scheme. +For example, ``create_blend_state``, ``bind_blend_state``, and +``destroy_blend_state``. CSO objects handled by the context object: * :ref:`Blend`: ``*_blend_state`` -* :ref:`Sampler`: These are special; they can be bound to either vertex or - fragment samplers, and they are bound in groups. - ``bind_fragment_sampler_states``, ``bind_vertex_sampler_states`` +* :ref:`Sampler`: Texture sampler states are bound separately for fragment, + vertex and geometry samplers. Note that sampler states are set en masse. + If M is the max number of sampler units supported by the driver and N + samplers are bound with ``bind_fragment_sampler_states`` then sampler + units N..M-1 are considered disabled/NULL. * :ref:`Rasterizer`: ``*_rasterizer_state`` * :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state`` -* :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for - fragment shaders, and ``*_vs_state`` is for vertex shaders. +* :ref:`Shader`: These are create, bind and destroy methods for vertex, + fragment and geometry shaders. * :ref:`Vertex Elements`: ``*_vertex_elements_state`` @@ -47,6 +54,9 @@ buffers, surfaces) are bound to the driver. * ``set_index_buffer`` +* ``set_stream_output_buffers`` + + Non-CSO State ^^^^^^^^^^^^^ @@ -96,7 +106,9 @@ to the array index which is used for sampling. * ``set_fragment_sampler_views`` binds an array of sampler views to fragment shader stage. Every binding point acquires a reference to a respective sampler view and releases a reference to the previous - sampler view. + sampler view. If M is the maximum number of sampler units and N units + is passed to set_fragment_sampler_views, the driver should unbind the + sampler views for units N..M-1. * ``set_vertex_sampler_views`` binds an array of sampler views to vertex shader stage. Every binding point acquires a reference to a respective @@ -233,6 +245,11 @@ The most common type of query is the occlusion query, are written to the framebuffer without being culled by :ref:`Depth, Stencil, & Alpha` testing or shader KILL instructions. The result is an unsigned 64-bit integer. +In cases where a boolean result of an occlusion query is enough, +``PIPE_QUERY_OCCLUSION_PREDICATE`` should be used. It is just like +``PIPE_QUERY_OCCLUSION_COUNTER`` except that the result is a boolean +value of FALSE for cases where COUNTER would result in 0 and TRUE +for all other cases. Another type of query, ``PIPE_QUERY_TIME_ELAPSED``, returns the amount of time, in nanoseconds, the context takes to perform operations. @@ -349,6 +366,22 @@ Any pointers into the map should be considered invalid and discarded. Basically get_transfer, transfer_map, data write, transfer_unmap, and transfer_destroy all in one. + +The box parameter to some of these functions defines a 1D, 2D or 3D +region of pixels. This is self-explanatory for 1D, 2D and 3D texture +targets. + +For PIPE_TEXTURE_1D_ARRAY, the box::y and box::height fields refer to the +array dimension of the texture. + +For PIPE_TEXTURE_2D_ARRAY, the box::z and box::depth fields refer to the +array dimension of the texture. + +For PIPE_TEXTURE_CUBE, the box:z and box::depth fields refer to the +faces of the cube map (z + depth <= 6). + + + .. _transfer_flush_region: transfer_flush_region @@ -359,6 +392,22 @@ be flushed on write or unmap. 
Flushes must be requested with ``transfer_flush_region``. Flush ranges are relative to the mapped range, not the beginning of the resource. + + +.. _redefine_user_buffer: + +redefine_user_buffer +%%%%%%%%%%%%%%%%%%%% + +This function notifies a driver that the user buffer content has been changed. +The updated region starts at ``offset`` and is ``size`` bytes large. +The ``offset`` is relative to the pointer specified in ``user_buffer_create``. +While uploading the user buffer, the driver is allowed not to upload +the memory outside of this region. +The width0 is redefined to ``MAX2(width0, offset+size)``. + + + .. _pipe_transfer: PIPE_TRANSFER @@ -366,16 +415,32 @@ PIPE_TRANSFER These flags control the behavior of a transfer object. -* ``READ``: resource contents are read at transfer create time. -* ``WRITE``: resource contents will be written back at transfer destroy time. -* ``MAP_DIRECTLY``: a transfer should directly map the resource. May return - NULL if not supported. -* ``DISCARD``: The memory within the mapped region is discarded. - Cannot be used with ``READ``. -* ``DONTBLOCK``: Fail if the resource cannot be mapped immediately. -* ``UNSYNCHRONIZED``: Do not synchronize pending operations on the resource - when mapping. The interaction of any writes to the map and any - operations pending on the resource are undefined. Cannot be used with - ``READ``. -* ``FLUSH_EXPLICIT``: Written ranges will be notified later with - :ref:`transfer_flush_region`. Cannot be used with ``READ``. +``PIPE_TRANSFER_READ`` + Resource contents read back (or accessed directly) at transfer create time. + +``PIPE_TRANSFER_WRITE`` + Resource contents will be written back at transfer_destroy time (or modified + as a result of being accessed directly). + +``PIPE_TRANSFER_MAP_DIRECTLY`` + a transfer should directly map the resource. May return NULL if not supported. + +``PIPE_TRANSFER_DISCARD_RANGE`` + The memory within the mapped region is discarded. Cannot be used with + ``PIPE_TRANSFER_READ``. + +``PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE`` + Discards all memory backing the resource. It should not be used with + ``PIPE_TRANSFER_READ``. + +``PIPE_TRANSFER_DONTBLOCK`` + Fail if the resource cannot be mapped immediately. + +``PIPE_TRANSFER_UNSYNCHRONIZED`` + Do not synchronize pending operations on the resource when mapping. The + interaction of any writes to the map and any operations pending on the + resource are undefined. Cannot be used with ``PIPE_TRANSFER_READ``. + +``PIPE_TRANSFER_FLUSH_EXPLICIT`` + Written ranges will be notified later with :ref:`transfer_flush_region`. + Cannot be used with ``PIPE_TRANSFER_READ``. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index ab90097add6..976e75bed04 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -273,7 +273,7 @@ Modern APIs allow using buffers as shader resources. **depth0** the depth of the base mip level of the texture (1 for everything else). -**array_size the array size for 1D and 2D array textures. +**array_size** the array size for 1D and 2D array textures. For cube maps this must be 6, for other textures 1. **last_level** the last mip map level present. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index d986e6601e6..4debcc6ecc4 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1250,6 +1250,157 @@ This opcode is the inverse of :opcode:`DFRACEXP`. dst.zw = \sqrt{src.zw} +.. 
_resourceopcodes:
+
+Resource Access Opcodes
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+These opcodes follow the semantics of the respective Direct3D
+instructions very closely. If in doubt, double-check the Direct3D documentation.
+
+.. opcode:: LOAD - Simplified alternative to the "SAMPLE" instruction.
+               Using the provided integer address, LOAD fetches data
+               from the specified buffer/texture without any filtering.
+               The source data may come from any resource type other
+               than CUBE.
+               LOAD dst, address, resource
+               e.g.
+               LOAD TEMP[0], TEMP[1], RES[0]
+               The 'address' is specified as unsigned integers. If the
+               'address' is out of range [0...(# texels - 1)] the
+               result of the fetch is always 0 in all components.
+               As such the instruction doesn't honor address wrap
+               modes; in cases where that behavior is desirable, the
+               'sample' instruction should be used.
+               address.w always provides an unsigned integer mipmap
+               level. If the value is out of range then the
+               instruction always returns 0 in all components.
+               address.yz are ignored for buffers and 1d textures.
+               address.z is ignored for 1d texture arrays and 2d
+               textures.
+               For 1D texture arrays address.y provides the array
+               index (also as an unsigned integer). If the value is
+               out of the range of available array indices
+               [0... (array size - 1)] then the opcode always returns
+               0 in all components.
+               For 2D texture arrays address.z provides the array
+               index, otherwise it exhibits the same behavior as in
+               the case for 1D texture arrays.
+               The exact semantics of the source address are presented
+               in the table below:
+               resource type          X     Y     Z     W
+               -------------          ------------------------
+               PIPE_BUFFER            x     ignored
+               PIPE_TEXTURE_1D        x     mpl
+               PIPE_TEXTURE_2D        x     y     mpl
+               PIPE_TEXTURE_3D        x     y     z     mpl
+               PIPE_TEXTURE_RECT      x     y     mpl
+               PIPE_TEXTURE_CUBE      not allowed as source
+               PIPE_TEXTURE_1D_ARRAY  x     idx   mpl
+               PIPE_TEXTURE_2D_ARRAY  x     y     idx   mpl
+
+               Where 'mpl' is a mipmap level and 'idx' is the
+               array index.
+
+
+.. opcode:: LOAD_MS - Just like LOAD but allows fetching data from
+               multi-sampled surfaces.
+
+.. opcode:: SAMPLE - Using the provided address, sample data from the
+               specified texture using the filtering mode identified
+               by the given sampler. The source data may come from
+               any resource type other than buffers.
+               SAMPLE dst, address, resource, sampler
+               e.g.
+               SAMPLE TEMP[0], TEMP[1], RES[0], SAMP[0]
+
+.. opcode:: SAMPLE_B - Just like the SAMPLE instruction with the
+               exception that an additional bias is applied to the
+               level of detail computed as part of the instruction
+               execution.
+               SAMPLE_B dst, address, resource, sampler, lod_bias
+               e.g.
+               SAMPLE_B TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2].x
+
+.. opcode:: SAMPLE_C - Similar to the SAMPLE instruction but it
+               performs a comparison filter. The operands to SAMPLE_C
+               are identical to SAMPLE, except that there is an additional
+               float32 operand, the reference value, which must be a
+               single-component register or a scalar literal.
+               SAMPLE_C makes the hardware use the current sampler's
+               compare_func (in pipe_sampler_state) to compare the
+               reference value against the red component value for the
+               source resource at each texel that the currently configured
+               texture filter covers based on the provided coordinates.
+               SAMPLE_C dst, address, resource.r, sampler, ref_value
+               e.g.
+               SAMPLE_C TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x
+
+.. opcode:: SAMPLE_C_LZ - Same as SAMPLE_C, but LOD is 0 and derivatives
+               are ignored. The LZ stands for level-zero.
+               SAMPLE_C_LZ dst, address, resource.r, sampler, ref_value
+               e.g.
+               SAMPLE_C_LZ TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x
+
+
+.. opcode:: SAMPLE_D - SAMPLE_D is identical to the SAMPLE opcode except
+               that the derivatives for the source address in the x
+               direction and the y direction are provided by extra
+               parameters.
+               SAMPLE_D dst, address, resource, sampler, der_x, der_y
+               e.g.
+               SAMPLE_D TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2], TEMP[3]
+
+.. opcode:: SAMPLE_L - SAMPLE_L is identical to the SAMPLE opcode except
+               that the LOD is provided directly as a scalar value,
+               representing no anisotropy. The source address's A channel
+               is used as the LOD.
+               SAMPLE_L dst, address, resource, sampler
+               e.g.
+               SAMPLE_L TEMP[0], TEMP[1], RES[0], SAMP[0]
+
+
+.. opcode:: GATHER4 - Gathers the four texels to be used in a bi-linear
+               filtering operation and packs them into a single register.
+               Only works with 2D, 2D array, cubemaps, and cubemap arrays.
+               For 2D textures, only the addressing modes of the sampler and
+               the top level of any mip pyramid are used. Set W to zero.
+               It behaves like the SAMPLE instruction, but a filtered
+               sample is not generated. The four samples that contribute
+               to filtering are placed into xyzw in counter-clockwise order,
+               starting with the (u,v) texture coordinate delta at the
+               following locations (-, +), (+, +), (+, -), (-, -), where
+               the magnitude of the deltas is half a texel.
+
+
+.. opcode:: RESINFO - query the dimensions of a given input buffer.
+               dst receives width, height, depth or array size and the
+               number of mipmap levels. The dst can have a writemask
+               which specifies what info the caller is interested in.
+               RESINFO dst, src_mip_level, resource
+               e.g.
+               RESINFO TEMP[0], TEMP[1].x, RES[0]
+               src_mip_level is an unsigned integer scalar. If it's
+               out of range then the instruction returns 0 for width,
+               height and depth/array size, but the total number of
+               mipmap levels is still returned correctly for the given
+               resource.
+               The returned width, height and depth values are for
+               the mipmap level selected by src_mip_level and
+               are given in texels.
+               For 1D texture arrays the width is in dst.x, the array size
+               is in dst.y and dst.zw are always 0.
+
+.. opcode:: SAMPLE_POS - query the position of a given sample.
+               dst receives float4 (x, y, 0, 0) indicating where the
+               sample is located. If the resource is not a multi-sample
+               resource and not a render target, the result is 0.
+
+.. opcode:: SAMPLE_INFO - dst receives the number of samples in x.
+               If the resource is not a multi-sample resource and
+               not a render target, the result is 0.
+
+
 Explanation of symbols used
 ------------------------------
@@ -1332,6 +1483,8 @@ wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X
 is set to 1, the X component should be interpolated according to cylindrical
 wrapping rules.
+If file is TGSI_FILE_RESOURCE, a Declaration Resource token follows.
+
 Declaration Semantic
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1475,6 +1628,23 @@ is a writable stencil reference value. Only the Y component is writable.
 This allows the fragment shader to change the fragments stencilref value.
+Declaration Resource
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+  Follows the Declaration token if file is TGSI_FILE_RESOURCE.
+
+  DCL RES[#], resource, type(s)
+
+  Declares a shader input resource and assigns it to a RES[#]
+  register.
+
+  resource can be one of BUFFER, 1D, 2D, 3D, CUBE, 1DArray and
+  2DArray.
+
+  type must be 1 or 4 entries (if specifying on a per-component
+  level) out of UNORM, SNORM, SINT, UINT and FLOAT.
+ + Properties ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index eb22a09a913..7f65b82619e 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -33,6 +33,7 @@ #include "cell_state.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "draw/draw_context.h" @@ -115,4 +116,5 @@ cell_init_vertex_functions(struct cell_context *cell) cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; + cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index af1fd953aaf..b4da1b8b901 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -30,6 +30,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "fo_context.h" @@ -656,4 +657,5 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_constant_buffer = failover_set_constant_buffer; failover->pipe.create_sampler_view = failover_create_sampler_view; failover->pipe.sampler_view_destroy = failover_sampler_view_destroy; + failover->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index 8cbf0b1de4a..75e4c253dd9 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -962,6 +962,19 @@ galahad_context_transfer_inline_write(struct pipe_context *_context, } +static void galahad_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -1036,6 +1049,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap; glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region; glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write; + glhd_pipe->base.redefine_user_buffer = galahad_redefine_user_buffer; glhd_pipe->pipe = pipe; diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index 94c428bebf8..f4e1423fa59 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -1,5 +1,13 @@ Random list of problems with i915g: +- Check if PIPE_CAP_BLEND_EQUATION_SEPARATE work, the code is there. + If not fix it! A simple task, good for beginners. + +- Add support for PIPE_CAP_POINT_SPRITE either via the draw module or directly + via the hardware, look at the classic driver, more advanced. + +- What does this button do? Figure out LIS7 with regards to depth offset. + - Dies with BadDrawable on GLXFBconfig changes/destruction. 
Makes piglit totally unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still broken, it doesn't update the viewport/get new buffers. diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index ea3c10b5e78..707b2e9f956 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -39,6 +39,9 @@ #include "pipe/p_screen.h" +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE); + + /* * Draw functions */ @@ -50,7 +53,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) struct i915_context *i915 = i915_context(pipe); struct draw_context *draw = i915->draw; void *mapped_indices = NULL; - unsigned i; unsigned cbuf_dirty; @@ -64,14 +66,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) i915_update_derived(i915); /* - * Map vertex buffers - */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - void *buf = i915_buffer(i915->vertex_buffer[i].buffer)->data; - draw_set_mapped_vertex_buffer(draw, i, buf); - } - - /* * Map index buffer, if present */ if (info->indexed && i915->index_buffer.buffer) @@ -90,13 +84,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) */ draw_vbo(i915->draw, info); - /* - * unmap vertex/index buffers - */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - if (mapped_indices) draw_set_mapped_index_buffer(draw, NULL); } @@ -117,10 +104,6 @@ static void i915_destroy(struct pipe_context *pipe) if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); - for (i = 0; i < i915->num_vertex_buffers; i++) { - pipe_resource_reference(&i915->vertex_buffer[i].buffer, NULL); - } - /* unbind framebuffer */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL); @@ -164,7 +147,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) */ i915->draw = draw_create(&i915->base); assert(i915->draw); - if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { + if (!debug_get_option_i915_no_vbuf()) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); @@ -180,6 +163,8 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->dirty = ~0; i915->hardware_dirty = ~0; + i915->immediate_dirty = ~0; + i915->dynamic_dirty = ~0; /* Batch stream debugging is a bit hacked up at the moment: */ diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index b7f1fb22221..2cf53424f06 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -219,14 +219,12 @@ struct i915_context { struct pipe_scissor_state scissor; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer index_buffer; unsigned dirty; unsigned num_samplers; unsigned num_fragment_sampler_views; - unsigned num_vertex_buffers; struct i915_winsys_batchbuffer *batch; @@ -237,6 +235,8 @@ struct i915_context { struct i915_state current; unsigned hardware_dirty; + unsigned immediate_dirty; + unsigned dynamic_dirty; struct util_slab_mempool transfer_pool; }; diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 845e92cf5c6..1713bf131f2 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ 
b/src/gallium/drivers/i915/i915_debug.c @@ -48,10 +48,15 @@ static const struct debug_named_value debug_options[] = { unsigned i915_debug = 0; boolean i915_tiling = TRUE; -void i915_debug_init(struct i915_screen *screen) +DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0) +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE) + +void i915_debug_init(struct i915_screen *is) { - i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); - i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE); + i915_debug = debug_get_option_i915_debug(); + is->debug.tiling = !debug_get_option_i915_no_tiling(); + is->debug.lie = debug_get_option_i915_lie(); } diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index 11af7662f0a..fa60799d0c5 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -46,7 +46,6 @@ struct i915_winsys_batchbuffer; #define DBG_CONSTANTS 0x20 extern unsigned i915_debug; -extern boolean i915_tiling; #ifdef DEBUG static INLINE boolean diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index a2c70b11991..911c051d1f2 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -74,7 +74,6 @@ static void i915_flush_pipe( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ FLUSH_BATCH(fence); - i915->vbo_flushed = 1; I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__); } @@ -93,5 +92,8 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) struct i915_winsys_batchbuffer *batch = i915->batch; batch->iws->batchbuffer_flush(batch, fence); + i915->vbo_flushed = 1; i915->hardware_dirty = ~0; + i915->immediate_dirty = ~0; + i915->dynamic_dirty = ~0; } diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index e793d126ade..aad5235a6ad 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -172,15 +172,15 @@ i915_texture_set_image_offset(struct i915_texture *tex, } static enum i915_winsys_buffer_tile -i915_texture_tiling(struct pipe_resource *pt) +i915_texture_tiling(struct i915_screen *is, struct i915_texture *tex) { - if (!i915_tiling) + if (!is->debug.tiling) return I915_TILE_NONE; - if (pt->target == PIPE_TEXTURE_1D) + if (tex->b.b.target == PIPE_TEXTURE_1D) return I915_TILE_NONE; - if (util_format_is_s3tc(pt->format)) + if (util_format_is_s3tc(tex->b.b.format)) /* XXX X-tiling might make sense */ return I915_TILE_NONE; @@ -401,11 +401,7 @@ i915_texture_layout_3d(struct i915_texture *tex) static boolean i915_texture_layout(struct i915_texture * tex) { - struct pipe_resource *pt = &tex->b.b; - - tex->tiling = i915_texture_tiling(pt); - - switch (pt->target) { + switch (tex->b.b.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -649,11 +645,7 @@ i945_texture_layout_cube(struct i915_texture *tex) static boolean i945_texture_layout(struct i915_texture * tex) { - struct pipe_resource *pt = &tex->b.b; - - tex->tiling = i915_texture_tiling(pt); - - switch (pt->target) { + switch (tex->b.b.target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: @@ -664,7 +656,7 @@ i945_texture_layout(struct i915_texture * tex) i945_texture_layout_3d(tex); break; case PIPE_TEXTURE_CUBE: - if 
(!util_format_is_s3tc(pt->format)) + if (!util_format_is_s3tc(tex->b.b.format)) i9x5_texture_layout_cube(tex); else i945_texture_layout_cube(tex); @@ -818,6 +810,8 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->b.b.reference, 1); tex->b.b.screen = screen; + tex->tiling = i915_texture_tiling(is, tex); + if (is->is_i945) { if (!i945_texture_layout(tex)) goto fail; diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index bdbc08e8086..77febbf5012 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -98,59 +98,81 @@ i915_get_name(struct pipe_screen *screen) } static int -i915_get_param(struct pipe_screen *screen, enum pipe_cap param) +i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + struct i915_screen *is = i915_screen(screen); + + switch (cap) { + /* Supported features (boolean caps). */ + case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_NPOT_TEXTURES: - return 1; + case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TWO_SIDED_STENCIL: return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: + + /* Features that should be supported (boolean caps). */ + /* XXX: Just test the code */ + case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 0; + + /* Unsupported features (boolean caps). */ + case PIPE_CAP_ARRAY_TEXTURES: + case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: /* disable for now */ + case PIPE_CAP_GLSL: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_INSTANCED_DRAWING: /* draw module? */ case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_TIMER_QUERY: return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; + + /* Features we can lie about (boolean caps). */ case PIPE_CAP_OCCLUSION_QUERY: + return is->debug.lie ? 1 : 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 8; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: return 0; - case PIPE_CAP_TIMER_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return I915_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return I915_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return I915_MAX_TEXTURE_2D_LEVELS; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + + /* Fragment coordinate conventions. 
*/ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: - /* disable for now */ - return 0; + default: + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } static int -i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) +i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap cap) { switch(shader) { case PIPE_SHADER_VERTEX: - return draw_get_shader_param(shader, param); + return draw_get_shader_param(shader, cap); case PIPE_SHADER_FRAGMENT: break; default: @@ -158,7 +180,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha } /* XXX: these are just shader model 2.0 values, fix this! */ - switch(param) { + switch(cap) { case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: return 96; case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: @@ -191,15 +213,15 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha case PIPE_SHADER_CAP_SUBROUTINES: return 0; default: - assert(0); + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } static float -i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param) +i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap) { - switch (param) { + switch(cap) { case PIPE_CAP_MAX_LINE_WIDTH: /* fall-through */ case PIPE_CAP_MAX_LINE_WIDTH_AA: @@ -217,6 +239,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param) return 16.0; default: + debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap); return 0; } } @@ -321,6 +344,20 @@ i915_fence_finish(struct pipe_screen *screen, static void +i915_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *winsys_drawable_handle) +{ + /* XXX: Dummy right now. 
*/ + (void)screen; + (void)resource; + (void)level; + (void)layer; + (void)winsys_drawable_handle; +} + +static void i915_destroy_screen(struct pipe_screen *screen) { struct i915_screen *is = i915_screen(screen); @@ -371,6 +408,7 @@ i915_screen_create(struct i915_winsys *iws) is->base.winsys = NULL; is->base.destroy = i915_destroy_screen; + is->base.flush_frontbuffer = i915_flush_frontbuffer; is->base.get_name = i915_get_name; is->base.get_vendor = i915_get_vendor; diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index bb4d255a3b3..60f0e2971e0 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -45,6 +45,11 @@ struct i915_screen struct i915_winsys *iws; boolean is_i945; + + struct { + boolean tiling; + boolean lie; + } debug; }; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 4a1a4a04f6d..58bbbd1de2c 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -33,6 +33,7 @@ #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" @@ -57,10 +58,8 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; - /* - case PIPE_TEX_WRAP_MIRRORED_REPEAT: + case PIPE_TEX_WRAP_MIRROR_REPEAT: return TEXCOORDMODE_MIRROR; - */ default: return TEXCOORDMODE_WRAP; } @@ -535,21 +534,31 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, /* if we have a new buffer compare it with the old one */ if (buf) { - struct i915_buffer *ir = i915_buffer(buf); + struct i915_buffer *ibuf = i915_buffer(buf); struct pipe_resource *old_buf = i915->constants[shader]; struct i915_buffer *old = old_buf ? i915_buffer(old_buf) : NULL; - - new_num = ir->b.b.width0 / 4 * sizeof(float); - - if (old && new_num != i915->current.num_user_constants[shader]) - diff = memcmp(old->data, ir->data, ir->b.b.width0); + unsigned old_num = i915->current.num_user_constants[shader]; + + new_num = ibuf->b.b.width0 / 4 * sizeof(float); + + if (old_num == new_num) { + if (old_num == 0) + diff = FALSE; +#if 0 + /* XXX no point in running this code since st/mesa only uses user buffers */ + /* Can't compare the buffer data since they are userbuffers */ + else if (old && old->free_on_destroy) + diff = memcmp(old->data, ibuf->data, ibuf->b.b.width0); +#else + (void)old; +#endif + } } else { diff = i915->current.num_user_constants[shader] != 0; } /* * flush before updateing the state. - * XXX: looks like its okay to skip the flush for vertex cbufs */ if (diff && shader == PIPE_SHADER_FRAGMENT) draw_flush(i915->draw); @@ -766,17 +775,25 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct i915_context *i915 = i915_context(pipe); - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. 
- */ - draw_flush(i915->draw); + struct draw_context *draw = i915->draw; + int i; - util_copy_vertex_buffers(i915->vertex_buffer, - &i915->num_vertex_buffers, - buffers, count); +#if 0 + /* XXX doesn't look like this is needed */ + /* unmap old */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + } +#endif /* pass-through to draw module */ - draw_set_vertex_buffers(i915->draw, count, buffers); + draw_set_vertex_buffers(draw, count, buffers); + + /* map new */ + for (i = 0; i < count; i++) { + void *buf = i915_buffer(buffers[i].buffer)->data; + draw_set_mapped_vertex_buffer(draw, i, buf); + } } static void * @@ -801,11 +818,6 @@ i915_bind_vertex_elements_state(struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems; - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. - */ - draw_flush(i915->draw); - /* pass-through to draw module */ if (i915_velems) { draw_set_vertex_elements(i915->draw, @@ -882,4 +894,5 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; i915->base.set_index_buffer = i915_set_index_buffer; + i915->base.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index d61a8c3407f..204cee6fe9e 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -46,18 +46,34 @@ * (active) state every time a 4kb boundary is crossed. */ -static INLINE void set_dynamic_indirect(struct i915_context *i915, - unsigned offset, - const unsigned *src, - unsigned dwords) +static INLINE void set_dynamic(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.dynamic[offset] == state) + return; + + i915->current.dynamic[offset] = state; + i915->dynamic_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + + +static INLINE void set_dynamic_array(struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords) { unsigned i; if (!memcmp(src, &i915->current.dynamic[offset], dwords * 4)) return; - for (i = 0; i < dwords; i++) + for (i = 0; i < dwords; i++) { i915->current.dynamic[offset + i] = src[i]; + i915->dynamic_dirty |= 1 << (offset + i); + } i915->hardware_dirty |= I915_HW_DYNAMIC; } @@ -79,12 +95,7 @@ static void upload_MODES4(struct i915_context *i915) */ modes4 |= i915->blend->modes4; - /* Always, so that we know when state is in-active: - */ - set_dynamic_indirect(i915, - I915_DYNAMIC_MODES4, - &modes4, - 1); + set_dynamic(i915, I915_DYNAMIC_MODES4, modes4); } const struct i915_tracked_state i915_upload_MODES4 = { @@ -107,10 +118,7 @@ static void upload_BFO(struct i915_context *i915) bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT; } - set_dynamic_indirect(i915, - I915_DYNAMIC_BFO_0, - &(bfo[0]), - 2); + set_dynamic_array(i915, I915_DYNAMIC_BFO_0, bfo, 2); } const struct i915_tracked_state i915_upload_BFO = { @@ -141,10 +149,7 @@ static void upload_BLENDCOLOR(struct i915_context *i915) color[3]); } - set_dynamic_indirect(i915, - I915_DYNAMIC_BC_0, - bc, - 2); + set_dynamic_array(i915, I915_DYNAMIC_BC_0, bc, 2); } const struct i915_tracked_state i915_upload_BLENDCOLOR = { @@ -161,10 +166,7 @@ static void upload_IAB(struct 
i915_context *i915) { unsigned iab = i915->blend->iab; - set_dynamic_indirect(i915, - I915_DYNAMIC_IAB, - &iab, - 1); + set_dynamic(i915, I915_DYNAMIC_IAB, iab); } const struct i915_tracked_state i915_upload_IAB = { @@ -179,10 +181,8 @@ const struct i915_tracked_state i915_upload_IAB = { */ static void upload_DEPTHSCALE(struct i915_context *i915) { - set_dynamic_indirect(i915, - I915_DYNAMIC_DEPTHSCALE_0, - &(i915->rasterizer->ds[0].u), - 2); + set_dynamic_array(i915, I915_DYNAMIC_DEPTHSCALE_0, + &i915->rasterizer->ds[0].u, 2); } const struct i915_tracked_state i915_upload_DEPTHSCALE = { @@ -234,10 +234,7 @@ static void upload_STIPPLE(struct i915_context *i915) (p[3] << 12)); } - set_dynamic_indirect(i915, - I915_DYNAMIC_STP_0, - &st[0], - 2); + set_dynamic_array(i915, I915_DYNAMIC_STP_0, st, 2); } const struct i915_tracked_state i915_upload_STIPPLE = { @@ -253,10 +250,7 @@ const struct i915_tracked_state i915_upload_STIPPLE = { */ static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) { - set_dynamic_indirect(i915, - I915_DYNAMIC_SC_ENA_0, - &(i915->rasterizer->sc[0]), - 1); + set_dynamic(i915, I915_DYNAMIC_SC_ENA_0, i915->rasterizer->sc[0]); } const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { @@ -282,10 +276,7 @@ static void upload_SCISSOR_RECT(struct i915_context *i915) sc[1] = (y1 << 16) | (x1 & 0xffff); sc[2] = (y2 << 16) | (x2 & 0xffff); - set_dynamic_indirect(i915, - I915_DYNAMIC_SC_RECT_0, - &sc[0], - 3); + set_dynamic_array(i915, I915_DYNAMIC_SC_RECT_0, sc, 3); } const struct i915_tracked_state i915_upload_SCISSOR_RECT = { diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 5a89977c26c..509d487b498 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -35,6 +35,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "util/u_math.h" + static unsigned translate_format( enum pipe_format format ) { switch (format) { @@ -178,11 +180,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) ENABLE_TEXKILL_3D_4D | TEXKILL_4D); - /* Need to initialize this to zero. 
- */ - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); - OUT_BATCH(0); - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); /* disable indirect state for now @@ -194,27 +191,30 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 7 dwords, 1 relocs */ if (i915->hardware_dirty & I915_HW_IMMEDIATE) { - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | - I1_LOAD_S(1) | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - I1_LOAD_S(5) | - I1_LOAD_S(6) | - (5)); - - if(i915->vbo) - OUT_RELOC(i915->vbo, - I915_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - /* FIXME: we should not do this */ - OUT_BATCH(0); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); + /* remove unwatned bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + OUT_BATCH(0); + } + + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1 << i)) + OUT_BATCH(i915->current.immediate[i]); + } } #if 01 @@ -223,7 +223,8 @@ i915_emit_hardware_state(struct i915_context *i915 ) { int i; for (i = 0; i < I915_MAX_DYNAMIC; i++) { - OUT_BATCH(i915->current.dynamic[i]); + if (i915->dynamic_dirty & (1 << i)); + OUT_BATCH(i915->current.dynamic[i]); } } #endif @@ -443,4 +444,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) i915->batch->relocs - save_relocs); i915->hardware_dirty = 0; + i915->immediate_dirty = 0; + i915->dynamic_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index f9ade7077f2..81348647399 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -36,12 +36,20 @@ #include "util/u_memory.h" -/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. - * Would like to opportunistically recombine all these fragments into - * a single packet containing only what has changed, but for now emit - * as multiple packets. +/* Convinience function to check immediate state. */ +static INLINE void set_immediate(struct i915_context *i915, + unsigned offset, + const unsigned state) +{ + if (i915->current.immediate[offset] == state) + return; + + i915->current.immediate[offset] = state; + i915->immediate_dirty |= 1 << offset; + i915->hardware_dirty |= I915_HW_IMMEDIATE; +} @@ -56,9 +64,14 @@ static void upload_S0S1(struct i915_context *i915) */ LIS0 = i915->vbo_offset; + /* Need to force this */ + if (i915->dirty & I915_NEW_VBO) { + i915->immediate_dirty |= 1 << I915_IMMEDIATE_S0; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } + /* I915_NEW_VERTEX_SIZE */ - /* XXX do this where the vertex size is calculated! 
*/ { unsigned vertex_size = i915->current.vertex_info.size; @@ -66,16 +79,8 @@ static void upload_S0S1(struct i915_context *i915) (vertex_size << 16)); } - /* I915_NEW_VBO - */ - if (1 || - i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) - { - i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; - i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S0, LIS0); + set_immediate(i915, I915_IMMEDIATE_S1, LIS1); } const struct i915_tracked_state i915_upload_S0S1 = { @@ -98,21 +103,13 @@ static void upload_S2S4(struct i915_context *i915) { LIS2 = i915->current.vertex_info.hwfmt[1]; LIS4 = i915->current.vertex_info.hwfmt[0]; - /* - debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); - */ assert(LIS4); /* should never be zero? */ } LIS4 |= i915->rasterizer->LIS4; - if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || - LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { - - i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; - i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S2, LIS2); + set_immediate(i915, I915_IMMEDIATE_S4, LIS4); } const struct i915_tracked_state i915_upload_S2S4 = { @@ -142,15 +139,12 @@ static void upload_S5(struct i915_context *i915) #if 0 /* I915_NEW_RASTERIZER */ - if (i915->state.Polygon->OffsetFill) { + if (i915->rasterizer->LIS7) { LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; } #endif - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { - i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S5, LIS5); } const struct i915_tracked_state i915_upload_S5 = { @@ -180,14 +174,11 @@ static void upload_S6(struct i915_context *i915) */ LIS6 |= i915->depth_stencil->depth_LIS6; - if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { - i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } + set_immediate(i915, I915_IMMEDIATE_S6, LIS6); } const struct i915_tracked_state i915_upload_S6 = { - "imm s6", + "imm S6", upload_S6, I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER }; @@ -204,10 +195,9 @@ static void upload_S7(struct i915_context *i915) */ LIS7 = i915->rasterizer->LIS7; - if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { - i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } +#if 0 + set_immediate(i915, I915_IMMEDIATE_S7, LIS7); +#endif } const struct i915_tracked_state i915_upload_S7 = { diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 24ea416f015..e915a886c9b 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -103,11 +103,12 @@ struct i915_winsys { * @usage how is the hardware going to use the buffer. * @offset add this to the reloc buffers address * @target buffer where to write the address, null for batchbuffer. + * @fenced relocation needs a fence. */ int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *reloc, enum i915_winsys_buffer_usage usage, - unsigned offset, bool fenced); + unsigned offset, boolean fenced); /** * Flush a bufferbatch. 
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index b23454b5808..570ea23ff45 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -4,6 +4,7 @@ #include "util/u_memory.h" #include "util/u_format.h" +#include "util/u_transfer.h" static unsigned brw_translate_surface_format( unsigned id ) @@ -302,6 +303,7 @@ brw_pipe_vertex_init( struct brw_context *brw ) brw->base.create_vertex_elements_state = brw_create_vertex_elements_state; brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state; brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state; + brw->base.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 3efbd6a246d..b533abe24c6 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -855,6 +855,19 @@ identity_context_transfer_inline_write(struct pipe_context *_context, } +static void identity_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_resource *id_resource = identity_resource(_resource); + struct pipe_context *context = id_context->pipe; + struct pipe_resource *resource = id_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -929,6 +942,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.transfer_unmap = identity_context_transfer_unmap; id_pipe->base.transfer_flush_region = identity_context_transfer_flush_region; id_pipe->base.transfer_inline_write = identity_context_transfer_inline_write; + id_pipe->base.redefine_user_buffer = identity_redefine_user_buffer; id_pipe->pipe = pipe; diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 26b258b9569..c10a8cbc12c 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -96,10 +96,15 @@ if env['platform'] != 'embedded': tests.append('round') for test in tests: + testname = 'lp_test_' + test target = env.Program( - target = 'lp_test_' + test, - source = ['lp_test_' + test + '.c', 'lp_test_main.c'], + target = testname, + source = [testname + '.c', 'lp_test_main.c'], ) env.InstallProgram(target) + + # http://www.scons.org/wiki/UnitTests + alias = env.Alias(testname, [target], target[0].abspath) + AlwaysBuild(alias) Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 85e3cdec82c..849db06acdf 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -72,7 +72,7 @@ llvmpipe_flush( struct pipe_context *pipe, for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); - debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]); } if (0) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index fffdeb6ccde..be86f66de91 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -34,6 +34,7 @@ #include "draw/draw_context.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" static void * @@ -114,4 +115,6 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; + + llvmpipe->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 149ee6f1256..d229c620310 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -35,24 +35,13 @@ #include "util/u_cpu_detect.h" +#include "util/u_math.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_init.h" #include "lp_test.h" -#ifdef PIPE_CC_MSVC -static INLINE double -round(double x) -{ - if (x >= 0.0) - return floor(x + 0.5); - else - return ceil(x - 0.5); -} -#endif - - void dump_type(FILE *fp, struct lp_type type) diff --git a/src/gallium/drivers/noop/noop_state.c b/src/gallium/drivers/noop/noop_state.c index ad324774c03..00a4c1eb01e 100644 --- a/src/gallium/drivers/noop/noop_state.c +++ b/src/gallium/drivers/noop/noop_state.c @@ -28,6 +28,7 @@ #include <pipe/p_screen.h> #include <util/u_memory.h> #include <util/u_inlines.h> +#include "util/u_transfer.h" static void noop_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { @@ -287,4 +288,5 @@ void noop_init_state_functions(struct pipe_context *ctx) ctx->sampler_view_destroy = noop_sampler_view_destroy; ctx->surface_destroy = noop_surface_destroy; ctx->draw_vbo = noop_draw_vbo; + ctx->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 27eb3817bf1..679e5ea1485 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -299,7 +299,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) } if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0) - nvi->set_cond = cc_swapped[nvi->set_cond]; + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; } static int diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 53f94820ce0..f3418df8381 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -45,7 +45,7 @@ nv50_query_create(struct pipe_context *pipe, unsigned type) struct nv50_query *q = CALLOC_STRUCT(nv50_query); int ret; - assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); + assert (type == PIPE_QUERY_OCCLUSION_COUNTER); q->type = type; ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 256, diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index b4eda0f617d..ba2c3e8c281 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -23,6 +23,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -886,5 +887,6 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; nv50->pipe.set_index_buffer = nv50_set_index_buffer; + nv50->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c 
b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index d6b80c3ea79..ce9300ad8fd 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -1130,7 +1130,7 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: res = bld_saved_input(bld, idx, swz); if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP)) - return res; + break; res = new_value(bld->pc, bld->ti->input_file, type); res->reg.id = bld->ti->input_map[idx][swz]; diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 61932ff2b6a..73a605f94e1 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -84,6 +84,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210 +#define NVC0_3D_MEM_BARRIER 0x0000021c +#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 + #define NVC0_3D_TESS_MODE 0x00000320 #define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f #define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 @@ -122,11 +130,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0)) -#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010 +#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004 #define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) +#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010 +#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004 #define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0)) +#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010 +#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004 #define NVC0_3D_TFB_ENABLE 0x00000744 @@ -144,29 +158,29 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_RT__ESIZE 0x00000020 #define NVC0_3D_RT__LEN 0x00000008 -#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0)) +#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0)) -#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0)) +#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0)) -#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0)) +#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0)) -#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0)) +#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0)) -#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0)) +#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0)) -#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0)) +#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0)) #define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001 #define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070 #define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4 #define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700 #define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8 -#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0)) +#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0)) #define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff #define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0 #define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000 -#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0)) +#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0)) #define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0)) #define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020 @@ -216,21 +230,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010 #define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0)) -#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000 -#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16 - -#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0)) -#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff -#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000 -#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16 +#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16 + +#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0)) +#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008 +#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff +#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0 +#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000 +#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16 #define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0)) #define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008 @@ -356,6 +370,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
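The renamed CLIP_RECT_HORIZ/VERT words above pack the rectangle bounds into 16-bit MIN/MAX halves, eight rectangles with a stride of 8 bytes. A minimal sketch of building such a word with the masks and shifts defined above; the helper name is made up for illustration:

#include <stdint.h>

/* mirrors of the CLIP_RECT field layout above */
#define CLIP_RECT_MIN__SHIFT 0
#define CLIP_RECT_MAX__SHIFT 16
#define CLIP_RECT_MAX__MASK  0xffff0000

/* pack one clip rectangle bound pair into a CLIP_RECT_HORIZ/VERT word */
uint32_t
pack_clip_rect(uint16_t min, uint16_t max)
{
   return ((uint32_t)min << CLIP_RECT_MIN__SHIFT) |
          (((uint32_t)max << CLIP_RECT_MAX__SHIFT) & CLIP_RECT_MAX__MASK);
}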
#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff #define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0 +#define NVC0_3D_CLEAR_FLAGS 0x000010f8 +#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001 +#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010 +#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100 +#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000 + #define NVC0_3D_VERTEX_ID 0x00001118 #define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c @@ -561,7 +581,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004 #define NVC0_3D_BLEND_ENABLE__LEN 0x00000008 -#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380 +#define NVC0_3D_STENCIL_ENABLE 0x00001380 #define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384 #define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 @@ -605,9 +625,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394 -#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398 +#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398 -#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c +#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c #define NVC0_3D_DRAW_TFB_BASE 0x000013a4 @@ -642,6 +662,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLIPID_HEIGHT 0x00001504 #define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16 + +#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff +#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000 +#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16 + #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510 #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001 #define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002 @@ -814,8 +846,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_VERTEX_BASE_LOW 0x000015f8 #define NVC0_3D_POINT_COORD_REPLACE 0x00001604 -#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff -#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000 +#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8 +#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3 #define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608 @@ -864,8 +900,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000 #define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001 -#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660 - #define NVC0_3D_TEX_MISC 0x00001664 #define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004 @@ -928,22 +962,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
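POINT_COORD_REPLACE is now split into a coordinate-origin bit and an 8-bit per-coordinate enable field instead of a single 13-bit mask. A hedged sketch of how a value with this layout could be assembled; the helper and its parameters are illustrative, not taken from the driver:

#include <stdbool.h>
#include <stdint.h>

/* mirrors of the POINT_COORD_REPLACE fields above */
#define COORD_ORIGIN_UPPER_LEFT 0x00000004
#define ENABLE__SHIFT           3
#define ENABLE__MASK            0x000007f8

/* enable_bits: one bit per point coordinate to replace (8 fit in the field) */
uint32_t
point_coord_replace(uint8_t enable_bits, bool upper_left_origin)
{
   uint32_t val = ((uint32_t)enable_bits << ENABLE__SHIFT) & ENABLE__MASK;
   if (upper_left_origin)
      val |= COORD_ORIGIN_UPPER_LEFT;
   return val;
}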
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000 -#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000 +#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000 -#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c +#define NVC0_3D_CLIP_RECTS_EN 0x0000194c -#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950 -#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000 -#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001 -#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002 +#define NVC0_3D_CLIP_RECTS_MODE 0x00001950 +#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000 +#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001 +#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002 #define NVC0_3D_FP_ZORDER_CTRL 0x0000196c #define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001 @@ -996,6 +1036,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00 #define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10 +#define NVC0_3D_CLIPID_FILL 0x000019d4 + #define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0)) #define NVC0_3D_COLOR_MASK__ESIZE 0x00000004 #define NVC0_3D_COLOR_MASK__LEN 0x00000008 @@ -1155,9 +1197,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
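The reinterpreted VIEW_VOLUME_CLIP_CTRL bits above name independent near/far depth clamp controls. Assuming a set bit enables clamping at that plane (not confirmed by the patch itself), a state update might toggle them like this:

#include <stdbool.h>
#include <stdint.h>

/* mirrors of the re-documented VIEW_VOLUME_CLIP_CTRL bits above */
#define DEPTH_CLAMP_NEAR 0x00000008
#define DEPTH_CLAMP_FAR  0x00000010

/* assumption: a set bit enables clamping at that plane */
uint32_t
update_depth_clamp(uint32_t clip_ctrl, bool clamp_near, bool clamp_far)
{
   clip_ctrl &= ~(uint32_t)(DEPTH_CLAMP_NEAR | DEPTH_CLAMP_FAR);
   if (clamp_near)
      clip_ctrl |= DEPTH_CLAMP_NEAR;
   if (clamp_far)
      clip_ctrl |= DEPTH_CLAMP_FAR;
   return clip_ctrl;
}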
#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 -#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0)) +#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) #define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 -#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080 +#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 #define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808 diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c index ea3e642a448..aa949bdfa36 100644 --- a/src/gallium/drivers/nvc0/nvc0_buffer.c +++ b/src/gallium/drivers/nvc0/nvc0_buffer.c @@ -59,15 +59,23 @@ release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence) (*mm) = NULL; } -static INLINE boolean -nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, - unsigned domain) +INLINE void +nvc0_buffer_release_gpu_storage(struct nvc0_resource *buf) { nouveau_bo_ref(NULL, &buf->bo); if (buf->mm) release_allocation(&buf->mm, buf->fence); + buf->domain = 0; +} + +static INLINE boolean +nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf, + unsigned domain) +{ + nvc0_buffer_release_gpu_storage(buf); + return nvc0_buffer_allocate(screen, buf, domain); } @@ -77,10 +85,7 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen, { struct nvc0_resource *res = nvc0_resource(presource); - nouveau_bo_ref(NULL, &res->bo); - - if (res->mm) - release_allocation(&res->mm, res->fence); + nvc0_buffer_release_gpu_storage(res); if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY)) FREE(res->data); @@ -112,7 +117,7 @@ nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf, memcpy(buf->data + start, bounce->map, size); nouveau_bo_unmap(bounce); - buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; nouveau_bo_ref(NULL, &bounce); if (mm) @@ -151,7 +156,7 @@ nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf, release_allocation(&mm, nvc0->screen->fence.current); if (start == 0 && size == buf->base.width0) - buf->status &= ~NVC0_BUFFER_STATUS_DIRTY; + buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; return TRUE; } @@ -174,7 +179,7 @@ nvc0_buffer_transfer_get(struct pipe_context *pipe, if (buf->domain == NOUVEAU_BO_VRAM) { if (usage & PIPE_TRANSFER_READ) { - if (buf->status & NVC0_BUFFER_STATUS_DIRTY) + if (buf->status & NVC0_BUFFER_STATUS_GPU_WRITING) nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0); } } diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 2118abb5d5d..f02de4d044a 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -41,17 +41,18 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags, OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); OUT_RING (chan, 0x00); + } else + if ((flags & PIPE_FLUSH_RENDER_CACHE) && !(flags & PIPE_FLUSH_FRAME)) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); } - if (fence) { - nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE); - } + if (fence) + nvc0_fence_reference((struct nvc0_fence **)fence, + nvc0->screen->fence.current); - if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) { + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) FIRE_RING(chan); - - nvc0_screen_fence_next(nvc0->screen); - } } static void @@ -67,6 +68,16 @@ nvc0_destroy(struct pipe_context *pipe) FREE(nvc0); } +void +nvc0_default_flush_notify(struct nouveau_channel 
*chan) +{ + struct nvc0_context *nvc0 = chan->user_private; + + nvc0_screen_fence_update(nvc0->screen, TRUE); + + nvc0_screen_fence_next(nvc0->screen); +} + struct pipe_context * nvc0_create(struct pipe_screen *pscreen, void *priv) { @@ -91,6 +102,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) nvc0->pipe.flush = nvc0_flush; screen->base.channel->user_private = nvc0; + screen->base.channel->flush_notify = nvc0_default_flush_notify; nvc0_init_query_functions(nvc0); nvc0_init_surface_functions(nvc0); @@ -148,12 +160,14 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0) { struct resident *rsd; struct util_dynarray *array; - unsigned ctx, i; + unsigned ctx, i, n; for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) { array = &nvc0->residents[ctx]; - for (i = 0; i < array->size / sizeof(struct resident); ++i) { + n = array->size / sizeof(struct resident); + MARK_RING(nvc0->screen->base.channel, n, n); + for (i = 0; i < n; ++i) { rsd = util_dynarray_element(array, struct resident, i); nvc0_resource_validate(rsd->res, rsd->flags); diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 94117988e50..1ce5554f7b7 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -54,6 +54,8 @@ #define NVC0_NEW_CONSTBUF (1 << 18) #define NVC0_NEW_TEXTURES (1 << 19) #define NVC0_NEW_SAMPLERS (1 << 20) +#define NVC0_NEW_TFB (1 << 21) +#define NVC0_NEW_TFB_BUFFERS (1 << 22) #define NVC0_BUFCTX_CONSTANT 0 #define NVC0_BUFCTX_FRAME 1 @@ -79,6 +81,7 @@ struct nvc0_context { uint8_t num_vtxelts; uint8_t num_textures[5]; uint8_t num_samplers[5]; + uint8_t tls_required; /* bitmask of shader types using l[] */ uint16_t scissor; uint32_t uniform_buffer_bound[5]; } state; @@ -123,6 +126,11 @@ struct nvc0_context { boolean vbo_dirty; boolean vbo_push_hint; + struct nvc0_transform_feedback_state *tfb; + struct pipe_resource *tfbbuf[4]; + unsigned num_tfbbufs; + unsigned tfb_offset[4]; + struct draw_context *draw; }; @@ -149,6 +157,8 @@ nvc0_surface(struct pipe_surface *ps) /* nvc0_context.c */ struct pipe_context *nvc0_create(struct pipe_screen *, void *); +void nvc0_default_flush_notify(struct nouveau_channel *); + void nvc0_bufctx_emit_relocs(struct nvc0_context *); void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx, struct nvc0_resource *, uint32_t flags); @@ -177,6 +187,8 @@ void nvc0_tevlprog_validate(struct nvc0_context *); void nvc0_gmtyprog_validate(struct nvc0_context *); void nvc0_fragprog_validate(struct nvc0_context *); +void nvc0_tfb_validate(struct nvc0_context *); + /* nvc0_state.c */ extern void nvc0_init_state_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c index 9d2c48cf14d..f2d4b1451bf 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.c +++ b/src/gallium/drivers/nvc0/nvc0_fence.c @@ -55,6 +55,7 @@ nvc0_fence_emit(struct nvc0_fence *fence) assert(fence->state == NVC0_FENCE_STATE_AVAILABLE); + MARK_RING (chan, 5, 2); BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR); @@ -83,7 +84,8 @@ nvc0_fence_del(struct nvc0_fence *fence) struct nvc0_fence *it; struct nvc0_screen *screen = fence->screen; - if (fence->state == NVC0_FENCE_STATE_EMITTED) { + if (fence->state == NVC0_FENCE_STATE_EMITTED || + fence->state == NVC0_FENCE_STATE_FLUSHED) { if (fence == screen->fence.head) { screen->fence.head = fence->next; if 
(!screen->fence.head) @@ -118,8 +120,8 @@ nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence) fence->buffers = NULL; } -static void -nvc0_screen_fence_update(struct nvc0_screen *screen) +void +nvc0_screen_fence_update(struct nvc0_screen *screen, boolean flushed) { struct nvc0_fence *fence; struct nvc0_fence *next = NULL; @@ -146,38 +148,43 @@ nvc0_screen_fence_update(struct nvc0_screen *screen) screen->fence.head = next; if (!next) screen->fence.tail = NULL; -} -#define NVC0_FENCE_MAX_SPINS (1 << 17) + if (flushed) { + for (fence = next; fence; fence = fence->next) + fence->state = NVC0_FENCE_STATE_FLUSHED; + } +} boolean nvc0_fence_signalled(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; - if (fence->state == NVC0_FENCE_STATE_EMITTED) - nvc0_screen_fence_update(screen); + if (fence->state >= NVC0_FENCE_STATE_EMITTED) + nvc0_screen_fence_update(screen, FALSE); return fence->state == NVC0_FENCE_STATE_SIGNALLED; } +#define NVC0_FENCE_MAX_SPINS (1 << 31) + boolean nvc0_fence_wait(struct nvc0_fence *fence) { struct nvc0_screen *screen = fence->screen; - int spins = 0; + uint32_t spins = 0; - if (fence->state == NVC0_FENCE_STATE_AVAILABLE) { + if (fence->state < NVC0_FENCE_STATE_EMITTED) { nvc0_fence_emit(fence); - FIRE_RING(screen->base.channel); - if (fence == screen->fence.current) nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); } + if (fence->state < NVC0_FENCE_STATE_FLUSHED) + FIRE_RING(screen->base.channel); do { - nvc0_screen_fence_update(screen); + nvc0_screen_fence_update(screen, FALSE); if (fence->state == NVC0_FENCE_STATE_SIGNALLED) return TRUE; @@ -188,8 +195,9 @@ nvc0_fence_wait(struct nvc0_fence *fence) #endif } while (spins < NVC0_FENCE_MAX_SPINS); - if (spins > 9000) - NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence); + debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n", + fence->sequence, + screen->fence.sequence_ack, screen->fence.sequence); return FALSE; } @@ -199,5 +207,4 @@ nvc0_screen_fence_next(struct nvc0_screen *screen) { nvc0_fence_emit(screen->fence.current); nvc0_screen_fence_new(screen, &screen->fence.current, FALSE); - nvc0_screen_fence_update(screen); } diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h index e63c164bda4..3d8c3f8ba60 100644 --- a/src/gallium/drivers/nvc0/nvc0_fence.h +++ b/src/gallium/drivers/nvc0/nvc0_fence.h @@ -7,7 +7,8 @@ #define NVC0_FENCE_STATE_AVAILABLE 0 #define NVC0_FENCE_STATE_EMITTED 1 -#define NVC0_FENCE_STATE_SIGNALLED 2 +#define NVC0_FENCE_STATE_FLUSHED 2 +#define NVC0_FENCE_STATE_SIGNALLED 3 struct nvc0_mm_allocation; diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index 7c7e134146e..ea3ed9e0225 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -143,8 +143,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, switch (pt->format) { case PIPE_FORMAT_Z16_UNORM: tile_flags = 0x0700; /* COMPRESSED */ - tile_flags = 0x0200; /* NORMAL ? */ - tile_flags = 0x0100; /* NORMAL ? 
*/ + tile_flags = 0x0100; /* NORMAL */ break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: tile_flags = 0x5300; /* MSAA 4, COMPRESSED */ @@ -170,6 +169,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen, break; case PIPE_FORMAT_R16G16B16A16_UNORM: tile_flags = 0xe900; /* COMPRESSED */ + tile_flags = 0xfe00; /* NORMAL */ break; default: tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */ @@ -283,7 +283,7 @@ nvc0_miptree_surface_new(struct pipe_context *pipe, pipe_reference_init(&ps->reference, 1); pipe_resource_reference(&ps->texture, pt); ps->context = pipe; - ps->format = pt->format; + ps->format = templ->format; ps->usage = templ->usage; ps->u.tex.level = templ->u.tex.level; ps->u.tex.first_layer = templ->u.tex.first_layer; diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c index 0629dad19c9..516d2e31b55 100644 --- a/src/gallium/drivers/nvc0/nvc0_mm.c +++ b/src/gallium/drivers/nvc0/nvc0_mm.c @@ -96,13 +96,13 @@ mm_bucket_by_size(struct nvc0_mman *cache, unsigned size) static INLINE uint32_t mm_default_slab_size(unsigned chunk_order) { - assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); - static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] = { 12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22 }; + assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER); + return 1 << slab_order[chunk_order - MM_MIN_ORDER]; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index 304a1919768..f51d289e8cd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -44,6 +44,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s, if (ld->indirect >= 0) return FALSE; + /* a few ops can use g[] sources directly, but we don't support g[] yet */ + if (ld->src[0]->value->reg.file == NV_FILE_MEM_L || + ld->src[0]->value->reg.file == NV_FILE_MEM_G) + return FALSE; + for (i = 0; i < 3 && nvi->src[i]; ++i) if (nvi->src[i]->value->reg.file == NV_FILE_IMM) return FALSE; @@ -55,15 +60,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s, boolean nvc0_insn_is_predicateable(struct nv_instruction *nvi) { - int s; - - if (!nv_op_predicateable(nvi->opcode)) + if (nvi->predicate >= 0) /* already predicated */ return FALSE; - if (nvi->predicate >= 0) + if (!nvc0_op_info_table[nvi->opcode].predicate && + !nvc0_op_info_table[nvi->opcode].pseudo) return FALSE; - for (s = 0; s < 4 && nvi->src[s]; ++s) - if (nvi->src[s]->value->reg.file == NV_FILE_IMM) - return FALSE; return TRUE; } @@ -103,6 +104,12 @@ nvc0_pc_replace_value(struct nv_pc *pc, return n; } +static INLINE boolean +is_gpr63(struct nv_value *val) +{ + return (val->reg.file == NV_FILE_GPR && val->reg.id == 63); +} + struct nv_value * nvc0_pc_find_constant(struct nv_ref *ref) { @@ -116,7 +123,7 @@ nvc0_pc_find_constant(struct nv_ref *ref) assert(!src->insn->src[0]->mod); src = src->insn->src[0]->value; } - if ((src->reg.file == NV_FILE_IMM) || + if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) || (src->insn && src->insn->opcode == NV_OP_LD && src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) && @@ -130,7 +137,7 @@ nvc0_pc_find_immediate(struct nv_ref *ref) { struct nv_value *src = nvc0_pc_find_constant(ref); - return (src && src->reg.file == NV_FILE_IMM) ? src : NULL; + return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? 
src : NULL; } static void @@ -187,7 +194,10 @@ nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, bb[p++] = b->out[j]; break; case CFG_EDGE_LOOP_LEAVE: - bbb[pp++] = b->out[j]; + if (!b->out[j]->priv) { + bbb[pp++] = b->out[j]; + b->out[j]->priv = 1; + } break; default: assert(0); @@ -499,6 +509,9 @@ nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i) i->bb = b; b->num_instructions++; + + if (i->prev && i->prev->terminator) + nvc0_insns_permute(i->prev, i); } void @@ -512,6 +525,8 @@ nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni) ni->prev = at; ni->next->prev = ni; ni->prev->next = ni; + ni->bb = at->bb; + ni->bb->num_instructions++; } void diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 969cc68c596..efa073a9201 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -53,7 +53,8 @@ /** * BIND forces source operand i into the same register as destination operand i, - * and the operands will be assigned consecutive registers (needed for TEX) + * and the operands will be assigned consecutive registers (needed for TEX). + * Beware conflicts ! * SELECT forces its multiple source operands and its destination operand into * one and the same register. */ @@ -204,6 +205,10 @@ #define NV_CC_C 0x11 #define NV_CC_A 0x12 #define NV_CC_S 0x13 +#define NV_CC_INVERSE(cc) ((cc) ^ 0x7) +/* for 1 bit predicates: */ +#define NV_CC_P 0 +#define NV_CC_NOT_P 1 #define NV_PC_MAX_INSTRUCTIONS 2048 #define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) @@ -259,12 +264,6 @@ nv_op_supported_src_mods(uint opcode) return nvc0_op_info_table[opcode].mods; } -static INLINE boolean -nv_op_predicateable(uint opcode) -{ - return nvc0_op_info_table[opcode].predicate ? 
TRUE : FALSE; -} - static INLINE uint nv_type_order(ubyte type) { @@ -310,7 +309,7 @@ struct nv_reg { int32_t s32; int64_t s64; uint64_t u64; - uint32_t u32; + uint32_t u32; /* expected to be 0 for $r63 */ float f32; double f64; } imm; @@ -344,6 +343,8 @@ struct nv_ref { uint8_t flags; }; +#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0) + struct nv_basic_block; struct nv_instruction { @@ -485,7 +486,7 @@ nv_alloc_instruction(struct nv_pc *pc, uint opcode) assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS); insn->opcode = opcode; - insn->cc = 0; + insn->cc = NV_CC_P; insn->indirect = -1; insn->predicate = -1; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index db8055d91cd..c10f920e6f1 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -236,7 +236,7 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op) */ pc->emit[0] |= (pcrel & 0x3f) << 26; - pc->emit[1] |= (pcrel >> 6) & 0x1ffff; + pc->emit[1] |= (pcrel >> 6) & 0x3ffff; } } @@ -393,6 +393,8 @@ emit_tex(struct nv_pc *pc, struct nv_instruction *i) { int src1 = i->tex_array + i->tex_dim + i->tex_cube; + assert(src1 < 6); + pc->emit[0] = 0x00000086; pc->emit[1] = 0x80000000; @@ -446,7 +448,7 @@ emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op) pc->emit[0] |= op << 26; - if (op >= 4) { + if (op >= 3) { if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9; if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7; } else { @@ -477,6 +479,7 @@ emit_ddx(struct nv_pc *pc, struct nv_instruction *i) { i->quadop = 0x99; i->lanes = 4; + i->src[1] = i->src[0]; emit_quadop(pc, i); } @@ -485,6 +488,7 @@ emit_ddy(struct nv_pc *pc, struct nv_instruction *i) { i->quadop = 0xa5; i->lanes = 5; + i->src[1] = i->src[0]; emit_quadop(pc, i); } @@ -629,25 +633,28 @@ emit_slct(struct nv_pc *pc, struct nv_instruction *i) static void emit_cvt(struct nv_pc *pc, struct nv_instruction *i) { + uint32_t rint; + pc->emit[0] = 0x00000004; pc->emit[1] = 0x10000000; - if (i->opcode != NV_OP_CVT) + /* if no type conversion specified, get type from opcode */ + if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s) i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode); switch (i->ext.cvt.d) { case NV_TYPE_F32: switch (i->ext.cvt.s) { case NV_TYPE_F32: pc->emit[1] = 0x10000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ case NV_TYPE_U32: pc->emit[1] = 0x18000000; break; } break; - case NV_TYPE_S32: pc->emit[0] |= 0x80; + case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */ case NV_TYPE_U32: switch (i->ext.cvt.s) { case NV_TYPE_F32: pc->emit[1] = 0x14000000; break; - case NV_TYPE_S32: pc->emit[0] |= 0x200; + case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */ case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break; } break; @@ -656,14 +663,20 @@ emit_cvt(struct nv_pc *pc, struct nv_instruction *i) break; } - if (i->opcode == NV_OP_FLOOR) - pc->emit[1] |= 0x00020000; - else - if (i->opcode == NV_OP_CEIL) - pc->emit[1] |= 0x00040000; - else - if (i->opcode == NV_OP_TRUNC) - pc->emit[1] |= 0x00060000; + rint = (i->ext.cvt.d == NV_TYPE_F32) ? 
1 << 7 : 0; + + if (i->opcode == NV_OP_FLOOR) { + pc->emit[0] |= rint; + pc->emit[1] |= 2 << 16; + } else + if (i->opcode == NV_OP_CEIL) { + pc->emit[0] |= rint; + pc->emit[1] |= 4 << 16; + } else + if (i->opcode == NV_OP_TRUNC) { + pc->emit[0] |= rint; + pc->emit[1] |= 6 << 16; + } if (i->saturate || i->opcode == NV_OP_SAT) pc->emit[0] |= 0x20; @@ -793,11 +806,8 @@ emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i) } static void -emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +emit_ld_common(struct nv_pc *pc, struct nv_instruction *i) { - pc->emit[0] = 0x00000006; - pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); - emit_ldst_size(pc, i); set_pred(pc, i); @@ -808,6 +818,15 @@ emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) } static void +emit_ld_const(struct nv_pc *pc, struct nv_instruction *i) +{ + pc->emit[0] = 0x00000006; + pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10); + + emit_ld_common(pc, i); +} + +static void emit_ld(struct nv_pc *pc, struct nv_instruction *i) { if (SFILE(i, 0) >= NV_FILE_MEM_C(0) && @@ -818,6 +837,12 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i) } else { emit_ld_const(pc, i); } + } else + if (SFILE(i, 0) == NV_FILE_MEM_L) { + pc->emit[0] = 0x00000005; + pc->emit[1] = 0xc0000000; + + emit_ld_common(pc, i); } else { NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0)); abort(); @@ -827,8 +852,19 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i) static void emit_st(struct nv_pc *pc, struct nv_instruction *i) { - NOUVEAU_ERR("emit_st: not handled yet\n"); - abort(); + if (SFILE(i, 0) != NV_FILE_MEM_L) + NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0)); + + pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */ + pc->emit[1] = 0xc8000000; + + emit_ldst_size(pc, i); + + set_pred(pc, i); + set_address_16(pc, i->src[0]); + + SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20); + DID(pc, i->src[1]->value, 14); } void diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index acc72bff14c..c5a7367a5fd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -99,6 +99,7 @@ inst_removable(struct nv_instruction *nvi) nvc0_insn_refcount(nvi))); } +/* Check if we do not actually have to emit this instruction. 
*/ static INLINE boolean inst_is_noop(struct nv_instruction *nvi) { @@ -141,9 +142,10 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) struct nv_instruction *nvi, *next; int j; + /* find first non-empty block emitted before b */ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j); - if (j >= 0) { + for (; j >= 0; --j) { in = pc->bb_list[j]; /* check for no-op branches (BRA $PC+8) */ @@ -157,6 +159,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) nvc0_insn_delete(in->exit); } b->emit_pos = in->emit_pos + in->emit_size; + + if (in->emit_size) /* no more no-op branches to b */ + break; } pc->bb_list[pc->num_blocks++] = b; @@ -240,10 +245,15 @@ check_swap_src_0_1(struct nv_instruction *nvi) struct nv_ref *src0 = nvi->src[0]; struct nv_ref *src1 = nvi->src[1]; - if (!nv_op_commutative(nvi->opcode)) + if (!nv_op_commutative(nvi->opcode) && + NV_BASEOP(nvi->opcode) != NV_OP_SET && + NV_BASEOP(nvi->opcode) != NV_OP_SLCT) return; assert(src0 && src1 && src0->value && src1->value); + if (src1->value->reg.file != NV_FILE_GPR) + return; + if (is_cspace_load(src0->value->insn)) { if (!is_cspace_load(src1->value->insn)) { nvi->src[0] = src1; @@ -258,8 +268,13 @@ check_swap_src_0_1(struct nv_instruction *nvi) } } - if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET) - nvi->set_cond = cc_swapped[nvi->set_cond]; + if (nvi->src[0] != src0) { + if (NV_BASEOP(nvi->opcode) == NV_OP_SET) + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; + else + if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT) + nvi->set_cond = NV_CC_INVERSE(nvi->set_cond); + } } static void @@ -543,6 +558,7 @@ constant_operand(struct nv_pc *pc, nv_reference(pc, nvi, s, nvi->src[t]->value); nvi->src[s]->mod = nvi->src[t]->mod; } + break; case NV_OP_ADD_F32: if (u.u32 == 0) { switch (nvi->src[t]->mod) { @@ -562,6 +578,7 @@ constant_operand(struct nv_pc *pc, if (nvi->opcode != NV_OP_CVT) nvi->src[0]->mod = 0; } + break; case NV_OP_ADD_B32: if (u.u32 == 0) { assert(nvi->src[t]->mod == 0); @@ -582,7 +599,7 @@ constant_operand(struct nv_pc *pc, } else if (u.s32 > 0 && u.s32 == (1 << shift)) { nvi->opcode = NV_OP_SHL; - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift; nv_reference(pc, nvi, 0, nvi->src[t]->value); nv_reference(pc, nvi, 1, val); break; @@ -590,14 +607,14 @@ constant_operand(struct nv_pc *pc, break; case NV_OP_RCP: u.f32 = 1.0f / u.f32; - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; nvi->opcode = NV_OP_MOV; assert(s == 0); nv_reference(pc, nvi, 0, val); break; case NV_OP_RSQ: u.f32 = 1.0f / sqrtf(u.f32); - (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32; + (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32; nvi->opcode = NV_OP_MOV; assert(s == 0); nv_reference(pc, nvi, 0, val); @@ -607,25 +624,83 @@ constant_operand(struct nv_pc *pc, } } +static void +handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + + if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod)) + return; + if (src0->reg.file != NV_FILE_GPR) + return; + nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0); + nvc0_insn_delete(nvi); +} + +/* check if we can MUL + ADD -> MAD/FMA */ +static void +handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi) +{ + struct nv_value *src0 = 
nvi->src[0]->value; + struct nv_value *src1 = nvi->src[1]->value; + struct nv_value *src; + int s; + uint8_t mod[4]; + + if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0; + else + if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1; + else + return; + + if ((src0->insn && src0->insn->bb != nvi->bb) || + (src1->insn && src1->insn->bb != nvi->bb)) + return; + + /* check for immediates from prior constant folding */ + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) + return; + src = nvi->src[s]->value; + + mod[0] = nvi->src[0]->mod; + mod[1] = nvi->src[1]->mod; + mod[2] = src->insn->src[0]->mod; + mod[3] = src->insn->src[1]->mod; + + if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) + return; + + nvi->opcode = NV_OP_MAD_F32; + + nv_reference(ctx->pc, nvi, s, NULL); + nvi->src[2] = nvi->src[!s]; + nvi->src[!s] = NULL; + + nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value); + nvi->src[0]->mod = mod[2] ^ mod[s]; + nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value); + nvi->src[1]->mod = mod[3]; +} + static int -nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) +nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_instruction *nvi, *next; int j; for (nvi = b->entry; nvi; nvi = next) { - struct nv_value *src0, *src1, *src; - int s; - uint8_t mod[4]; + struct nv_value *src0, *src1; + uint baseop = NV_BASEOP(nvi->opcode); next = nvi->next; src0 = nvc0_pc_find_immediate(nvi->src[0]); src1 = nvc0_pc_find_immediate(nvi->src[1]); - if (src0 && src1) + if (src0 && src1) { constant_expression(ctx->pc, nvi, src0, src1); - else { + } else { if (src0) constant_operand(ctx->pc, nvi, src0, 0); else @@ -633,44 +708,13 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) constant_operand(ctx->pc, nvi, src1, 1); } - /* check if we can MUL + ADD -> MAD/FMA */ - if (nvi->opcode != NV_OP_ADD) - continue; - - src0 = nvi->src[0]->value; - src1 = nvi->src[1]->value; - - if (SRC_IS_MUL(src0) && src0->refc == 1) - src = src0; + if (baseop == NV_OP_MIN || baseop == NV_OP_MAX) + handle_min_max(ctx, nvi); else - if (SRC_IS_MUL(src1) && src1->refc == 1) - src = src1; - else - continue; - - /* could have an immediate from above constant_* */ - if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) - continue; - s = (src == src0) ? 0 : 1; - - mod[0] = nvi->src[0]->mod; - mod[1] = nvi->src[1]->mod; - mod[2] = src->insn->src[0]->mod; - mod[3] = src->insn->src[0]->mod; - - if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG) - continue; - - nvi->opcode = NV_OP_MAD; - nv_reference(ctx->pc, nvi, s, NULL); - nvi->src[2] = nvi->src[!s]; - - nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value); - nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value); - nvi->src[0]->mod = mod[2] ^ mod[s]; - nvi->src[1]->mod = mod[3]; + if (nvi->opcode == NV_OP_ADD_F32) + handle_add_mul(ctx, nvi); } - DESCEND_ARBITRARY(j, nv_pass_lower_arith); + DESCEND_ARBITRARY(j, nv_pass_algebraic_opt); return 0; } @@ -700,8 +744,12 @@ struct pass_reld_elim { int alloc; }; +/* Extend the load operation in @rec to also cover the data loaded by @ld. + * The two loads may not overlap but reference adjacent memory locations. 
+ */ static void -combine_load(struct mem_record *rec, struct nv_instruction *ld) +combine_load(struct nv_pc *pc, struct mem_record *rec, + struct nv_instruction *ld) { struct nv_instruction *fv = rec->insn; struct nv_value *mem = ld->src[0]->value; @@ -716,7 +764,7 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) return; rec->ofst = mem->reg.address; for (j = 0; j < d; ++j) - fv->def[d + j] = fv->def[j]; + fv->def[mem->reg.size / 4 + j] = fv->def[j]; d = 0; } else if ((size == 8 && rec->ofst & 3) || @@ -729,6 +777,9 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld) fv->def[d++]->insn = fv; } + if (fv->src[0]->value->refc > 1) + nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value)); + fv->src[0]->value->reg.address = rec->ofst; fv->src[0]->value->reg.size = rec->size = size; nvc0_insn_delete(ld); @@ -793,6 +844,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) ((it->ofst >> 4) == (ofst >> 4)) && ((it->ofst + it->size == ofst) || (it->ofst - mem->reg.size == ofst))) { + /* only NV_OP_VFETCH can load exactly 12 bytes */ if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12) continue; if (it->ofst < ofst) { @@ -808,7 +860,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) switch (ld->opcode) { case NV_OP_EXPORT: combine_export(it, ld); break; default: - combine_load(it, ld); + combine_load(ctx->pc, it, ld); break; } } else @@ -817,6 +869,11 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b) } } + ctx->alloc = 0; + ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL; + for (s = 0; s < 16; ++s) + ctx->mem_c[s] = NULL; + DESCEND_ARBITRARY(s, nv_pass_mem_opt); return 0; } @@ -1006,7 +1063,6 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b) return 0; } -#if 0 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE. * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with * BREAK and dummy ELSE block. @@ -1027,61 +1083,187 @@ bb_is_if_else_endif(struct nv_basic_block *bb) } } -/* predicate instructions and remove branch at the end */ +/* Predicate instructions and delete any branch at the end if it is + * not a break from a loop. + */ static void predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b, - struct nv_value *p, ubyte cc) + struct nv_value *pred, uint8_t cc) { + struct nv_instruction *nvi, *prev; + int s; + if (!b->entry) + return; + for (nvi = b->entry; nvi; nvi = nvi->next) { + prev = nvi; + if (inst_is_noop(nvi)) + continue; + for (s = 0; nvi->src[s]; ++s); + assert(s < 6); + nvi->predicate = s; + nvi->cc = cc; + nv_reference(pc, nvi, nvi->predicate, pred); + } + if (prev->opcode == NV_OP_BRA && + b->out_kind[0] != CFG_EDGE_LOOP_LEAVE && + b->out_kind[1] != CFG_EDGE_LOOP_LEAVE) + nvc0_insn_delete(prev); } -#endif -/* NOTE: Run this after register allocation, we can just cut out the cflow - * instructions and hook the predicates to the conditional OPs if they are - * not using immediates; better than inserting SELECT to join definitions. - * - * NOTE: Should adapt prior optimization to make this possible more often. +static INLINE boolean +may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred) +{ + if (nvi->def[0] && values_equal(nvi->def[0], pred)) + return FALSE; + return nvc0_insn_is_predicateable(nvi); +} + +/* Transform IF/ELSE/ENDIF constructs into predicated instructions + * where feasible. 
*/ static int nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) { + struct nv_instruction *nvi; + struct nv_value *pred; + int k; + int n0, n1; /* instruction counts of outgoing blocks */ + + if (bb_is_if_else_endif(b)) { + assert(b->exit && b->exit->opcode == NV_OP_BRA); + + assert(b->exit->predicate >= 0); + pred = b->exit->src[b->exit->predicate]->value; + + n1 = n0 = 0; + for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) + if (!may_predicate_insn(nvi, pred)) + break; + if (!nvi) { + /* we're after register allocation, so there always is an ELSE block */ + for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) + if (!may_predicate_insn(nvi, pred)) + break; + } + + /* 12 is an arbitrary limit */ + if (!nvi && n0 < 12 && n1 < 12) { + predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc); + predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc); + + nvc0_insn_delete(b->exit); /* delete the branch */ + + /* and a potential joinat before it */ + if (b->exit && b->exit->opcode == NV_OP_JOINAT) + nvc0_insn_delete(b->exit); + + /* remove join operations at the end of the conditional */ + k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; + if ((nvi = b->out[0]->out[k]->entry)) { + nvi->join = 0; + if (nvi->opcode == NV_OP_JOIN) + nvc0_insn_delete(nvi); + } + } + } + DESCEND_ARBITRARY(k, nv_pass_flatten); + return 0; } +/* Tests instructions for equality, but independently of sources. */ +static boolean +is_operation_equal(struct nv_instruction *a, struct nv_instruction *b) +{ + if (a->opcode != b->opcode) + return FALSE; + if (nv_is_texture_op(a->opcode)) { + if (a->ext.tex.t != b->ext.tex.t || + a->ext.tex.s != b->ext.tex.s) + return FALSE; + if (a->tex_dim != b->tex_dim || + a->tex_array != b->tex_array || + a->tex_cube != b->tex_cube || + a->tex_shadow != b->tex_shadow || + a->tex_live != b->tex_live) + return FALSE; + } else + if (a->opcode == NV_OP_CVT) { + if (a->ext.cvt.s != b->ext.cvt.s || + a->ext.cvt.d != b->ext.cvt.d) + return FALSE; + } else + if (NV_BASEOP(a->opcode) == NV_OP_SET || + NV_BASEOP(a->opcode) == NV_OP_SLCT) { + if (a->set_cond != b->set_cond) + return FALSE; + } else + if (a->opcode == NV_OP_LINTERP || + a->opcode == NV_OP_PINTERP) { + if (a->centroid != b->centroid || + a->flat != b->flat) + return FALSE; + } + if (a->cc != b->cc) + return FALSE; + if (a->lanes != b->lanes || + a->patch != b->patch || + a->saturate != b->saturate) + return FALSE; + if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */ + return FALSE; + return TRUE; +} + /* local common subexpression elimination, stupid O(n^2) implementation */ static int nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_instruction *ir, *ik, *next; struct nv_instruction *entry = b->phi ? b->phi : b->entry; - int s; + int s, d; unsigned int reps; do { reps = 0; for (ir = entry; ir; ir = next) { next = ir->next; + if (ir->fixed) + continue; for (ik = entry; ik != ir; ik = ik->next) { - if (ir->opcode != ik->opcode || ir->fixed) + if (!is_operation_equal(ir, ik)) continue; - - if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1]) + if (!ir->def[0] || !ik->def[0]) continue; if (ik->indirect != ir->indirect || ik->predicate != ir->predicate) continue; - if (!values_equal(ik->def[0], ir->def[0])) + for (d = 0; d < 4; ++d) { + if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 
1 : 0)) + break; + if (ir->def[d]) { + if (!values_equal(ik->def[0], ir->def[0])) + break; + } else { + d = 4; + break; + } + } + if (d != 4) continue; - for (s = 0; s < 3; ++s) { + for (s = 0; s < 5; ++s) { struct nv_value *a, *b; - if (!ik->src[s]) { - if (ir->src[s]) - break; - continue; + if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0)) + break; + if (!ir->src[s]) { + s = 5; + break; } + if (ik->src[s]->mod != ir->src[s]->mod) break; a = ik->src[s]->value; @@ -1089,14 +1271,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) if (a == b) continue; if (a->reg.file != b->reg.file || - a->reg.id < 0 || + a->reg.id < 0 || /* this excludes memory loads/stores */ a->reg.id != b->reg.id) break; } - if (s == 3) { + if (s == 5) { nvc0_insn_delete(ir); + for (d = 0; d < 4 && ir->def[d]; ++d) + nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]); ++reps; - nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]); break; } } @@ -1110,13 +1293,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy * neighbouring registers. CSE might have messed this up. + * Just generate a MOV for each source to avoid conflicts if they're used in + * multiple NV_OP_BIND at different positions. */ static int nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_value *val; struct nv_instruction *bnd, *nvi, *next; - int s, t; + int s; for (bnd = b->entry; bnd; bnd = next) { next = bnd->next; @@ -1124,20 +1309,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) continue; for (s = 0; s < 4 && bnd->src[s]; ++s) { val = bnd->src[s]->value; - for (t = s + 1; t < 4 && bnd->src[t]; ++t) { - if (bnd->src[t]->value != val) - continue; - nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); - nvi->def[0] = new_value_like(ctx->pc, val); - nvi->def[0]->insn = nvi; - nv_reference(ctx->pc, nvi, 0, val); - nvc0_insn_insert_before(bnd, nvi); - nv_reference(ctx->pc, bnd, t, nvi->def[0]); - } + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); + nvi->def[0] = new_value_like(ctx->pc, val); + nvi->def[0]->insn = nvi; + nv_reference(ctx->pc, nvi, 0, val); + nv_reference(ctx->pc, bnd, s, nvi->def[0]); + + nvc0_insn_insert_before(bnd, nvi); } } - DESCEND_ARBITRARY(t, nv_pass_fix_bind); + DESCEND_ARBITRARY(s, nv_pass_fix_bind); return 0; } @@ -1153,11 +1335,17 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) pass.n = 0; pass.pc = pc; + /* Do CSE so we can just compare values by pointer in subsequent passes. */ + pc->pass_seq++; + ret = nv_pass_cse(&pass, root); + if (ret) + return ret; + /* Do this first, so we don't have to pay attention * to whether sources are supported memory loads. */ pc->pass_seq++; - ret = nv_pass_lower_arith(&pass, root); + ret = nv_pass_algebraic_opt(&pass, root); if (ret) return ret; @@ -1185,11 +1373,9 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) reldelim->pc = pc; } - pc->pass_seq++; - ret = nv_pass_cse(&pass, root); - if (ret) - return ret; - + /* May run DCE before load-combining since that pass will clean up + * after itself. 
+ */ dce.pc = pc; do { dce.removed = 0; diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index b03826484e4..90c669cc4b8 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -225,7 +225,7 @@ nvc0_print_instruction(struct nv_instruction *i) PRINT("%s", gree); if (NV_BASEOP(i->opcode) == NV_OP_SET) - PRINT("set %s", nv_cond_name(i->set_cond)); + PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond)); else if (i->saturate) PRINT("sat %s", nvc0_opcode_name(i->opcode)); @@ -278,10 +278,10 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 }, { NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 }, - { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 }, { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 }, @@ -302,7 +302,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_TRUNC, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, @@ -343,18 +343,18 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 }, { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, - { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, - { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 }, + { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_PRESIN, 
"presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, + { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 }, { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, @@ -363,13 +363,13 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, + { NV_OP_SLCT, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, - { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 }, + { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index d24f09a1507..f4afe083e2d 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -39,6 +39,30 @@ struct register_set { struct nv_pc *pc; }; +/* aliasing is allowed */ +static void +intersect_register_sets(struct register_set *dst, + struct register_set *src1, struct register_set *src2) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0]; + dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1]; + } +} + +static void +mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask) +{ + int i; + + for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) { + set->bits[i][0] = (set->bits[i][0] | mask) & umask; + set->bits[i][1] = (set->bits[i][1] | mask) & umask; + } +} + struct nv_pc_pass { struct nv_pc *pc; struct nv_instruction **insns; @@ -63,6 +87,9 @@ add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range) { struct nv_range *range, **nextp = &val->livei; + if (bgn == end) /* [a, a) is invalid / empty */ + return TRUE; + for (range = val->livei; range; range = range->next) { if (end < range->bgn) break; /* insert before */ @@ -327,14 +354,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) assert(b->join == a->join); } -static INLINE void +static INLINE boolean try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) { if (!join_allowed(ctx, a, b)) { #ifdef NVC0_RA_DEBUG_JOIN debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n); #endif - return; + return FALSE; } if (livei_have_overlap(a->join, b->join)) { #ifdef NVC0_RA_DEBUG_JOIN @@ -342,10 +369,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b) livei_print(a); livei_print(b); #endif - return; + return FALSE; } do_join_values(ctx, a, b); + + return TRUE; +} + +static void +join_values_nofail(struct nv_pc_pass *ctx, + struct nv_value *a, struct nv_value *b, boolean type_only) +{ + if (type_only) { + assert(join_allowed(ctx, a, b)); + do_join_values(ctx, a, b); + } else { + boolean ok = try_join_values(ctx, a, b); + if (!ok) { + NOUVEAU_ERR("failed to coalesce values\n"); + } + } } static INLINE boolean @@ -360,20 +404,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p) return (b->num_in > 1) && (n == 2); } +/* Look for 
the @phi's operand whose definition reaches @b. */ static int phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b, struct nv_basic_block *tb) { + struct nv_ref *srci, *srcj; int i, j; for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) { - if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb)) + srci = phi->src[i]; + /* if already replaced, check with original source first */ + if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV) + srci = srci->value->insn->src[0]; + if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL)) continue; /* NOTE: back-edges are ignored by the reachable-by check */ - if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb, - phi->src[i]->value->insn->bb, tb)) + if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb, + srci->value->insn->bb, NULL)) { j = i; + srcj = srci; + } } + if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL)) + if (!nvc0_bblock_reachable_by(srcj->value->insn->bb, + phi->def[0]->insn->bb, NULL)) + j = -1; return j; } @@ -420,21 +476,23 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ctx->pc->current_block = pn; for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) { - if ((j = phi_opnd_for_bb(i, p, b)) < 0) - continue; - val = i->src[j]->value; - - if (i->src[j]->flags) { - /* value already encountered from a different in-block */ - val = val->insn->src[0]->value; - while (j < 6 && i->src[j]) - ++j; - assert(j < 6); + j = phi_opnd_for_bb(i, p, b); + + if (j < 0) { + val = i->def[0]; + } else { + val = i->src[j]->value; + if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) { + j = -1; + /* use original value, we already encountered & replaced it */ + val = val->insn->src[0]->value; + } } + if (j < 0) /* need an additional source ? 
*/ + for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j); + assert(j < 6); /* XXX: really ugly shaders */ ni = new_instruction(ctx->pc, NV_OP_MOV); - - /* TODO: insert instruction at correct position in the first place */ if (ni->prev && ni->prev->target) nvc0_insns_permute(ni->prev, ni); @@ -442,7 +500,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) ni->def[0]->insn = ni; nv_reference(ctx->pc, ni, 0, val); nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */ - i->src[j]->flags = 1; + i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV; } if (pn != p && pn->exit) { @@ -460,8 +518,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b) return 0; } +#define JOIN_MASK_PHI (1 << 0) +#define JOIN_MASK_SELECT (1 << 1) +#define JOIN_MASK_MOV (1 << 2) +#define JOIN_MASK_BIND (1 << 3) + static int -pass_join_values(struct nv_pc_pass *ctx, int iter) +pass_join_values(struct nv_pc_pass *ctx, unsigned mask) { int c, n; @@ -470,36 +533,33 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) switch (i->opcode) { case NV_OP_PHI: - if (iter != 2) + if (!(mask & JOIN_MASK_PHI)) break; for (c = 0; c < 6 && i->src[c]; ++c) - try_join_values(ctx, i->def[0], i->src[c]->value); + join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE); break; case NV_OP_MOV: - if ((iter == 2) && i->src[0]->value->insn && - !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) + if (!(mask & JOIN_MASK_MOV)) + break; + if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1]) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: - if (iter != 1) + if (!(mask & JOIN_MASK_SELECT)) break; - for (c = 0; c < 6 && i->src[c]; ++c) { - assert(join_allowed(ctx, i->def[0], i->src[c]->value)); - do_join_values(ctx, i->def[0], i->src[c]->value); - } - break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: - /* on nvc0, TEX src and dst can differ */ + for (c = 0; c < 6 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE); break; case NV_OP_BIND: - if (iter) + if (!(mask & JOIN_MASK_BIND)) break; - for (c = 0; c < 6 && i->src[c]; ++c) - do_join_values(ctx, i->def[c], i->src[c]->value); + for (c = 0; c < 4 && i->src[c]; ++c) + join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE); break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */ default: break; } @@ -621,15 +681,16 @@ static void collect_live_values(struct nv_basic_block *b, const int n) { int i; - if (b->out[0]) { - if (b->out[1]) { /* what to do about back-edges ? */ + /* XXX: what to do about back/fake-edges (used to include both here) ? 
*/ + if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { for (i = 0; i < n; ++i) b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i]; } else { memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t)); } } else - if (b->out[1]) { + if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) { memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t)); } else { memset(b->live_set, 0, n * sizeof(uint32_t)); @@ -746,42 +807,46 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval) elem->next = nval; } -static int -pass_linear_scan(struct nv_pc_pass *ctx, int iter) +static void +collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head, + boolean assigned_only) { - struct nv_instruction *i; - struct register_set f, free; + struct nv_value *val; int k, n; - struct nv_value *cur, *val, *tmp[2]; - struct nv_value active, inactive, handled, unhandled; - make_empty_list(&active); - make_empty_list(&inactive); - make_empty_list(&handled); - make_empty_list(&unhandled); + make_empty_list(head); - nvc0_ctor_register_set(ctx->pc, &free); - - /* joined values should have range = NULL and thus not be added; - * also, fixed memory values won't be added because they're not - * def'd, just used - */ for (n = 0; n < ctx->num_insns; ++n) { - i = ctx->insns[n]; + struct nv_instruction *i = ctx->insns[n]; + /* for joined values, only the representative will have livei != NULL */ for (k = 0; k < 5; ++k) { if (i->def[k] && i->def[k]->livei) - insert_ordered_tail(&unhandled, i->def[k]); - else - if (0 && i->def[k]) - debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n); + if (!assigned_only || i->def[k]->reg.id >= 0) + insert_ordered_tail(head, i->def[k]); } } - for (val = unhandled.next; val != unhandled.prev; val = val->next) { + for (val = head->next; val != head->prev; val = val->next) { assert(val->join == val); assert(val->livei->bgn <= val->next->livei->bgn); } +} + +static int +pass_linear_scan(struct nv_pc_pass *ctx) +{ + struct register_set f, free; + struct nv_value *cur, *val, *tmp[2]; + struct nv_value active, inactive, handled, unhandled; + + make_empty_list(&active); + make_empty_list(&inactive); + make_empty_list(&handled); + + nvc0_ctor_register_set(ctx->pc, &free); + + collect_register_values(ctx, &unhandled, FALSE); foreach_s(cur, tmp[0], &unhandled) { remove_from_list(cur); @@ -818,14 +883,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) reg_occupy(&f, val); if (cur->reg.id < 0) { - boolean mem = FALSE; - int v = nvi_vector_size(cur->insn); - - if (v > 1) - mem = !reg_assign(&f, &cur->insn->def[0], v); - else - if (iter) - mem = !reg_assign(&f, &cur, 1); + boolean mem = !reg_assign(&f, &cur, 1); if (mem) { NOUVEAU_ERR("out of registers\n"); @@ -839,6 +897,68 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) return 0; } +/* Allocate values defined by instructions such as TEX, which have to be + * assigned to consecutive registers. + * Linear scan doesn't really work here since the values can have different + * live intervals. 
+ */ +static int +pass_allocate_constrained_values(struct nv_pc_pass *ctx) +{ + struct nv_value regvals, *val; + struct nv_instruction *i; + struct nv_value *defs[4]; + struct register_set regs[4]; + int n, vsize, c; + uint32_t mask; + boolean mem; + + collect_register_values(ctx, ®vals, TRUE); + + for (n = 0; n < ctx->num_insns; ++n) { + i = ctx->insns[n]; + vsize = nvi_vector_size(i); + if (!(vsize > 1)) + continue; + assert(vsize <= 4); + + for (c = 0; c < vsize; ++c) + defs[c] = i->def[c]->join; + + if (defs[0]->reg.id >= 0) { + for (c = 1; c < vsize; ++c) + assert(defs[c]->reg.id >= 0); + continue; + } + + for (c = 0; c < vsize; ++c) { + nvc0_ctor_register_set(ctx->pc, ®s[c]); + + foreach(val, ®vals) { + if (val->reg.id >= 0 && livei_have_overlap(val, defs[c])) + reg_occupy(®s[c], val); + } + mask = 0x11111111; + if (vsize == 2) /* granularity is 2 and not 4 */ + mask |= 0x11111111 << 2; + mask_register_set(®s[c], 0, mask << c); + + if (defs[c]->livei) + insert_ordered_tail(®vals, defs[c]); + } + for (c = 1; c < vsize; ++c) + intersect_register_sets(®s[0], ®s[0], ®s[c]); + + mem = !reg_assign(®s[0], &defs[0], vsize); + + if (mem) { + NOUVEAU_ERR("out of registers\n"); + abort(); + } + } + return 0; +} + static int nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) { @@ -862,6 +982,10 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) ret = pass_generate_phi_movs(ctx, root); assert(!ret); +#ifdef NVC0_RA_DEBUG_LIVEI + nvc0_print_function(root); +#endif + for (i = 0; i < pc->loop_nesting_bound; ++i) { pc->pass_seq++; ret = pass_build_live_sets(ctx, root); @@ -888,19 +1012,19 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) livei_print(&pc->values[i]); #endif - ret = pass_join_values(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_PHI); if (ret) goto out; - ret = pass_linear_scan(ctx, 0); + ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND); if (ret) goto out; - ret = pass_join_values(ctx, 1); + ret = pass_join_values(ctx, JOIN_MASK_MOV); if (ret) goto out; - ret = pass_join_values(ctx, 2); + ret = pass_allocate_constrained_values(ctx); if (ret) goto out; - ret = pass_linear_scan(ctx, 1); + ret = pass_linear_scan(ctx); if (ret) goto out; diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index aefaf7b98ad..899fe147c6a 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -185,8 +185,17 @@ nvc0_varying_location(unsigned sn, unsigned si) return 0x2e0; */ case TGSI_SEMANTIC_GENERIC: + /* We'd really like to distinguish between TEXCOORD and GENERIC here, + * since only 0x300 to 0x37c can be replaced by sprite coordinates. + * Also, gl_PointCoord should be a system value and must be assigned to + * address 0x2e0. 
For now, let's cheat: + */ assert(si < 31); - return 0x80 + (si * 16); + if (si <= 7) + return 0x300 + si * 16; + if (si == 9) + return 0x2e0; + return 0x80 + ((si - 8) * 16); case TGSI_SEMANTIC_NORMAL: return 0x360; case TGSI_SEMANTIC_PRIMID: @@ -256,12 +265,14 @@ prog_decl(struct nvc0_translation_info *ti, case TGSI_FILE_INPUT: for (i = first; i <= last; ++i) { if (ti->prog->type == PIPE_SHADER_VERTEX) { - sn = TGSI_SEMANTIC_GENERIC; - si = i; + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = 0x80 + i * 16 + c * 4; + } else { + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for sprite coordinates: */ + ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4; } - for (c = 0; c < 4; ++c) - ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; - if (ti->prog->type == PIPE_SHADER_FRAGMENT) ti->interp_mode[i] = nvc0_interp_mode(decl); } @@ -281,6 +292,8 @@ prog_decl(struct nvc0_translation_info *ti, } else { for (c = 0; c < 4; ++c) ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + /* for TFB_VARYING_LOCS: */ + ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4; } } break; @@ -288,9 +301,11 @@ prog_decl(struct nvc0_translation_info *ti, ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); assert(first == last); break; + case TGSI_FILE_TEMPORARY: + ti->temp128_nr = MAX2(ti->temp128_nr, last + 1); + break; case TGSI_FILE_NULL: case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: case TGSI_FILE_IMMEDIATE: @@ -518,8 +533,13 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) if (!ti->input_access[i][c]) continue; a = ti->input_loc[i][c] / 2; - if ((a & ~7) == 0x70/2) - fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ + if (ti->input_loc[i][c] >= 0x2c0) + a -= 32; + if (ti->input_loc[i][0] == 0x70) + fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */ + else + if (ti->input_loc[i][0] == 0x2e0) + fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */ else fp->hdr[4 + a / 32] |= m << (a % 32); } @@ -618,7 +638,7 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) if (ti->scan.writes_z) prog->flags[0] = 0x11; /* ? 
*/ else - if (!ti->global_stores) + if (!ti->scan.uses_kill && !ti->global_stores) prog->fp.early_z = 1; ret = nvc0_fp_gen_header(prog, ti); @@ -629,6 +649,11 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) break; } + if (ti->require_stores) { + prog->hdr[0] |= 1 << 26; + prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */ + } + assert(!ret); return ret; } diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index e6b210d1355..f6fea29780b 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -21,16 +21,18 @@ struct nvc0_program { unsigned code_size; unsigned parm_size; - uint32_t hdr[20]; + uint32_t hdr[20]; /* TODO: move this into code to save space */ uint32_t flags[2]; struct { uint8_t edgeflag; uint8_t num_ucps; + uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS]; } vp; struct { uint8_t early_z; + uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; } fp; void *relocs; @@ -74,6 +76,7 @@ struct nvc0_translation_info { uint32_t *immd32; ubyte *immd32_ty; unsigned immd32_nr; + unsigned temp128_nr; ubyte edgeflag_out; struct nvc0_subroutine *subr; unsigned num_subrs; diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 74c3451c19a..fcbb7da41a3 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -217,6 +217,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) struct push_context ctx; unsigned i, index_size; unsigned inst = info->instance_count; + boolean apply_bias = info->indexed && info->index_bias; ctx.chan = nvc0->screen->base.channel; ctx.translate = nvc0->vertex->translate; @@ -230,7 +231,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) data = nvc0_resource_map_offset(nvc0, res, vb->buffer_offset, NOUVEAU_BO_RD); - if (info->indexed) + + if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i)))) data += info->index_bias * vb->stride; ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c index cc83fbe771c..e5e43c0e7a5 100644 --- a/src/gallium/drivers/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nvc0/nvc0_query.c @@ -312,6 +312,7 @@ nvc0_render_condition(struct pipe_context *pipe, if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + MARK_RING (chan, 5, 2); BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); @@ -319,6 +320,7 @@ nvc0_render_condition(struct pipe_context *pipe, OUT_RING (chan, 0x00001001); } + MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h index 17e79642a6d..599823c0dc9 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.h +++ b/src/gallium/drivers/nvc0/nvc0_resource.h @@ -24,7 +24,8 @@ struct nvc0_context; * USER_MEMORY: resource->data is a pointer to client memory and may change * between GL calls */ -#define NVC0_BUFFER_STATUS_DIRTY (1 << 0) +#define NVC0_BUFFER_STATUS_GPU_READING (1 << 0) +#define NVC0_BUFFER_STATUS_GPU_WRITING (1 << 1) #define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7) /* Resources, if mapped into 
the GPU's address space, are guaranteed to @@ -51,6 +52,9 @@ struct nvc0_resource { struct nvc0_mm_allocation *mm; }; +void +nvc0_buffer_release_gpu_storage(struct nvc0_resource *); + boolean nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *, unsigned start, unsigned size); @@ -87,7 +91,7 @@ nvc0_resource_map_offset(struct nvc0_context *nvc0, nvc0_buffer_adjust_score(nvc0, res, -250); if ((res->domain == NOUVEAU_BO_VRAM) && - (res->status & NVC0_BUFFER_STATUS_DIRTY)) + (res->status & NVC0_BUFFER_STATUS_GPU_WRITING)) nvc0_buffer_download(nvc0, res, 0, res->base.width0); if ((res->domain != NOUVEAU_BO_GART) || diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index f608b32e1cb..f7f1fd09a12 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -75,6 +75,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case PIPE_CAP_ARRAY_TEXTURES: + return 1; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_TEXTURE_SWIZZLE: @@ -281,9 +283,6 @@ nvc0_magic_3d_init(struct nouveau_channel *chan) BEGIN_RING(chan, RING_3D_(0x074c), 1); OUT_RING (chan, 0x3f); - BEGIN_RING(chan, RING_3D_(0x10f8), 1); - OUT_RING (chan, 0x0101); - BEGIN_RING(chan, RING_3D_(0x16a8), 1); OUT_RING (chan, (3 << 16) | 3); BEGIN_RING(chan, RING_3D_(0x1794), 1); @@ -476,7 +475,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, (15 << 4) | 1); } - screen->tls_size = 4 * 4 * 32 * 128 * 4; + screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, screen->tls_size, &screen->tls); if (ret) @@ -490,6 +489,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RING (chan, screen->tls_size >> 32); OUT_RING (chan, screen->tls_size); + BEGIN_RING(chan, RING_3D_(0x07a0), 1); + OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1); OUT_RING (chan, 0); @@ -532,16 +533,27 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */ OUT_RING (chan, 0x3f); - BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1); + OUT_RING (chan, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 8 * 2); + for (i = 0; i < 8 * 2; ++i) + OUT_RING(chan, 0); + BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1); OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1); OUT_RING (chan, 0); + /* neither scissors, viewport nor stencil mask should affect clears */ + BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1); OUT_RING (chan, 1); BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); OUT_RINGf (chan, 0.0f); OUT_RINGf (chan, 1.0f); + BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); + OUT_RING (chan, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1); /* We use scissors instead of exact view volume clipping, * so they're always enabled. 
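A minimal illustrative sketch, not part of the commit: the clip rectangles zeroed above are what nvc0_clear_render_target() and nvc0_clear_depth_stencil() later in this diff reprogram so that CLEAR_BUFFERS only touches the requested region. The helper name below is hypothetical and only shows the packing used there, minimum coordinate in the low 16 bits and maximum in the high 16 bits:

   static void
   emit_clear_clip_rect(struct nouveau_channel *chan,
                        unsigned x, unsigned y, unsigned w, unsigned h)
   {
      /* two words starting at CLIP_RECT_HORIZ(0): horizontal extent, then
       * the matching vertical extent, each as (max << 16) | min, as in the
       * nvc0_surface.c clear paths in this diff */
      BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2);
      OUT_RING  (chan, ((x + w) << 16) | x);
      OUT_RING  (chan, ((y + h) << 16) | y);
      /* the clear paths enable the rectangles only for the clear itself */
      IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
   }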
@@ -628,11 +640,14 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + MARK_RING(chan, 5, 5); nouveau_bo_validate(chan, screen->text, flags); nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); - nouveau_bo_validate(chan, screen->tls, flags); nouveau_bo_validate(chan, screen->mp_stack_bo, flags); + + if (screen->cur_ctx && screen->cur_ctx->state.tls_required) + nouveau_bo_validate(chan, screen->tls, flags); } int diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 1fac142e2be..d952ff1f9b1 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -128,17 +128,25 @@ nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags) { struct nvc0_screen *screen = nvc0_screen(res->base.screen); - nouveau_bo_validate(screen->base.channel, res->bo, flags); + if (likely(res->bo)) { + nouveau_bo_validate(screen->base.channel, res->bo, flags); - nvc0_resource_fence(res, flags); + if (flags & NOUVEAU_BO_WR) + res->status |= NVC0_BUFFER_STATUS_GPU_WRITING; + if (flags & NOUVEAU_BO_RD) + res->status |= NVC0_BUFFER_STATUS_GPU_READING; + + nvc0_resource_fence(res, flags); + } } boolean nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit); - void nvc0_screen_fence_next(struct nvc0_screen *); +void +nvc0_screen_fence_update(struct nvc0_screen *, boolean flushed); static INLINE boolean nvc0_screen_fence_emit(struct nvc0_screen *screen) diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 981b5488d08..357f8b80deb 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -27,6 +27,16 @@ #include "nvc0_context.h" +static INLINE void +nvc0_program_update_context_state(struct nvc0_context *nvc0, + struct nvc0_program *prog, int stage) +{ + if (prog->hdr[1]) + nvc0->state.tls_required |= 1 << stage; + else + nvc0->state.tls_required &= ~(1 << stage); +} + static boolean nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) { @@ -55,7 +65,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) prog->code_base + NVC0_SHADER_HEADER_SIZE, prog->code_size, prog->code); - BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1); + BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1); OUT_RING (nvc0->screen->base.channel, 0x1111); return TRUE; @@ -77,6 +87,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) if (!nvc0_program_validate(nvc0, vp)) return; + nvc0_program_update_context_state(nvc0, vp, 0); BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2); OUT_RING (chan, 0x11); @@ -98,6 +109,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) if (!nvc0_program_validate(nvc0, fp)) return; + nvc0_program_update_context_state(nvc0, fp, 4); BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1); OUT_RING (chan, fp->fp.early_z); @@ -127,6 +139,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, tp)) return; + nvc0_program_update_context_state(nvc0, tp, 1); BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2); OUT_RING (chan, 0x21); @@ -148,6 +161,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, tp)) return; + nvc0_program_update_context_state(nvc0, tp, 2); BEGIN_RING(chan, RING_3D(TEP_SELECT), 1); OUT_RING (chan, 0x31); @@ -170,6 +184,7 @@ 
nvc0_gmtyprog_validate(struct nvc0_context *nvc0) } if (!nvc0_program_validate(nvc0, gp)) return; + nvc0_program_update_context_state(nvc0, gp, 3); BEGIN_RING(chan, RING_3D(GP_SELECT), 1); OUT_RING (chan, 0x41); @@ -178,3 +193,59 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1); OUT_RING (chan, gp->max_gpr); } + +/* It's *is* kind of shader related. We need to inspect the program + * to get the output locations right. + */ +void +nvc0_tfb_validate(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_program *vp; + struct nvc0_transform_feedback_state *tfb = nvc0->tfb; + int b; + + BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); + if (!tfb) { + OUT_RING(chan, 0); + return; + } + OUT_RING(chan, 1); + + vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog; + + for (b = 0; b < nvc0->num_tfbbufs; ++b) { + uint8_t idx, var[128]; + int i, n; + struct nvc0_resource *buf = nvc0_resource(nvc0->tfbbuf[b]); + + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5); + OUT_RING (chan, 1); + OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); + OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]); + OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */ + + if (!(nvc0->dirty & NVC0_NEW_TFB)) + continue; + + BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3); + OUT_RING (chan, 0); + OUT_RING (chan, tfb->varying_count[b]); + OUT_RING (chan, tfb->stride[b]); + + n = b ? tfb->varying_count[b - 1] : 0; + i = 0; + for (; i < tfb->varying_count[b]; ++i) { + idx = tfb->varying_index[n + i]; + var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3); + } + for (; i & 3; ++i) + var[i] = 0; + + BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4); + OUT_RINGp (chan, var, i / 4); + } + for (; b < 4; ++b) + IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0); +} diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index c08f3693f5e..aa437195764 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -22,6 +22,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -50,40 +51,35 @@ nvc0_colormask(unsigned mask) return ret; } +#define NVC0_BLEND_FACTOR_CASE(a, b) \ + case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b + static INLINE uint32_t nvc0_blend_fac(unsigned factor) { - static const uint16_t bf[] = { - NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */ - NV50_3D_BLEND_FACTOR_ONE, - NV50_3D_BLEND_FACTOR_SRC_COLOR, - NV50_3D_BLEND_FACTOR_SRC_ALPHA, - NV50_3D_BLEND_FACTOR_DST_ALPHA, - NV50_3D_BLEND_FACTOR_DST_COLOR, - NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE, - NV50_3D_BLEND_FACTOR_CONSTANT_COLOR, - NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA, - NV50_3D_BLEND_FACTOR_SRC1_COLOR, - NV50_3D_BLEND_FACTOR_SRC1_ALPHA, - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */ - NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */ - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, - NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, - NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR, - 
NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - }; - - assert(factor < (sizeof(bf) / sizeof(bf[0]))); - return bf[factor]; + switch (factor) { + NVC0_BLEND_FACTOR_CASE(ONE, ONE); + NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE); + NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA); + NVC0_BLEND_FACTOR_CASE(ZERO, ZERO); + NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR); + NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA); + default: + return NV50_3D_BLEND_FACTOR_ZERO; + } } static void * @@ -176,9 +172,9 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, return NULL; so->pipe = *cso; -#ifndef NVC0_SCISSORS_CLIPPING - SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor); -#endif + /* Scissor enables are handled in scissor state, we will not want to + * always emit 16 commands, one for each scissor rectangle, here. + */ SB_BEGIN_3D(so, SHADE_MODEL, 1); SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : @@ -242,7 +238,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1); SB_DATA (so, fui(cso->offset_scale)); SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1); - SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? 
*/ + SB_DATA (so, fui(cso->offset_units * 2.0f)); } assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); @@ -283,20 +279,21 @@ nvc0_zsa_state_create(struct pipe_context *pipe, } if (cso->stencil[0].enabled) { - SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5); + SB_BEGIN_3D(so, STENCIL_ENABLE, 5); SB_DATA (so, 1); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op)); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func)); - SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2); - SB_DATA (so, cso->stencil[0].writemask); + SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2); SB_DATA (so, cso->stencil[0].valuemask); + SB_DATA (so, cso->stencil[0].writemask); } else { - SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0); + SB_IMMED_3D(so, STENCIL_ENABLE, 0); } if (cso->stencil[1].enabled) { + assert(cso->stencil[0].enabled); SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5); SB_DATA (so, 1); SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op)); @@ -306,7 +303,8 @@ nvc0_zsa_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2); SB_DATA (so, cso->stencil[1].writemask); SB_DATA (so, cso->stencil[1].valuemask); - } else { + } else + if (cso->stencil[0].enabled) { SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); } @@ -808,6 +806,74 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) nvc0->dirty |= NVC0_NEW_VERTEX; } +static void * +nvc0_tfb_state_create(struct pipe_context *pipe, + const struct pipe_stream_output_state *pso) +{ + struct nvc0_transform_feedback_state *so; + int n = 0; + int i, c, b; + + so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t)); + if (!so) + return NULL; + + for (b = 0; b < 4; ++b) { + for (i = 0; i < pso->num_outputs; ++i) { + if (pso->output_buffer[i] != b) + continue; + for (c = 0; c < 4; ++c) { + if (!(pso->register_mask[i] & (1 << c))) + continue; + so->varying_count[b]++; + so->varying_index[n++] = (pso->register_index[i] << 2) | c; + } + } + so->stride[b] = so->varying_count[b] * 4; + } + if (pso->stride) + so->stride[0] = pso->stride; + + return so; +} + +static void +nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso) +{ + nvc0_context(pipe)->tfb = hwcso; + nvc0_context(pipe)->dirty |= NVC0_NEW_TFB; +} + +static void +nvc0_set_transform_feedback_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + int i; + + assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? 
*/ + + for (i = 0; i < num_buffers; ++i) { + assert(offsets[i] >= 0); + nvc0->tfb_offset[i] = offsets[i]; + pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]); + } + for (; i < nvc0->num_tfbbufs; ++i) + pipe_resource_reference(&nvc0->tfbbuf[i], NULL); + + nvc0->num_tfbbufs = num_buffers; + + nvc0->dirty |= NVC0_NEW_TFB_BUFFERS; +} + void nvc0_init_state_functions(struct nvc0_context *nvc0) { @@ -861,5 +927,12 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers; nvc0->pipe.set_index_buffer = nvc0_set_index_buffer; + + nvc0->pipe.create_stream_output_state = nvc0_tfb_state_create; + nvc0->pipe.delete_stream_output_state = nvc0_tfb_state_delete; + nvc0->pipe.bind_stream_output_state = nvc0_tfb_state_bind; + nvc0->pipe.set_stream_output_buffers = nvc0_set_transform_feedback_buffers; + + nvc0->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 25aec0244db..70c418fad9b 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -25,6 +25,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0) else width = fb->width; + MARK_RING (chan, 23, 4); BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */ OUT_RING (chan, 0); BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */ @@ -57,6 +58,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; unsigned i; + boolean serialize = FALSE; nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME); @@ -66,12 +68,14 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, fb->width << 16); OUT_RING (chan, fb->height << 16); + MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs); + for (i = 0; i < fb->nr_cbufs; ++i) { struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture); struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]); struct nouveau_bo *bo = mt->base.bo; uint32_t offset = sf->offset; - + BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -83,6 +87,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, sf->depth); OUT_RING (chan, mt->layer_stride >> 2); + if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); } @@ -93,7 +102,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) struct nouveau_bo *bo = mt->base.bo; int unk = mt->base.base.target == PIPE_TEXTURE_2D; uint32_t offset = sf->offset; - + + MARK_RING (chan, 12, 2); BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5); OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); @@ -107,6 +117,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, sf->height); OUT_RING (chan, (unk << 16) | sf->depth); + if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING) + serialize = TRUE; + mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING; + mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); } else { @@ 
-114,11 +129,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0) OUT_RING (chan, 0); } -#ifndef NVC0_SCISSORS_CLIPPING - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, fb->width << 16); - OUT_RING (chan, fb->height << 16); -#endif + if (serialize) { + BEGIN_RING(chan, RING_3D(SERIALIZE), 1); + OUT_RING (chan, 0); + } } static void @@ -160,65 +174,54 @@ nvc0_validate_scissor(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; struct pipe_scissor_state *s = &nvc0->scissor; -#ifdef NVC0_SCISSORS_CLIPPING - struct pipe_viewport_state *vp = &nvc0->viewport; - int minx, maxx, miny, maxy; - if (!(nvc0->dirty & - (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) && - nvc0->state.scissor == nvc0->rast->pipe.scissor) + if (!(nvc0->dirty & NVC0_NEW_SCISSOR) && + nvc0->rast->pipe.scissor == nvc0->state.scissor) return; nvc0->state.scissor = nvc0->rast->pipe.scissor; - if (nvc0->state.scissor) { - minx = s->minx; - maxx = s->maxx; - miny = s->miny; - maxy = s->maxy; + BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); + if (nvc0->rast->pipe.scissor) { + OUT_RING(chan, (s->maxx << 16) | s->minx); + OUT_RING(chan, (s->maxy << 16) | s->miny); } else { - minx = 0; - maxx = nvc0->framebuffer.width; - miny = 0; - maxy = nvc0->framebuffer.height; + OUT_RING(chan, (0xffff << 16) | 0); + OUT_RING(chan, (0xffff << 16) | 0); } - - minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0]))); - maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0]))); - miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1]))); - maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1]))); - - BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); - OUT_RING (chan, (maxx << 16) | minx); - OUT_RING (chan, (maxy << 16) | miny); - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, ((maxx - minx) << 16) | minx); - OUT_RING (chan, ((maxy - miny) << 16) | miny); -#else - BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2); - OUT_RING (chan, (s->maxx << 16) | s->minx); - OUT_RING (chan, (s->maxy << 16) | s->miny); -#endif } static void nvc0_validate_viewport(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; + struct pipe_viewport_state *vp = &nvc0->viewport; + int x, y, w, h; + float zmin, zmax; BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3); - OUT_RINGf (chan, nvc0->viewport.translate[0]); - OUT_RINGf (chan, nvc0->viewport.translate[1]); - OUT_RINGf (chan, nvc0->viewport.translate[2]); + OUT_RINGf (chan, vp->translate[0]); + OUT_RINGf (chan, vp->translate[1]); + OUT_RINGf (chan, vp->translate[2]); BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3); - OUT_RINGf (chan, nvc0->viewport.scale[0]); - OUT_RINGf (chan, nvc0->viewport.scale[1]); - OUT_RINGf (chan, nvc0->viewport.scale[2]); + OUT_RINGf (chan, vp->scale[0]); + OUT_RINGf (chan, vp->scale[1]); + OUT_RINGf (chan, vp->scale[2]); -#ifdef NVC0_SCISSORS_CLIPPING + /* now set the viewport rectangle to viewport dimensions for clipping */ + + x = (int)(vp->translate[0] - fabsf(vp->scale[0])); + y = (int)(vp->translate[1] - fabsf(vp->scale[1])); + w = (int)fabsf(2.0f * vp->scale[0]); + h = (int)fabsf(2.0f * vp->scale[1]); + zmin = vp->translate[2] - fabsf(vp->scale[2]); + zmax = vp->translate[2] + fabsf(vp->scale[2]); + + BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); + OUT_RING (chan, (w << 16) | x); + OUT_RING (chan, (h << 16) | y); BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2); - OUT_RINGf (chan, nvc0->viewport.translate[2] - 
nvc0->viewport.scale[2]); - OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]); -#endif + OUT_RINGf (chan, zmin); + OUT_RINGf (chan, zmax); } static void @@ -227,10 +230,15 @@ nvc0_validate_clip(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; uint32_t clip; - clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002; -#ifndef NVC0_SCISSORS_CLIPPING - clip |= 0x1080; -#endif + if (nvc0->clip.depth_clamp) { + clip = + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2; + } else { + clip = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1; + } BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); OUT_RING (chan, clip); @@ -238,6 +246,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0) if (nvc0->clip.nr) { struct nouveau_bo *bo = nvc0->screen->uniforms; + MARK_RING (chan, 6 + nvc0->clip.nr * 4, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, 256); OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -281,6 +290,32 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0) } static void +nvc0_validate_sprite_coords(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + uint32_t reg; + + if (nvc0->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) + reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT; + else + reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; + + if (nvc0->rast->pipe.point_quad_rasterization) { + uint32_t en = nvc0->rast->pipe.sprite_coord_enable; + + while (en) { + int i = ffs(en) - 1; + en &= ~(1 << i); + if (i >= 0 && i < 8) + reg |= 8 << i; + } + } + + BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1); + OUT_RING (chan, reg); +} + +static void nvc0_constbufs_validate(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; @@ -340,6 +375,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); if (rebind) { + MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, align(res->base.width0, 0x100)); OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -357,6 +393,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1); + MARK_RING (chan, nr + 5, 2); BEGIN_RING(chan, RING_3D(CB_SIZE), 3); OUT_RING (chan, align(res->base.width0, 0x100)); OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -383,13 +420,7 @@ static struct state_validate { { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, { nvc0_validate_stipple, NVC0_NEW_STIPPLE }, -#ifdef NVC0_SCISSORS_CLIPPING - { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | - NVC0_NEW_RASTERIZER | - NVC0_NEW_FRAMEBUFFER }, -#else - { nvc0_validate_scissor, NVC0_NEW_SCISSOR }, -#endif + { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, { nvc0_validate_clip, NVC0_NEW_CLIP }, { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, @@ -397,10 +428,12 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_validate_sprite_coords, NVC0_NEW_RASTERIZER | NVC0_NEW_FRAGPROG }, { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { 
nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, - { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS } + { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, + { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 6c8028aba13..57566128ab5 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -4,8 +4,6 @@ #include "pipe/p_state.h" -#define NVC0_SCISSORS_CLIPPING - #define SB_BEGIN_3D(so, m, s) \ (so)->state[(so)->size++] = \ (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2) @@ -67,16 +65,17 @@ struct nvc0_vertex_stateobj { unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; + boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */ unsigned vtx_size; unsigned vtx_per_packet_max; - struct nvc0_vertex_element element[1]; + struct nvc0_vertex_element element[0]; }; /* will have to lookup index -> location qualifier from nvc0_program */ -struct nvc0_tfb_state { - uint8_t varying_count[4]; +struct nvc0_transform_feedback_state { uint32_t stride[4]; - uint8_t varying_indices[1]; + uint8_t varying_count[4]; + uint8_t varying_index[0]; }; #endif diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index cc0a65687dc..8898bc733a3 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -33,25 +33,15 @@ #include "nv50_defs.xml.h" +#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL + /* return TRUE for formats that can be converted among each other by NVC0_2D */ static INLINE boolean nvc0_2d_format_faithful(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - return TRUE; - default: - return FALSE; - } + uint8_t id = nvc0_format_table[format].rt; + + return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); } static INLINE uint8_t @@ -62,7 +52,7 @@ nvc0_2d_format(enum pipe_format format) /* Hardware values for color formats range from 0xc0 to 0xff, * but the 2D engine doesn't support all of them. 
*/ - if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0)))) + if (nvc0_2d_format_faithful(format)) return id; switch (util_format_get_blocksize(format)) { @@ -72,6 +62,10 @@ nvc0_2d_format(enum pipe_format format) return NV50_SURFACE_FORMAT_R16_UNORM; case 4: return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM; + case 8: + return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM; + case 16: + return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT; default: return 0; } @@ -249,15 +243,16 @@ nvc0_clear_render_target(struct pipe_context *pipe, OUT_RING (chan, 1); OUT_RING (chan, 0); - /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */ - - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); OUT_RING (chan, 0x3c); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; } @@ -306,13 +301,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, OUT_RING (chan, sf->height); OUT_RING (chan, (1 << 16) | 1); - BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2); - OUT_RING (chan, (width << 16) | dstx); - OUT_RING (chan, (height << 16) | dsty); + BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2); + OUT_RING (chan, ((dstx + width) << 16) | dstx); + OUT_RING (chan, ((dsty + height) << 16) | dsty); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1); BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1); OUT_RING (chan, mode); + IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0); + nv50->dirty |= NVC0_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index b219f82c903..968558a5869 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -196,9 +196,16 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) OUT_RINGp (chan, &tic->tic[3], 5); need_flush = TRUE; + } else + if (res->status & NVC0_BUFFER_STATUS_GPU_WRITING) { + BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); + OUT_RING (chan, (tic->id << 4) | 1); } nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + res->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING; + res->status |= NVC0_BUFFER_STATUS_GPU_READING; + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 950bee2eda4..a44d330c731 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -63,7 +63,13 @@ bld_register_access(struct bld_register *reg, unsigned i) static INLINE void bld_register_add_val(struct bld_register *reg, struct nv_value *val) { - util_dynarray_append(®->vals, struct nv_value *, val); + struct nv_basic_block *bb = val->insn->bb; + + if (reg->vals.size && + (util_dynarray_top(®->vals, struct nv_value *))->insn->bb == bb) + *(util_dynarray_top_ptr(®->vals, struct nv_value *)) = val; + else + util_dynarray_append(®->vals, struct nv_value *, val); } static INLINE boolean @@ -127,13 +133,10 @@ struct bld_context { static INLINE ubyte bld_register_file(struct bld_context *bld, struct bld_register *reg) { - if (reg < &bld->avs[0][0]) return NV_FILE_GPR; - else - if (reg < &bld->pvs[0][0]) return NV_FILE_GPR; - else - if (reg < &bld->ovs[0][0]) return NV_FILE_PRED; - else - return 
NV_FILE_MEM_V; + if (reg >= &bld->pvs[0][0] && + reg < &bld->ovs[0][0]) + return NV_FILE_PRED; + return NV_FILE_GPR; } static INLINE struct nv_value * @@ -361,6 +364,9 @@ bld_loop_phi(struct bld_context *bld, struct bld_register *reg, struct nv_basic_block *bb = bld->pc->current_block; struct nv_value *val = NULL; + if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */ + return NULL; + if (bld->loop_lvl > 1) { --bld->loop_lvl; if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl))) @@ -459,6 +465,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) reg = (struct bld_register *)phi->target; phi->target = NULL; + /* start with s == 1, src[0] is from outside the loop */ for (s = 1, n = 0; n < bb->num_in; ++n) { if (bb->in_kind[n] != CFG_EDGE_BACK) continue; @@ -470,8 +477,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) for (i = 0; i < 4; ++i) if (phi->src[i] && phi->src[i]->value == val) break; - if (i == 4) + if (i == 4) { + /* skip values we do not want to replace */ + for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s); nv_reference(bld->pc, phi, s++, val); + } } bld->pc->current_block = save; @@ -563,11 +573,12 @@ bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst, loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32)); - loc->reg.id = ofst * 4; + loc->reg.address = ofst * 4; nv_reference(bld->pc, insn, 0, loc); - nv_reference(bld->pc, insn, 1, ptr); - nv_reference(bld->pc, insn, 2, val); + nv_reference(bld->pc, insn, 1, val); + if (ptr) + bld_src_pointer(bld, insn, 2, ptr); } static struct nv_value * @@ -579,7 +590,9 @@ bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst) loc->reg.address = ofst * 4; - val = bld_insn_2(bld, NV_OP_LD, loc, ptr); + val = bld_insn_1(bld, NV_OP_LD, loc); + if (ptr) + bld_src_pointer(bld, val->insn, 1, ptr); return val; } @@ -650,7 +663,7 @@ bld_kil(struct bld_context *bld, struct nv_value *src) static void bld_flow(struct bld_context *bld, uint opcode, - struct nv_value *src, struct nv_basic_block *target, + struct nv_value *pred, uint8_t cc, struct nv_basic_block *target, boolean reconverge) { struct nv_instruction *nvi; @@ -661,8 +674,10 @@ bld_flow(struct bld_context *bld, uint opcode, nvi = new_instruction(bld->pc, opcode); nvi->target = target; nvi->terminator = 1; - if (src) - bld_src_predicate(bld, nvi, 0, src); + if (pred) { + nvi->cc = cc; + bld_src_predicate(bld, nvi, 0, pred); + } } static ubyte @@ -878,11 +893,10 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, break; case TGSI_FILE_TEMPORARY: assert(idx < BLD_MAX_TEMPS); - if (!res->insn) + if (!res->insn || res->insn->bb != bld->pc->current_block) res = bld_insn_1(bld, NV_OP_MOV, res); assert(res->reg.file == NV_FILE_GPR); - assert(res->insn->bb = bld->pc->current_block); if (bld->ti->require_stores) bld_lmem_store(bld, ptr, idx * 4 + chan, res); @@ -979,14 +993,6 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b) } static struct nv_value * -bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c) -{ - if (bld->saved_inputs[i][c]) - return bld->saved_inputs[i][c]; - return NULL; -} - -static struct nv_value * bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) { unsigned cent = mode & NVC0_INTERP_CENTROID; @@ -1053,9 +1059,9 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: assert(!src->Register.Dimension); if (!ptr) { - res = 
bld_get_saved_input(bld, idx, swz); + res = bld->saved_inputs[idx][swz]; if (res) - return res; + break; } res = new_value(bld->pc, bld->ti->input_file, 4); if (ptr) @@ -1173,7 +1179,7 @@ static INLINE void describe_texture_target(unsigned target, int *dim, int *array, int *cube, int *shadow) { - *array = *cube = *shadow = 0; + *dim = *array = *cube = *shadow = 0; switch (target) { case TGSI_TEXTURE_1D: @@ -1199,11 +1205,6 @@ describe_texture_target(unsigned target, int *dim, *dim = 2; *cube = 1; break; - /* - case TGSI_TEXTURE_CUBE_ARRAY: - *dim = 2; - *cube = *array = 1; - break; case TGSI_TEXTURE_1D_ARRAY: *dim = *array = 1; break; @@ -1211,6 +1212,7 @@ describe_texture_target(unsigned target, int *dim, *dim = 2; *array = 1; break; + /* case TGSI_TEXTURE_SHADOW1D_ARRAY: *dim = *array = *shadow = 1; break; @@ -1220,7 +1222,7 @@ describe_texture_target(unsigned target, int *dim, break; case TGSI_TEXTURE_CUBE_ARRAY: *dim = 2; - *array = *cube = 1; + *cube = *array = 1; break; */ default: @@ -1338,10 +1340,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, if (array) arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); - /* ensure that all inputs reside in a GPR */ - for (c = 0; c < dim + array + cube + shadow; ++c) - (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; - /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ bnd = new_instruction(bld->pc, NV_OP_BIND); @@ -1383,21 +1381,12 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, nvi->tex_dim = dim; nvi->tex_cube = cube; nvi->tex_shadow = shadow; + nvi->tex_array = array; nvi->tex_live = 0; return nvi; } -/* -static boolean -bld_is_constant(struct nv_value *val) -{ - if (val->reg.file == NV_FILE_IMM) - return TRUE; - return val->insn && nvCG_find_constant(val->insn->src[0]); -} -*/ - static void bld_tex(struct bld_context *bld, struct nv_value *dst0[4], const struct tgsi_full_instruction *insn) @@ -1413,7 +1402,7 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], assert(dim + array + shadow + lodbias <= 5); - if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) + if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP) load_proj_tex_coords(bld, t, dim, shadow, insn); else { for (c = 0; c < dim + cube + array; ++c) @@ -1504,10 +1493,10 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_CMP: FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { src0 = emit_fetch(bld, insn, 0, c); - src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero); src1 = emit_fetch(bld, insn, 1, c); src2 = emit_fetch(bld, insn, 2, c); - dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0); + dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0); + dst0[c]->insn->set_cond = NV_CC_LT; } break; case TGSI_OPCODE_COS: @@ -1601,6 +1590,7 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_IF: { struct nv_basic_block *b = new_basic_block(bld->pc); + struct nv_value *pred = emit_fetch(bld, insn, 0, 0); assert(bld->cond_lvl < BLD_MAX_COND_NESTING); @@ -1609,10 +1599,19 @@ bld_instruction(struct bld_context *bld, bld->join_bb[bld->cond_lvl] = bld->pc->current_block; bld->cond_bb[bld->cond_lvl] = bld->pc->current_block; - src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ, - emit_fetch(bld, insn, 0, 0), bld->zero); + if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) { + pred = bld_clone(bld, pred->insn); + pred->reg.size = 1; + pred->reg.file = NV_FILE_PRED; + if (pred->insn->opcode == NV_OP_FSET_F32) + pred->insn->opcode = NV_OP_SET_F32; + } else { 
+ pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U, + pred, bld->zero); + } + assert(!mask); - bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0)); + bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0)); ++bld->cond_lvl; bld_new_block(bld, b); @@ -1638,6 +1637,10 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); + if (bld->pc->current_block->exit && + !bld->pc->current_block->exit->terminator) + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE); + --bld->cond_lvl; nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind); nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); @@ -1678,7 +1681,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE); @@ -1690,7 +1693,7 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); @@ -1705,9 +1708,11 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE); + if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */ + bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE); - nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK); + } bld_loop_end(bld, bb); /* replace loop-side operand of the phis */ @@ -1824,11 +1829,11 @@ bld_instruction(struct bld_context *bld, case TGSI_OPCODE_SSG: FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */ src0 = emit_fetch(bld, insn, 0, c); - src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero); - temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000)); - temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f)); - dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp); - bld_src_predicate(bld, dst0[c]->insn, 1, src1); + src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero); + src1->insn->set_cond = NV_CC_GT; + src2->insn->set_cond = NV_CC_LT; + dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2); } break; case TGSI_OPCODE_SUB: @@ -1892,10 +1897,10 @@ bld_instruction(struct bld_context *bld, } for (c = 0; c < 4; ++c) - if ((mask & (1 << c)) && - ((dst0[c]->reg.file == NV_FILE_IMM) || - (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) - dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + if (mask & (1 << c)) + if ((dst0[c]->reg.file == NV_FILE_IMM) || + (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63)) + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); c = 0; if ((mask & 0x3) == 0x3) { diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 286b382f58e..b279bdc6e7d 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -10,7 +10,8 @@ struct nvc0_transfer { struct pipe_transfer base; struct nvc0_m2mf_rect rect[2]; uint32_t nblocksx; - uint32_t nblocksy; + uint16_t nblocksy; + uint16_t nlayers; }; static void @@ -242,23 +243,36 @@ 
nvc0_miptree_transfer_new(struct pipe_context *pctx, struct nvc0_miptree_level *lvl = &mt->level[level]; struct nvc0_transfer *tx; uint32_t size; - uint32_t w, h, d, z, layer; + uint32_t w, h, d, z, layer, box_h, box_y; int ret; + tx = CALLOC_STRUCT(nvc0_transfer); + if (!tx) + return NULL; + + box_y = box->y; + box_h = box->height; + if (mt->layout_3d) { z = box->z; d = u_minify(res->depth0, level); layer = 0; + tx->nlayers = box->depth; } else { z = 0; d = 1; - layer = box->z; + if (res->target == PIPE_TEXTURE_1D || + res->target == PIPE_TEXTURE_1D_ARRAY) { + box_y = 0; + box_h = 1; + layer = box->y; + tx->nlayers = box->height; + } else { + layer = box->z; + tx->nlayers = box->depth; + } } - tx = CALLOC_STRUCT(nvc0_transfer); - if (!tx) - return NULL; - pipe_resource_reference(&tx->base.resource, res); tx->base.level = level; @@ -266,7 +280,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->base.box = *box; tx->nblocksx = util_format_get_nblocksx(res->format, box->width); - tx->nblocksy = util_format_get_nblocksy(res->format, box->height); + tx->nblocksy = util_format_get_nblocksy(res->format, box_h); tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format); tx->base.layer_stride = tx->nblocksy * tx->base.stride; @@ -280,7 +294,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].base = lvl->offset + layer * mt->layer_stride; tx->rect[0].tile_mode = lvl->tile_mode; tx->rect[0].x = util_format_get_nblocksx(res->format, box->x); - tx->rect[0].y = util_format_get_nblocksy(res->format, box->y); + tx->rect[0].y = util_format_get_nblocksy(res->format, box_y); tx->rect[0].z = z; tx->rect[0].width = util_format_get_nblocksx(res->format, w); tx->rect[0].height = util_format_get_nblocksy(res->format, h); @@ -291,7 +305,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, size = tx->base.layer_stride; ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - size * tx->base.box.depth, &tx->rect[1].bo); + size * tx->nlayers, &tx->rect[1].bo); if (ret) { FREE(tx); return NULL; @@ -304,8 +318,9 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[1].domain = NOUVEAU_BO_GART; if (usage & PIPE_TRANSFER_READ) { + unsigned base = tx->rect[0].base; unsigned i; - for (i = 0; i < box->depth; ++i) { + for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0], tx->nblocksx, tx->nblocksy); if (mt->layout_3d) @@ -314,9 +329,10 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, tx->rect[0].base += mt->layer_stride; tx->rect[1].base += size; } + tx->rect[0].z = z; + tx->rect[0].base = base; + tx->rect[1].base = 0; } - tx->rect[0].z = z; - tx->rect[1].base = 0; return &tx->base; } @@ -331,7 +347,7 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, unsigned i; if (tx->base.usage & PIPE_TRANSFER_WRITE) { - for (i = 0; i < tx->base.box.depth; ++i) { + for (i = 0; i < tx->nlayers; ++i) { nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1], tx->nblocksx, tx->nblocksy); if (mt->layout_3d) diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index a51a887ed89..2db43d8704b 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -54,12 +54,13 @@ nvc0_vertex_state_create(struct pipe_context *pipe, assert(num_elements); so = MALLOC(sizeof(*so) + - (num_elements - 1) * sizeof(struct nvc0_vertex_element)); + num_elements * sizeof(struct nvc0_vertex_element)); if (!so) return NULL; so->num_elements = num_elements; so->instance_elts = 0; 
so->instance_bufs = 0; + so->need_conversion = FALSE; transkey.nr_elements = 0; transkey.output_stride = 0; @@ -83,6 +84,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, return NULL; } so->element[i].state = nvc0_format_table[fmt].vtx; + so->need_conversion = TRUE; } so->element[i].state |= i; @@ -152,7 +154,7 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { /* TODO: use min and max instance divisor to get a proper range */ *base = 0; - *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride; + *size = nvc0->vtxbuf[vbi].buffer->width0; } else { assert(nvc0->vbo_max_index != ~0); *base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride; @@ -171,12 +173,15 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0) nvc0->vbo_fifo = nvc0->vbo_user = 0; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX); + for (i = 0; i < nvc0->num_vtxbufs; ++i) { vb = &nvc0->vtxbuf[i]; if (!vb->stride) continue; buf = nvc0_resource(vb->buffer); + /* NOTE: user buffers with temporary storage count as mapped by GPU */ if (!nvc0_resource_mapped_by_gpu(vb->buffer)) { if (nvc0->vbo_push_hint) { nvc0->vbo_fifo = ~0; @@ -227,16 +232,30 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) } offset = vb->buffer_offset + ve->src_offset; + MARK_RING (chan, 6, 4); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); OUT_RING (chan, i); - OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD); - OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD); + OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD); + OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD); OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD); OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD); } nvc0->vbo_dirty = TRUE; } +static INLINE void +nvc0_release_user_vbufs(struct nvc0_context *nvc0) +{ + uint32_t vbo_user = nvc0->vbo_user; + + while (vbo_user) { + int i = ffs(vbo_user) - 1; + vbo_user &= ~(1 << i); + + nvc0_buffer_release_gpu_storage(nvc0_resource(nvc0->vtxbuf[i].buffer)); + } +} + void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) { @@ -246,7 +265,12 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) struct nvc0_vertex_element *ve; unsigned i; - nvc0_prevalidate_vbufs(nvc0); + if (unlikely(vertex->need_conversion)) { + nvc0->vbo_fifo = ~0; + nvc0->vbo_user = 0; + } else { + nvc0_prevalidate_vbufs(nvc0); + } BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements); for (i = 0; i < vertex->num_elements; ++i) { @@ -292,6 +316,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) size = vb->buffer->width0; offset = ve->pipe.src_offset + vb->buffer_offset; + MARK_RING (chan, 8, 4); BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1); OUT_RING (chan, (1 << 12) | vb->stride); BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5); @@ -346,57 +371,10 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan) { struct nvc0_context *nvc0 = chan->user_private; - nvc0_bufctx_emit_relocs(nvc0); -} + nvc0_screen_fence_update(nvc0->screen, TRUE); -#if 0 -static struct nouveau_bo * -nvc0_tfb_setup(struct nvc0_context *nvc0) -{ - struct nouveau_channel *chan = nvc0->screen->base.channel; - struct nouveau_bo *tfb = NULL; - int ret, i; - - ret = nouveau_bo_new(nvc0->screen->base.device, - NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb); - if (ret) - return NULL; - - ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR); - if (ret) - return NULL; - memset(tfb->map, 0xee, 8 * 4 * 3); - nouveau_bo_unmap(tfb); - - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, 
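nvc0_release_user_vbufs above walks the vbo_user bitmask with ffs(), releasing the temporary GPU storage of each user vertex buffer once the draw is done. The same set-bit iteration idiom in isolation, purely illustrative:

#include <stdint.h>
#include <stdio.h>
#include <strings.h>   /* ffs() */

static void
visit_set_bits(uint32_t mask)
{
   while (mask) {
      int i = ffs(mask) - 1;    /* index of the lowest set bit */
      mask &= ~(1u << i);
      printf("slot %d\n", i);   /* the driver releases buffer storage here */
   }
}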
RING_3D(TFB_BUFFER_ENABLE(0)), 5); - OUT_RING (chan, 1); - OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, tfb->size); - OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */ - BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3); - OUT_RING (chan, 0); - OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */ - OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */ - BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2); - OUT_RING (chan, 0x1f1e1d1c); - OUT_RING (chan, 0xa3a2a1a0); - for (i = 1; i < 4; ++i) { - BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1); - OUT_RING (chan, 0); - } - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x135c), 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, RING_3D_(0x135c), 1); - OUT_RING (chan, 0); - - return tfb; + nvc0_bufctx_emit_relocs(nvc0); } -#endif static void nvc0_draw_arrays(struct nvc0_context *nvc0, @@ -422,7 +400,7 @@ nvc0_draw_arrays(struct nvc0_context *nvc0, prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - chan->flush_notify = NULL; + chan->flush_notify = nvc0_default_flush_notify; } static void @@ -592,7 +570,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, } } - chan->flush_notify = NULL; + chan->flush_notify = nvc0_default_flush_notify; } void @@ -611,6 +589,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0->vbo_min_index = info->min_index; nvc0->vbo_max_index = info->max_index; + if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo) + nvc0->dirty |= NVC0_NEW_ARRAYS; + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) nvc0_update_user_vbufs(nvc0); @@ -668,4 +649,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, info->instance_count, info->index_bias); } + + nvc0_release_user_vbufs(nvc0); } diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index 1544fb7a1de..45f71967eff 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -95,7 +95,7 @@ OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res, unsigned delta, unsigned flags) { if (flags & NOUVEAU_BO_WR) - res->status |= NVC0_BUFFER_STATUS_DIRTY; + res->status |= NVC0_BUFFER_STATUS_GPU_WRITING; return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags); } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 01dacb43dad..b72379d6536 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -2,6 +2,7 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_transfer.h" #include "translate/translate.h" #include "nvfx_context.h" @@ -631,4 +632,6 @@ nvfx_init_vbo_functions(struct nvfx_context *nvfx) nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; + + nvfx->pipe.redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index a43e83c0d36..4f86db39926 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -55,11 +55,12 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o util_blitter_save_viewport(r300->blitter, 
&r300->viewport); util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state); util_blitter_save_vertex_elements(r300->blitter, r300->velems); - util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count, - r300->vertex_buffer); + util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers, + r300->vbuf_mgr->vertex_buffer); - if (op & (R300_CLEAR_SURFACE | R300_COPY)) + if (op & (R300_CLEAR_SURFACE | R300_COPY)) { util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); + } if (op & R300_COPY) { struct r300_textures_state* state = @@ -108,6 +109,14 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static boolean r300_fast_zclear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level]; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -132,37 +141,46 @@ static void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil) { - /* My notes about fastfill: + /* My notes about Zbuffer compression: * - * 1) Only the zbuffer is cleared. + * 1) The zbuffer must be micro-tiled and whole microtiles must be + * written if compression is enabled. If microtiling is disabled, + * it locks up. * - * 2) The zbuffer must be micro-tiled and whole microtiles must be - * written. If microtiling is disabled, it locks up. + * 2) There is ZMASK RAM which contains a compressed zbuffer. + * Each dword of the Z Mask contains compression information + * for 16 4x4 pixel tiles, that is 2 bits for each tile. + * On chips with 2 Z pipes, every other dword maps to a different + * pipe. On newer chipsets, there is a new compression mode + * with 8x8 pixel tiles per 2 bits. * - * 3) There is Z Mask RAM which contains a compressed zbuffer and - * it interacts with fastfill. We should figure out how to use it - * to get more performance. - * This is what we know about the Z Mask: + * 3) The FASTFILL bit has nothing to do with filling. It only tells hw + * it should look in the ZMASK RAM first before fetching from a real + * zbuffer. * - * Each dword of the Z Mask contains compression information - * for 16 4x4 pixel blocks, that is 2 bits for each block. - * On chips with 2 Z pipes, every other dword maps to a different - * pipe. + * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned + * during zbuffer reads instead of the value that is actually stored + * in the zbuffer memory. A pixel is in a cleared state when its ZMASK + * is equal to 0. Therefore, if you clear ZMASK with zeros, you may + * leave the zbuffer memory uninitialized, but then you must enable + * compression, so that the ZMASK RAM is actually used. * - * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must - * be equal to 0. (clear the Z Mask RAM with zeros) + * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed + * during zbuffer updates. A special decompressing operation should be + * used to fully decompress a zbuffer, which basically just stores all + * compressed tiles in ZMASK to the zbuffer memory. * - * 5) For 16-bit zbuffer, compression causes a hung with one or + * 6) For a 16-bit zbuffer, compression causes a hung with one or * two samples and should not be used. 
* - * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears + * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears * to avoid needless decompression. * - * 7) Fastfill must not be used if reading of compressed Z data is disabled + * 8) Fastfill must not be used if reading of compressed Z data is disabled * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), * i.e. it cannot be used to compress the zbuffer. * - * 8) ZB_CB_CLEAR does not interact with fastfill in any way. + * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way. * * - Marek */ @@ -172,32 +190,30 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_texture *zstex = - fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; + struct r300_resource *zstex = + fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); uint32_t hyperz_dcv = hyperz->zb_depthclearvalue; - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. */ - r300_flush_depth_textures(r300); - /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { hyperz_dcv = hyperz->zb_depthclearvalue = r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); - if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) { + if (r300_fast_zclear_allowed(r300)) { r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } + if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) r300_mark_atom_dirty(r300, &r300->hiz_clear); + + /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state); + * once hiz offset is constant. */ + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Enable CBZB clear. */ @@ -211,7 +227,7 @@ static void r300_clear(struct pipe_context* pipe, height = surf->cbzb_height; r300->cbzb_clear = TRUE; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Clear. */ @@ -225,7 +241,7 @@ static void r300_clear(struct pipe_context* pipe, buffers, rgba, depth, stencil); r300_blitter_end(r300); } else if (r300->zmask_clear.dirty) { - /* Just clear zmask and hiz now, this does not use a standard draw + /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; @@ -257,16 +273,15 @@ static void r300_clear(struct pipe_context* pipe, if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; hyperz->zb_depthclearvalue = hyperz_dcv; - r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); } /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update * looks if zmask/hiz is in use and enables fastfill accordingly. 
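The Zbuffer compression notes above fix the ZMASK granularity at 2 bits per tile, i.e. 16 tiles per dword. A worked example of the per-level dword count under that assumption (illustrative only; the real layout code additionally has to account for the Z-pipe interleaving and alignment mentioned in the notes):

/* Sketch only: ZMASK dwords needed for one miplevel, 2 bits per tile. */
static unsigned
zmask_dwords(unsigned width, unsigned height, unsigned tile_dim /* 4 or 8 */)
{
   unsigned tiles_x = (width  + tile_dim - 1) / tile_dim;
   unsigned tiles_y = (height + tile_dim - 1) / tile_dim;
   return (tiles_x * tiles_y + 15) / 16;   /* 16 two-bit entries per dword */
}
/* Example: 1024x768 with 8x8 tiles -> 128 * 96 = 12288 tiles -> 768 dwords. */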
*/ - if (zstex && - (zstex->zmask_in_use[fb->zsbuf->u.tex.level] || - zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + if (r300->zmask_in_use || + (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -280,16 +295,16 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. */ - r300_flush_depth_textures(r300); + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_render_target(r300->blitter, dst, rgba, dstx, dsty, width, height); r300_blitter_end(r300); + + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } /* Clear a region of a depth stencil surface. */ @@ -302,83 +317,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, unsigned width, unsigned height) { struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; - /* Decompress zbuffers that are bound as textures. If we didn't flush here, - * it would happen inside the blitter when updating derived state, - * causing a blitter operation to be called from inside the blitter, - * which would overwrite saved states and they would never get restored. */ - r300_flush_depth_textures(r300); + if (r300->zmask_in_use && !r300->zmask_locked) { + if (fb->zsbuf->texture == dst->texture) { + r300_decompress_zmask(r300); + } else { + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } r300_blitter_begin(r300, R300_CLEAR_SURFACE); util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); r300_blitter_end(r300); + + if (r300->zmask_locked) { + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } } -/* Flush a depth stencil buffer. 
*/ -static void r300_flush_depth_stencil(struct pipe_context *pipe, - struct pipe_resource *dst, - unsigned level, - unsigned layer) +void r300_decompress_zmask(struct r300_context *r300) { - struct r300_context *r300 = r300_context(pipe); - struct pipe_surface *dstsurf, surf_tmpl; - struct r300_texture *tex = r300_texture(dst); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!tex->zmask_mem[level]) - return; - if (!tex->zmask_in_use[level]) + if (!r300->zmask_in_use || r300->zmask_locked) return; - surf_tmpl.format = dst->format; - surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - surf_tmpl.u.tex.level = level; - surf_tmpl.u.tex.first_layer = layer; - surf_tmpl.u.tex.last_layer = layer; - dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl); + r300->zmask_decompress = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); - r300->z_decomp_rd = TRUE; - - r300_blitter_begin(r300, R300_CLEAR_SURFACE); - util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_begin(r300, R300_CLEAR); + util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0, + r300->dsa_decompress_zmask); r300_blitter_end(r300); - r300->z_decomp_rd = FALSE; - tex->zmask_in_use[level] = FALSE; - pipe_surface_reference(&dstsurf, NULL); + r300->zmask_decompress = FALSE; + r300->zmask_in_use = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } -/* We can't use compressed zbuffers as samplers. */ -void r300_flush_depth_textures(struct r300_context *r300) +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300) { - struct r300_textures_state *state = - (struct r300_textures_state*)r300->textures_state.state; - unsigned i, level; - unsigned count = MIN2(state->sampler_view_count, - state->sampler_state_count); - - if (r300->z_decomp_rd) - return; - - for (i = 0; i < count; i++) - if (state->sampler_views[i] && state->sampler_states[i]) { - struct pipe_resource *tex = state->sampler_views[i]->base.texture; - - if (tex->target == PIPE_TEXTURE_3D || - tex->target == PIPE_TEXTURE_CUBE) - continue; + struct pipe_framebuffer_state fb = {0}; + fb.width = r300->locked_zbuffer->width; + fb.height = r300->locked_zbuffer->height; + fb.nr_cbufs = 0; + fb.zsbuf = r300->locked_zbuffer; + + r300->context.set_framebuffer_state(&r300->context, &fb); + r300_decompress_zmask(r300); +} - /* Ignore non-depth textures. - * Also ignore reinterpreted depth textures, e.g. resource_copy. */ - if (!util_format_is_depth_or_stencil(tex->format)) - continue; +void r300_decompress_zmask_locked(struct r300_context *r300) +{ + struct pipe_framebuffer_state saved_fb = {0}; - for (level = 0; level <= tex->last_level; level++) - if (r300_texture(tex)->zmask_in_use[level]) { - /* We don't handle 3D textures and cubemaps yet. */ - r300_flush_depth_stencil(&r300->context, tex, level, 0); - } - } + util_copy_framebuffer_state(&saved_fb, r300->fb_state.state); + r300_decompress_zmask_locked_unsafe(r300); + r300->context.set_framebuffer_state(&r300->context, &saved_fb); + util_unreference_framebuffer_state(&saved_fb); } /* Copy a block of pixels from one surface to another using HW. 
*/ @@ -393,8 +395,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300_blitter_begin(r300, R300_COPY); - - /* Do a copy */ util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, TRUE); r300_blitter_end(r300); @@ -409,56 +409,102 @@ static void r300_resource_copy_region(struct pipe_context *pipe, unsigned src_level, const struct pipe_box *src_box) { - enum pipe_format old_format = dst->format; - enum pipe_format new_format = old_format; - boolean is_depth; - - if (!pipe->screen->is_format_supported(pipe->screen, - old_format, src->target, - src->nr_samples, - PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW, 0) && - util_format_is_plain(old_format)) { - switch (util_format_get_blocksize(old_format)) { + struct r300_context *r300 = r300_context(pipe); + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct pipe_resource old_src = *src; + struct pipe_resource old_dst = *dst; + struct pipe_resource new_src = old_src; + struct pipe_resource new_dst = old_dst; + const struct util_format_description *desc = + util_format_description(dst->format); + struct pipe_box box; + + if (r300->zmask_in_use && !r300->zmask_locked) { + if (fb->zsbuf->texture == src || + fb->zsbuf->texture == dst) { + r300_decompress_zmask(r300); + } else { + r300->zmask_locked = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); + } + } + + /* Handle non-renderable plain formats. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || + !pipe->screen->is_format_supported(pipe->screen, + src->format, src->target, + src->nr_samples, + PIPE_BIND_SAMPLER_VIEW, 0) || + !pipe->screen->is_format_supported(pipe->screen, + dst->format, dst->target, + dst->nr_samples, + PIPE_BIND_RENDER_TARGET, 0))) { + switch (util_format_get_blocksize(old_dst.format)) { case 1: - new_format = PIPE_FORMAT_I8_UNORM; + new_dst.format = PIPE_FORMAT_I8_UNORM; break; case 2: - new_format = PIPE_FORMAT_B4G4R4A4_UNORM; + new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM; break; case 4: - new_format = PIPE_FORMAT_B8G8R8A8_UNORM; + new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; break; case 8: - new_format = PIPE_FORMAT_R16G16B16A16_UNORM; + new_dst.format = PIPE_FORMAT_R16G16B16A16_UNORM; break; default: debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n" "r300: surface_copy: Software fallback doesn't work for tiled textures.\n", - util_format_short_name(old_format)); + util_format_short_name(dst->format)); } + new_src.format = new_dst.format; } - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) { - r300_flush_depth_stencil(pipe, src, src_level, src_box->z); - } + /* Handle compressed formats. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + switch (util_format_get_blocksize(old_dst.format)) { + case 8: + /* 1 pixel = 4 bits, + * we set 1 pixel = 2 bytes ===> 4 times larger pixels. */ + new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM; + break; + case 16: + /* 1 pixel = 8 bits, + * we set 1 pixel = 4 bytes ===> 4 times larger pixels. */ + new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; + break; + } - if (old_format != new_format) { - r300_texture_reinterpret_format(pipe->screen, - dst, new_format); - r300_texture_reinterpret_format(pipe->screen, - src, new_format); + /* Since the pixels are 4 times larger, we must decrease + * the image size and the coordinates 4 times. 
*/ + new_src.format = new_dst.format; + new_dst.height0 = (new_dst.height0 + 3) / 4; + new_src.height0 = (new_src.height0 + 3) / 4; + dsty /= 4; + box = *src_box; + box.y /= 4; + box.height = (box.height + 3) / 4; + src_box = &box; } + if (old_src.format != new_src.format) + r300_resource_set_properties(pipe->screen, src, 0, &new_src); + if (old_dst.format != new_dst.format) + r300_resource_set_properties(pipe->screen, dst, 0, &new_dst); + r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); - if (old_format != new_format) { - r300_texture_reinterpret_format(pipe->screen, - dst, old_format); - r300_texture_reinterpret_format(pipe->screen, - src, old_format); + if (old_src.format != new_src.format) + r300_resource_set_properties(pipe->screen, src, 0, &old_src); + if (old_dst.format != new_dst.format) + r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); + + if (r300->zmask_locked) { + r300->zmask_locked = FALSE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } } diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h index 9d3d4fc1b19..b373937a1f9 100644 --- a/src/gallium/drivers/r300/r300_cb.h +++ b/src/gallium/drivers/r300/r300_cb.h @@ -61,40 +61,52 @@ * that they neatly hide away, and don't have the cost of function setup, so * we're going to use them. */ -#ifdef DEBUG -#define CB_DEBUG(x) x -#else -#define CB_DEBUG(x) -#endif - - /** * Command buffer setup. */ +#ifdef DEBUG + #define CB_LOCALS \ - CB_DEBUG(int cs_count = 0;) \ + int cs_count = 0; \ uint32_t *cs_ptr = NULL; \ - CB_DEBUG((void) cs_count;) (void) cs_ptr; + (void) cs_count; (void) cs_ptr -#define NEW_CB(ptr, size) do { \ - assert(sizeof(*ptr) == sizeof(uint32_t)); \ - cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ - CB_DEBUG(cs_count = size;) \ +#define BEGIN_CB(ptr, size) do { \ + assert(sizeof(*(ptr)) == sizeof(uint32_t)); \ + cs_count = (size); \ + cs_ptr = (ptr); \ } while (0) -#define BEGIN_CB(ptr, size) do { \ - assert(sizeof(*ptr) == sizeof(uint32_t)); \ - cs_ptr = ptr; \ - CB_DEBUG(cs_count = size;) \ +#define NEW_CB(ptr, size) \ + do { \ + assert(sizeof(*(ptr)) == sizeof(uint32_t)); \ + cs_count = (size); \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ } while (0) #define END_CB do { \ - CB_DEBUG(if (cs_count != 0) \ + if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ - cs_count, __FUNCTION__, __FILE__, __LINE__);) \ + cs_count, __FUNCTION__, __FILE__, __LINE__); \ } while (0) +#define CB_USED_DW(x) cs_count -= x + +#else + +#define CB_LOCALS \ + uint32_t *cs_ptr = NULL; (void) cs_ptr + +#define NEW_CB(ptr, size) \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)) + +#define BEGIN_CB(ptr, size) cs_ptr = (ptr) +#define END_CB +#define CB_USED_DW(x) + +#endif + /** * Storing pure DWORDs. 
@@ -103,13 +115,13 @@ #define OUT_CB(value) do { \ *cs_ptr = (value); \ cs_ptr++; \ - CB_DEBUG(cs_count--;) \ + CB_USED_DW(1); \ } while (0) #define OUT_CB_TABLE(values, count) do { \ memcpy(cs_ptr, values, count * sizeof(uint32_t)); \ cs_ptr += count; \ - CB_DEBUG(cs_count -= count;) \ + CB_USED_DW(count); \ } while (0) #define OUT_CB_32F(value) \ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 2b183f62c56..1968d0feb35 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -241,7 +241,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5A42: caps->family = CHIP_FAMILY_RS400; caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: @@ -424,5 +423,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) } caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; + caps->z_compress = caps->is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index f2035d20092..0be161fa07a 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -33,6 +33,13 @@ #define PIPE_ZMASK_SIZE 4096 #define RV3xx_ZMASK_SIZE 5120 +/* The size of a compressed tile. Each compressed tile takes 2 bits + * in the ZMASK RAM, so there is always 16 tiles per one dword. */ +enum r300_zmask_compression { + R300_ZCOMP_4X4 = 4, + R300_ZCOMP_8X8 = 8 +}; + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -50,10 +57,12 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + /* Some chipsets do not have HiZ RAM - other have varying amounts. */ int hiz_ram; - /* some chipsets have zmask ram per pipe some don't */ + /* Some chipsets have zmask ram per pipe some don't. */ int zmask_ram; + /* Compression mode for ZMASK. */ + enum r300_zmask_compression z_compress; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 91263ad7bcd..9f85bd4ce5f 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -35,26 +35,24 @@ #include "r300_screen_buffer.h" #include "r300_winsys.h" -#ifdef HAVE_LLVM -#include "gallivm/lp_bld_init.h" -#endif - static void r300_update_num_contexts(struct r300_screen *r300screen, int diff) { + pipe_mutex_lock(r300screen->num_contexts_mutex); if (diff > 0) { - p_atomic_inc(&r300screen->num_contexts); + r300screen->num_contexts++; if (r300screen->num_contexts > 1) util_slab_set_thread_safety(&r300screen->pool_buffers, UTIL_SLAB_MULTITHREADED); } else { - p_atomic_dec(&r300screen->num_contexts); + r300screen->num_contexts--; if (r300screen->num_contexts <= 1) util_slab_set_thread_safety(&r300screen->pool_buffers, UTIL_SLAB_SINGLETHREADED); } + pipe_mutex_unlock(r300screen->num_contexts_mutex); } static void r300_release_referenced_objects(struct r300_context *r300) @@ -87,17 +85,14 @@ static void r300_release_referenced_objects(struct r300_context *r300) /* The SWTCL VBO. 
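The reworked r300_cb.h macros above move the dword accounting behind CB_USED_DW so that release builds compile it away entirely. A hypothetical caller, just to show the intended pattern (the packet value is made up):

/* Sketch only: fill a caller-provided 3-dword command buffer. In DEBUG
 * builds END_CB warns if the size declared in BEGIN_CB was not consumed. */
static void
build_example_cb(uint32_t *buf)
{
   CB_LOCALS;

   BEGIN_CB(buf, 3);
   OUT_CB(0x4e000000);   /* made-up packet header */
   OUT_CB(0);
   OUT_CB(0);
   END_CB;
}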
*/ pipe_resource_reference(&r300->vbo, NULL); - /* Vertex buffers. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); - } - /* If there are any queries pending or not destroyed, remove them now. */ foreach_s(query, temp, &r300->query_list) { remove_from_list(query); FREE(query); } + + r300->context.delete_depth_stencil_alpha_state(&r300->context, + r300->dsa_decompress_zmask); } static void r300_destroy_context(struct pipe_context* context) @@ -106,28 +101,15 @@ static void r300_destroy_context(struct pipe_context* context) if (r300->blitter) util_blitter_destroy(r300->blitter); - if (r300->draw) { + if (r300->draw) draw_destroy(r300->draw); -#ifdef HAVE_LLVM - gallivm_destroy(r300->gallivm); -#endif - } - - if (r300->upload_vb) - u_upload_destroy(r300->upload_vb); - if (r300->upload_ib) - u_upload_destroy(r300->upload_ib); - - if (r300->tran.translate_cache) - translate_cache_destroy(r300->tran.translate_cache); + if (r300->vbuf_mgr) + u_vbuf_mgr_destroy(r300->vbuf_mgr); /* XXX: This function assumes r300->query_list was initialized */ r300_release_referenced_objects(r300); - if (r300->zmask_mm) - r300_hyperz_destroy_mm(r300); - if (r300->cs) r300->rws->cs_destroy(r300->cs); @@ -247,7 +229,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (has_hiz_ram) R300_INIT_ATOM(hiz_clear, 0); /* zmask clear */ - R300_INIT_ATOM(zmask_clear, 0); + R300_INIT_ATOM(zmask_clear, 4); } /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); @@ -432,12 +414,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300screen->caps.has_tcl) { /* Create a Draw. This is used for SW TCL. */ -#ifdef HAVE_LLVM - r300->gallivm = gallivm_create(); - r300->draw = draw_create_gallivm(&r300->context, r300->gallivm); -#else r300->draw = draw_create(&r300->context); -#endif if (r300->draw == NULL) goto fail; /* Enable our renderer. 
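On the r300_update_num_contexts hunk earlier: the p_atomic_inc/dec pair is replaced with a mutex, presumably because the counter update and the slab allocator's thread-safety switch have to happen as one critical section. A condensed sketch equivalent to the two branches in the diff:

static void
update_num_contexts(struct r300_screen *screen, int diff)
{
   pipe_mutex_lock(screen->num_contexts_mutex);
   screen->num_contexts += diff;
   util_slab_set_thread_safety(&screen->pool_buffers,
                               screen->num_contexts > 1 ?
                               UTIL_SLAB_MULTITHREADED :
                               UTIL_SLAB_SINGLETHREADED);
   pipe_mutex_unlock(screen->num_contexts_mutex);
}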
*/ @@ -456,6 +433,13 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_state_functions(r300); r300_init_resource_functions(r300); + r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16, + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER, + U_VERTEX_FETCH_DWORD_ALIGNED); + if (!r300->vbuf_mgr) + goto fail; + r300->blitter = util_blitter_create(&r300->context); if (r300->blitter == NULL) goto fail; @@ -470,23 +454,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300_hyperz_init_mm(r300)) goto fail; - r300->upload_ib = u_upload_create(&r300->context, - 64 * 1024, 16, - PIPE_BIND_INDEX_BUFFER); - - if (r300->upload_ib == NULL) - goto fail; - - r300->upload_vb = u_upload_create(&r300->context, - 1024 * 1024, 16, - PIPE_BIND_VERTEX_BUFFER); - if (r300->upload_vb == NULL) - goto fail; - - r300->tran.translate_cache = translate_cache_create(); - if (r300->tran.translate_cache == NULL) - goto fail; - r300_init_states(&r300->context); /* The KIL opcode needs the first texture unit to be enabled @@ -515,7 +482,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } { - struct pipe_resource vb = {}; + struct pipe_resource vb; + memset(&vb, 0, sizeof(vb)); vb.target = PIPE_BUFFER; vb.format = PIPE_FORMAT_R8_UNORM; vb.bind = PIPE_BIND_VERTEX_BUFFER; @@ -527,36 +495,44 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->dummy_vb = screen->resource_create(screen, &vb); } + { + struct pipe_depth_stencil_alpha_state dsa; + memset(&dsa, 0, sizeof(dsa)); + dsa.depth.writemask = 1; + + r300->dsa_decompress_zmask = + r300->context.create_depth_stencil_alpha_state(&r300->context, + &dsa); + } + + /* Print driver info. */ +#ifdef NDEBUG + if (DBG_ON(r300, DBG_INFO)) { +#else + { +#endif + fprintf(stderr, + "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" + "r300: GART size: %d MB, VRAM size: %d MB\n" + "r300: AA compression: %s, Z compression: %s, HiZ: %s\n", + rws->get_value(rws, R300_VID_DRM_MAJOR), + rws->get_value(rws, R300_VID_DRM_MINOR), + rws->get_value(rws, R300_VID_DRM_PATCHLEVEL), + screen->get_name(screen), + rws->get_value(rws, R300_VID_PCI_ID), + rws->get_value(rws, R300_VID_GB_PIPES), + rws->get_value(rws, R300_VID_Z_PIPES), + rws->get_value(rws, R300_VID_GART_SIZE) >> 20, + rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20, + rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.hiz_ram ? "YES" : "NO"); + } + return &r300->context; - fail: +fail: r300_destroy_context(&r300->context); return NULL; } - -void r300_finish(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb; - unsigned i; - - /* This is a preliminary implementation of glFinish. - * - * The ideal implementation should use something like EmitIrqLocked and - * WaitIrq, or better, real fences. 
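The dsa_decompress_zmask state created above enables only depth writes; the decompress pass draws with it (via util_blitter_clear_depth_custom in r300_decompress_zmask) so every compressed tile gets written back to zbuffer memory. A minimal, stand-alone sketch of constructing such a state through the gallium interface:

#include <string.h>
#include "pipe/p_context.h"
#include "pipe/p_state.h"

/* Sketch only: a depth/stencil/alpha state with just the Z writemask set,
 * as used for the zmask decompress pass. */
static void *
create_zmask_decompress_dsa(struct pipe_context *pipe)
{
   struct pipe_depth_stencil_alpha_state dsa;
   memset(&dsa, 0, sizeof(dsa));
   dsa.depth.writemask = 1;   /* write Z; depth test and stencil stay off */
   return pipe->create_depth_stencil_alpha_state(pipe, &dsa);
}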
- */ - if (r300->fb_state.state) { - fb = r300->fb_state.state; - - for (i = 0; i < fb->nr_cbufs; i++) { - if (fb->cbufs[i]->texture) { - r300->rws->buffer_wait(r300->rws, - r300_texture(fb->cbufs[i]->texture)->buffer); - return; - } - } - if (fb->zsbuf && fb->zsbuf->texture) { - r300->rws->buffer_wait(r300->rws, - r300_texture(fb->zsbuf->texture)->buffer); - } - } -} diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e09cf87f733..30073759476 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,11 +30,11 @@ #include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_transfer.h" - -#include "translate/translate_cache.h" +#include "util/u_vbuf_mgr.h" #include "r300_defines.h" #include "r300_screen.h" +#include "r300_winsys.h" struct u_upload_mgr; struct r300_context; @@ -189,11 +189,6 @@ struct r300_sampler_view { uint32_t texcache_region; }; -struct r300_texture_fb_state { - uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */ - uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */ -}; - struct r300_texture_sampler_state { struct r300_texture_format_state format; uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ @@ -273,14 +268,12 @@ struct r300_query { /* How many results have been written, in dwords. It's incremented * after end_query and flush. */ unsigned num_results; - /* if we've flushed the query */ - boolean flushed; /* if begin has been emitted */ boolean begin_emitted; /* The buffer where query results are stored. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; /* The size of the buffer. */ unsigned buffer_size; /* The domain of the buffer. */ @@ -291,28 +284,12 @@ struct r300_query { struct r300_query* next; }; -/* Fence object. - * - * This is a fake fence. Instead of syncing with the fence, we sync - * with the context, which is inefficient but compliant. - * - * This is not a subclass of pipe_fence_handle because pipe_fence_handle is - * never actually fully defined. So, rather than have it as a member, and do - * subclass-style casting, we treat pipe_fence_handle as an opaque, and just - * trust that our state tracker does not ever mess up fence objects. - */ -struct r300_fence { - struct pipe_reference reference; - struct r300_context *ctx; - boolean signalled; -}; - struct r300_surface { struct pipe_surface base; /* Winsys buffer backing the texture. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; enum r300_buffer_domain domain; @@ -329,13 +306,9 @@ struct r300_surface { /* Whether the CBZB clear is allowed on the surface. */ boolean cbzb_allowed; - }; struct r300_texture_desc { - /* Parent class. */ - struct u_resource b; - /* Width, height, and depth. * Most of the time, these are equal to pipe_texture::width0, height0, * and depth0. However, NPOT 3D textures must have dimensions aligned @@ -387,27 +360,38 @@ struct r300_texture_desc { /* Whether CBZB fast color clear is allowed on the miplevel. */ boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; + + /* Zbuffer compression info for each miplevel. */ + boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; + /* If zero, then disable compression. 
*/ + unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; }; -struct r300_texture { - struct r300_texture_desc desc; +struct r300_resource +{ + struct u_vbuf_resource b; + /* Winsys buffer backing this resource. */ + struct r300_winsys_bo *buf; + struct r300_winsys_cs_handle *cs_buf; enum r300_buffer_domain domain; + unsigned buf_size; + + /* Constant buffers are in user memory. */ + uint8_t *constant_buffer; - /* Pipe buffer backing this texture. */ - struct r300_winsys_buffer *buffer; - struct r300_winsys_cs_buffer *cs_buffer; + /* Texture description (addressing, layout, special features). */ + struct r300_texture_desc tex; /* Registers carrying texture format data. */ /* Only format-independent bits should be filled in. */ struct r300_texture_format_state tx_format; - /* All bits should be filled in. */ - struct r300_texture_fb_state fb_state; - /* hyper-z memory allocs */ + /* Where the texture starts in the buffer. */ + unsigned tex_offset; + + /* HiZ memory allocations. */ struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; - boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS]; boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; /* This is the level tiling flags were last time set for. @@ -418,32 +402,16 @@ struct r300_texture { struct r300_vertex_element_state { unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; + unsigned format_size[PIPE_MAX_ATTRIBS]; - /* If (velem[i].src_format != hw_format[i]), the vertex buffer - * referenced by this vertex element cannot be used for rendering and - * its vertex data must be translated to hw_format[i]. */ - enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; - unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + struct u_vbuf_mgr_elements *vmgr_elements; /* The size of the vertex, in dwords. */ unsigned vertex_size_dwords; - /* This might mean two things: - * - src_format != hw_format, as discussed above. - * - src_offset % 4 != 0. */ - boolean incompatible_layout; - struct r300_vertex_stream_state vertex_stream; }; -struct r300_translate_context { - /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; - - /* Saved and new vertex element state. */ - void *saved_velems, *new_velems; -}; - struct r300_context { /* Parent class */ struct pipe_context context; @@ -456,7 +424,6 @@ struct r300_context { struct r300_screen *screen; /* Draw module. Used mostly for SW TCL. */ - struct gallivm_state *gallivm; struct draw_context* draw; /* Vertex buffer for SW TCL. */ struct pipe_resource* vbo; @@ -471,8 +438,6 @@ struct r300_context { struct blitter_context* blitter; /* Stencil two-sided reference value fallback. */ struct r300_stencilref_context *stencilref_fallback; - /* For translating vertex buffers having incompatible vertex layout. */ - struct r300_translate_context tran; /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this @@ -554,14 +519,6 @@ struct r300_context { /* The pointers to the first and the last atom. */ struct r300_atom *first_dirty, *last_dirty; - /* Vertex buffers for Gallium. */ - /* May contain user buffers. */ - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - /* Contains only non-user buffers. */ - struct pipe_resource *valid_vertex_buffer[PIPE_MAX_ATTRIBS]; - int vertex_buffer_count; - int vertex_buffer_max_index; - boolean any_user_vbs; /* Vertex elements for Gallium. 
*/ struct r300_vertex_element_state *velems; @@ -588,21 +545,23 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; - /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ - boolean incompatible_vb_layout; -#define R300_Z_COMPRESS_44 1 -#define RV350_Z_COMPRESS_88 2 - int z_compression; + boolean cbzb_clear; - boolean z_decomp_rd; + /* Whether ZMASK is enabled. */ + boolean zmask_in_use; + /* Whether ZMASK is being decompressed. */ + boolean zmask_decompress; + /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */ + boolean zmask_locked; + /* The zbuffer the ZMASK of which is locked. */ + struct pipe_surface *locked_zbuffer; + + void *dsa_decompress_zmask; /* two mem block managers for hiz/zmask ram space */ struct mem_block *hiz_mm; - struct mem_block *zmask_mm; - /* upload managers */ - struct u_upload_mgr *upload_vb; - struct u_upload_mgr *upload_ib; + struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; @@ -612,16 +571,10 @@ struct r300_context { /* const tracking for VS */ int vs_const_base; - /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. */ - uint32_t vertex_arrays_cb[(16 * 3 + 1) / 2]; + /* Vertex array state info */ boolean vertex_arrays_dirty; - - /* Whether any buffer (FB, textures, VBOs) has been set, but buffers - * haven't been validated yet. */ - boolean validate_buffers; - /* Whether user buffers have been validated. */ - boolean upload_vb_validated; - boolean upload_ib_validated; + boolean vertex_arrays_indexed; + int vertex_arrays_offset; }; #define foreach_atom(r300, atom) \ @@ -641,9 +594,9 @@ static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf) return (struct r300_surface*)surf; } -static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex) +static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex) { - return (struct r300_texture*)tex; + return (struct r300_resource*)tex; } static INLINE struct r300_context* r300_context(struct pipe_context* context) @@ -675,7 +628,6 @@ static INLINE void r300_mark_atom_dirty(struct r300_context *r300, struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); -void r300_finish(struct r300_context *r300); void r300_flush_cb(void *data); /* Context initialization. 
*/ @@ -688,7 +640,9 @@ void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); /* r300_blit.c */ -void r300_flush_depth_textures(struct r300_context *r300); +void r300_decompress_zmask(struct r300_context *r300); +void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); +void r300_decompress_zmask_locked(struct r300_context *r300); /* r300_query.c */ void r300_resume_query(struct r300_context *r300, @@ -696,9 +650,6 @@ void r300_resume_query(struct r300_context *r300, void r300_stop_query(struct r300_context *r300); /* r300_render_translate.c */ -void r300_begin_vertex_translate(struct r300_context *r300, - int min_index, int max_index); -void r300_end_vertex_translate(struct r300_context *r300); void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, @@ -714,8 +665,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG, - R300_CHANGED_ZCLEAR_FLAG, + R300_CHANGED_HYPERZ_FLAG, R300_CHANGED_MULTIWRITE }; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 6726f100e1b..2e52dfa43c6 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -35,12 +35,6 @@ * that they neatly hide away, and don't have the cost of function setup,so * we're going to use them. */ -#ifdef DEBUG -#define CS_DEBUG(x) x -#else -#define CS_DEBUG(x) -#endif - /** * Command submission setup. */ @@ -50,22 +44,29 @@ struct r300_winsys_screen *cs_winsys = (context)->rws; \ int cs_count = 0; (void) cs_count; (void) cs_winsys; +#ifdef DEBUG + #define BEGIN_CS(size) do { \ assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \ - CS_DEBUG(cs_count = size;) \ + cs_count = size; \ } while (0) -#ifdef DEBUG #define END_CS do { \ if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ cs_count, __FUNCTION__, __FILE__, __LINE__); \ cs_count = 0; \ } while (0) + +#define CS_USED_DW(x) cs_count -= (x) + #else + +#define BEGIN_CS(size) #define END_CS -#endif +#define CS_USED_DW(x) +#endif /** * Writing pure DWORDs. @@ -73,7 +74,7 @@ #define OUT_CS(value) do { \ cs_copy->buf[cs_copy->cdw++] = (value); \ - CS_DEBUG(cs_count--;) \ + CS_USED_DW(1); \ } while (0) #define OUT_CS_32F(value) \ @@ -98,7 +99,7 @@ #define OUT_CS_TABLE(values, count) do { \ memcpy(cs_copy->buf + cs_copy->cdw, values, count * 4); \ cs_copy->cdw += count; \ - CS_DEBUG(cs_count -= count;) \ + CS_USED_DW(count); \ } while (0) @@ -106,27 +107,11 @@ * Writing relocations. 
*/ -#define OUT_CS_RELOC(bo, offset) do { \ - assert(bo); \ - OUT_CS(offset); \ - cs_winsys->cs_write_reloc(cs_copy, bo); \ - CS_DEBUG(cs_count -= 2;) \ -} while (0) - -#define OUT_CS_BUF_RELOC(bo, offset) do { \ - assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset); \ -} while (0) - -#define OUT_CS_TEX_RELOC(tex, offset) do { \ - assert(tex); \ - OUT_CS_RELOC(tex->cs_buffer, offset); \ -} while (0) - -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo) do { \ - assert(bo); \ - cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf); \ - CS_DEBUG(cs_count -= 2;) \ +#define OUT_CS_RELOC(r) do { \ + assert((r)); \ + assert((r)->cs_buf); \ + cs_winsys->cs_write_reloc(cs_copy, (r)->cs_buf); \ + CS_USED_DW(2); \ } while (0) @@ -135,7 +120,7 @@ */ #define WRITE_CS_TABLE(values, count) do { \ - CS_DEBUG(assert(cs_count == 0);) \ + assert(cs_count == 0); \ memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \ cs_copy->cdw += (count); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index d6aa90bd053..b60cfd1f248 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -27,6 +27,7 @@ #include <stdio.h> static const struct debug_named_value debug_options[] = { + { "info", DBG_INFO, "Print hardware info"}, { "fp", DBG_FP, "Log fragment program compilation" }, { "vp", DBG_VP, "Log vertex program compilation" }, { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, @@ -49,6 +50,8 @@ static const struct debug_named_value debug_options[] = { { "noimmd", DBG_NO_IMMD, "Disable immediate mode" }, { "noopt", DBG_NO_OPT, "Disable shader optimizations" }, { "nocbzb", DBG_NO_CBZB, "Disable fast color clear" }, + { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" }, + { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index d14cdcbbaf0..e2e4719ec82 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -89,7 +89,7 @@ static void get_rc_constant_state( struct rc_constant * constant) { struct r300_textures_state* texstate = r300->textures_state.state; - struct r300_texture *tex; + struct r300_resource *tex; assert(constant->Type == RC_CONSTANT_STATE); @@ -101,19 +101,19 @@ static void get_rc_constant_state( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: - tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); - vec[0] = 1.0 / tex->desc.width0; - vec[1] = 1.0 / tex->desc.height0; + tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); + vec[0] = 1.0 / tex->tex.width0; + vec[1] = 1.0 / tex->tex.height0; vec[2] = 0; vec[3] = 1; break; case RC_STATE_R300_TEXSCALE_FACTOR: - tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); /* Add a small number to the texture size to work around rounding errors in hw. 
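The consolidated OUT_CS_RELOC above takes the resource itself (anything carrying a cs_buf handle) and accounts for two dwords. A hypothetical emit helper showing the register-plus-relocation pattern used throughout the rest of this diff:

/* Sketch only: write a buffer offset into a register and follow it with
 * the relocation that the kernel patches at submission time. */
static void
emit_reg_with_reloc(struct r300_context *r300,
                    struct r300_resource *res,
                    unsigned reg, unsigned offset)
{
   CS_LOCALS(r300);

   BEGIN_CS(4);
   OUT_CS_REG(reg, offset);   /* 2 dwords: register header + value */
   OUT_CS_RELOC(res);         /* 2 dwords of relocation info       */
   END_CS;
}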
*/ - vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f); - vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f); - vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f); + vec[0] = tex->b.b.b.width0 / (tex->tex.width0 + 0.001f); + vec[1] = tex->b.b.b.height0 / (tex->tex.height0 + 0.001f); + vec[2] = tex->b.b.b.depth0 / (tex->tex.depth0 + 0.001f); vec[3] = 1; break; @@ -352,11 +352,9 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); if (aa->dest) { - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset); - - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch); + OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset); + OUT_CS_RELOC(aa->dest); + OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch); } OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); @@ -391,11 +389,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) for (i = 0; i < fb->nr_cbufs; i++) { surf = r300_surface(fb->cbufs[i]); - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset); + OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch); + OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch); + OUT_CS_RELOC(surf); } /* Set up the ZB part of the CBZB clear. */ @@ -404,11 +402,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset); + OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch); + OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch); + OUT_CS_RELOC(surf); DBG(r300, DBG_CBZB, "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, @@ -420,19 +418,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->format); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset); + OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset); + OUT_CS_RELOC(surf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch); + OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch); + OUT_CS_RELOC(surf); if (can_hyperz) { uint32_t surf_pitch; - struct r300_texture *tex; + struct r300_resource *tex; int level = surf->base.u.tex.level; - tex = r300_texture(surf->base.texture); + tex = r300_resource(surf->base.texture); surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ if (r300->screen->caps.hiz_ram) { if (tex->hiz_mem[level]) { @@ -443,14 +442,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); } } + /* Z Mask RAM. 
(compressed zbuffer) */ - if (tex->zmask_mem[level]) { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); - } + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); } } @@ -462,6 +457,7 @@ void r300_emit_hyperz_state(struct r300_context *r300, { struct r300_hyperz_state *z = state; CS_LOCALS(r300); + if (z->flush) WRITE_CS_TABLE(&z->cb_flush_begin, size); else @@ -563,14 +559,12 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; query->begin_emitted = TRUE; - query->flushed = FALSE; } static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); assert(caps->num_frag_pipes); @@ -588,25 +582,25 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 3) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4); + OUT_CS_RELOC(r300->query_current); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 2) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4); + OUT_CS_RELOC(r300->query_current); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); + OUT_CS_RELOC(r300->query_current); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); + OUT_CS_RELOC(r300->query_current); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -622,13 +616,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, static void rv530_emit_query_end_single_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -636,16 +629,15 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, static void rv530_emit_query_end_double_z(struct r300_context *r300, struct r300_query *query) { - struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer; CS_LOCALS(r300); BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); - OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); - 
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4); + OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); + OUT_CS_RELOC(r300->query_current); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -784,7 +776,7 @@ void r300_emit_textures_state(struct r300_context *r300, { struct r300_textures_state *allstate = (struct r300_textures_state*)state; struct r300_texture_sampler_state *texstate; - struct r300_texture *tex; + struct r300_resource *tex; unsigned i; CS_LOCALS(r300); @@ -794,7 +786,7 @@ void r300_emit_textures_state(struct r300_context *r300, for (i = 0; i < allstate->count; i++) { if ((1 << i) & allstate->tx_enable) { texstate = &allstate->regs[i]; - tex = r300_texture(allstate->sampler_views[i]->base.texture); + tex = r300_resource(allstate->sampler_views[i]->base.texture); OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0); OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1); @@ -805,96 +797,56 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); - OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_TEX_RELOC(tex, texstate->format.tile_config); + OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config); + OUT_CS_RELOC(tex); } } END_CS; } -static void r300_update_vertex_arrays_cb(struct r300_context *r300, unsigned packet_size) +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed) { - struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; + struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; + struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; - unsigned *hw_format_size = r300->velems->hw_format_size; - unsigned size1, size2, vertex_array_count = r300->velems->count; + struct r300_resource *buf; int i; - CB_LOCALS; + unsigned vertex_array_count = r300->velems->count; + unsigned packet_size = (vertex_array_count * 3 + 1) / 2; + struct pipe_vertex_buffer *vb1, *vb2; + unsigned *hw_format_size; + unsigned size1, size2; + CS_LOCALS(r300); + + BEGIN_CS(2 + packet_size + vertex_array_count * 2); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); + OUT_CS(vertex_array_count | (!indexed ? 
R300_VC_FORCE_PREFETCH : 0)); + + hw_format_size = r300->velems->format_size; - BEGIN_CB(r300->vertex_arrays_cb, packet_size); for (i = 0; i < vertex_array_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; vb2 = &vbuf[velem[i+1].vertex_buffer_index]; size1 = hw_format_size[i]; size2 = hw_format_size[i+1]; - OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CB(vb1->buffer_offset + velem[i].src_offset); - OUT_CB(vb2->buffer_offset + velem[i+1].src_offset); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); } if (vertex_array_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; size1 = hw_format_size[i]; - OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CB(vb1->buffer_offset + velem[i].src_offset); - } - END_CB; - - r300->vertex_arrays_dirty = FALSE; -} - -void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_resource **valid_vbuf = r300->valid_vertex_buffer; - struct pipe_vertex_element *velem = r300->velems->velem; - struct r300_buffer *buf; - int i; - unsigned vertex_array_count = r300->velems->count; - unsigned packet_size = (vertex_array_count * 3 + 1) / 2; - CS_LOCALS(r300); - - BEGIN_CS(2 + packet_size + vertex_array_count * 2); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); - - if (!offset) { - if (r300->vertex_arrays_dirty) { - r300_update_vertex_arrays_cb(r300, packet_size); - } - OUT_CS_TABLE(r300->vertex_arrays_cb, packet_size); - } else { - struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size = r300->velems->hw_format_size; - unsigned size1, size2; - - for (i = 0; i < vertex_array_count - 1; i += 2) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - vb2 = &vbuf[velem[i+1].vertex_buffer_index]; - size1 = hw_format_size[i]; - size2 = hw_format_size[i+1]; - - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | - R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); - } - - if (vertex_array_count & 1) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - size1 = hw_format_size[i]; - - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - } + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); } for (i = 0; i < vertex_array_count; i++) { - buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b); + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); } END_CS; } @@ -919,7 +871,8 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0); + OUT_CS(0); + OUT_CS_RELOC(r300_resource(r300->vbo)); END_CS; } @@ -1097,17 +1050,6 @@ static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint1 END_CS; } -static 
void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) @@ -1118,13 +1060,13 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) (struct r300_hyperz_state*)r300->hyperz_state.state; struct r300_screen* r300screen = r300->screen; uint32_t stride, offset = 0, height, offset_shift; - struct r300_texture* tex; + struct r300_resource* tex; int i; - tex = r300_texture(fb->zsbuf->texture); + tex = r300_resource(fb->zsbuf->texture); offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; + stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level]; /* convert from pixels to 4x4 blocks */ stride = ALIGN_DIVUP(stride, 4); @@ -1153,42 +1095,21 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0; - struct r300_texture* tex; - uint32_t i, height; - int mult, offset_shift; - - tex = r300_texture(fb->zsbuf->texture); - stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level]; - - offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs; - - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; - - height = ALIGN_DIVUP(fb->zsbuf->height, mult); - - offset_shift = 4; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + struct r300_resource *tex; + CS_LOCALS(r300); - /* okay have width in pixels - divide by block width */ - stride = ALIGN_DIVUP(stride, mult); - /* have width in blocks - divide by number of fragment pipes screen width */ - /* 16 blocks per dword */ - stride = ALIGN_DIVUP(stride, 16); + tex = r300_resource(fb->zsbuf->texture); - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); - } + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(0); + OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(0); + END_CS; /* Mark the current zbuffer's zmask as in use. */ - tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->zmask_in_use = TRUE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_ztop_state(struct r300_context* r300, @@ -1219,7 +1140,7 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture *tex; + struct r300_resource *tex; unsigned i; boolean flushed = FALSE; @@ -1227,16 +1148,16 @@ validate: if (r300->fb_state.dirty) { /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { - tex = r300_texture(fb->cbufs[i]->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + tex = r300_resource(fb->cbufs[i]->texture); + assert(tex && tex->buf && "cbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, r300_surface(fb->cbufs[i])->domain); } /* ...depth buffer... 
*/ if (fb->zsbuf) { - tex = r300_texture(fb->zsbuf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + tex = r300_resource(fb->zsbuf->texture); + assert(tex && tex->buf && "zsbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, r300_surface(fb->zsbuf)->domain); } } @@ -1247,35 +1168,35 @@ validate: continue; } - tex = r300_texture(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, tex->domain, 0); + tex = r300_resource(texstate->sampler_views[i]->base.texture); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, tex->domain, 0); } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buffer, + r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) - r300->rws->cs_add_reloc(r300->cs, r300_buffer(r300->vbo)->cs_buf, - r300_buffer(r300->vbo)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf, + r300_resource(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ - if (do_validate_vertex_buffers) { - struct pipe_resource **buf = r300->valid_vertex_buffer; - struct pipe_resource **last = r300->valid_vertex_buffer + - r300->vertex_buffer_count; + if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { + struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer; + struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer + + r300->vbuf_mgr->nr_real_vertex_buffers; for (; buf != last; buf++) { if (!*buf) continue; - r300->rws->cs_add_reloc(r300->cs, r300_buffer(*buf)->cs_buf, - r300_buffer(*buf)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf, + r300_resource(*buf)->domain, 0); } } /* ...and index buffer for HWTCL path. */ if (index_buffer) - r300->rws->cs_add_reloc(r300->cs, r300_buffer(index_buffer)->cs_buf, - r300_buffer(index_buffer)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, + r300_resource(index_buffer)->domain, 0); /* Now do the validation. */ if (!r300->rws->cs_validate(r300->cs)) { diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index b250532ba92..c77cc08539d 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -36,16 +36,24 @@ static void r300_flush(struct pipe_context* pipe, struct pipe_fence_handle** fence) { struct r300_context *r300 = r300_context(pipe); - struct r300_query *query; struct r300_atom *atom; - struct r300_fence **rfence = (struct r300_fence**)fence; - - u_upload_flush(r300->upload_vb); - u_upload_flush(r300->upload_ib); + struct r300_winsys_bo **rfence = (struct r300_winsys_bo**)fence; if (r300->draw && !r300->draw_vbo_locked) r300_draw_flush_vbuf(r300); + if (rfence) { + /* Create a fence, which is a dummy BO. */ + *rfence = r300->rws->buffer_create(r300->rws, 1, 1, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + R300_DOMAIN_GTT); + /* Add the fence as a dummy relocation. */ + r300->rws->cs_add_reloc(r300->cs, + r300->rws->buffer_get_cs_handle(*rfence), + R300_DOMAIN_GTT, R300_DOMAIN_GTT); + } + if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); @@ -62,32 +70,29 @@ static void r300_flush(struct pipe_context* pipe, r300_mark_atom_dirty(r300, atom); } } + r300->vertex_arrays_dirty = TRUE; /* Unmark HWTCL state for SWTCL. 
*/ if (!r300->screen->caps.has_tcl) { r300->vs_state.dirty = FALSE; r300->vs_constants.dirty = FALSE; } - - r300->validate_buffers = TRUE; - r300->upload_vb_validated = FALSE; - r300->upload_ib_validated = FALSE; } else { - /* Even if hw is not dirty, we should at least reset the CS in case - * the space checking failed for the first draw operation. */ - r300->rws->cs_flush(r300->cs); - } - - /* reset flushed query */ - foreach(query, &r300->query_list) { - query->flushed = TRUE; + if (rfence) { + /* We have to create a fence object, but the command stream is empty + * and we cannot emit an empty CS. We must write some regs then. */ + CS_LOCALS(r300); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); + r300->rws->cs_flush(r300->cs); + } else { + /* Even if hw is not dirty, we should at least reset the CS in case + * the space checking failed for the first draw operation. */ + r300->rws->cs_flush(r300->cs); + } } - /* Create a new fence. */ - if (rfence) { - *rfence = CALLOC_STRUCT(r300_fence); - pipe_reference_init(&(*rfence)->reference, 1); - (*rfence)->ctx = r300; + if (flags & PIPE_FLUSH_FRAME) { + r300->rws->cs_sync_flush(r300->cs); } } diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 6d4091dc87d..cec7473009a 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -152,13 +152,13 @@ static void get_external_state( for (i = 0; i < texstate->sampler_state_count; i++) { struct r300_sampler_state *s = texstate->sampler_states[i]; struct r300_sampler_view *v = texstate->sampler_views[i]; - struct r300_texture *t; + struct r300_resource *t; if (!s || !v) { continue; } - t = r300_texture(texstate->sampler_views[i]->base.texture); + t = r300_resource(texstate->sampler_views[i]->base.texture); if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { state->unit[i].compare_mode_enabled = 1; @@ -181,7 +181,7 @@ static void get_external_state( state->unit[i].non_normalized_coords = !s->state.normalized_coords; /* XXX this should probably take into account STR, not just S. */ - if (t->desc.is_npot) { + if (t->tex.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; @@ -201,7 +201,7 @@ static void get_external_state( state->unit[i].wrap_mode = RC_WRAP_NONE; } - if (t->desc.b.b.target == PIPE_TEXTURE_3D) + if (t->b.b.b.target == PIPE_TEXTURE_3D) state->unit[i].clamp_and_scale_before_fetch = TRUE; } } @@ -298,44 +298,98 @@ static void r300_emit_fs_code_to_buffer( } } else { /* r300 */ struct r300_fragment_program_code *code = &generic_code->code.r300; - - shader->cb_code_size = 19 + - (r300->screen->caps.is_r400 ? 2 : 0) + - code->alu.length * 4 + - (code->tex.length ? (1 + code->tex.length) : 0) + - imm_count * 5; + unsigned int alu_length = code->alu.length; + unsigned int alu_iterations = ((alu_length - 1) / 64) + 1; + unsigned int tex_length = code->tex.length; + unsigned int tex_iterations = + tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0; + unsigned int iterations = + alu_iterations > tex_iterations ? alu_iterations : tex_iterations; + unsigned int bank = 0; + + shader->cb_code_size = 15 + + /* R400_US_CODE_BANK */ + (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) + + /* R400_US_CODE_EXT */ + (r300->screen->caps.is_r400 ? 2 : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */ + (code->r390_mode ? (5 * alu_iterations) : 4) + + /* R400_US_ALU_EXT_ADDR_[0-63] */ + (code->r390_mode ? 
(code->alu.length) : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */ + code->alu.length * 4 + + /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */ + (code->tex.length > 0 ? code->tex.length + tex_iterations : 0) + + imm_count * 5; NEW_CB(shader->cb_code, shader->cb_code_size); - if (r300->screen->caps.is_r400) - OUT_CB_REG(R400_US_CODE_BANK, 0); - OUT_CB_REG(R300_US_CONFIG, code->config); OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); + if (code->r390_mode) { + OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext); + } else if (r300->screen->caps.is_r400) { + /* This register appears to affect shaders even if r390_mode is + * disabled, so it needs to be set to 0 for shaders that + * don't use r390_mode. */ + OUT_CB_REG(R400_US_CODE_EXT, 0); + } + OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4); OUT_CB_TABLE(code->code_addr, 4); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_inst); + do { + unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64); + unsigned int bank_alu_offset = bank * 64; + unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32); + unsigned int bank_tex_offset = bank * 32; + + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ? + (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2 + } + + if (bank_alu_length > 0) { + OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_addr); + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst); - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_inst); + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr); - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_addr); + if (code->r390_mode) { + OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr); + } + } + + if (bank_tex_length > 0) { + OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length); + OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length); + } + + alu_length -= bank_alu_length; + tex_length -= bank_tex_length; + bank++; + } while(code->r390_mode && (alu_length > 0 || tex_length > 0)); - if (code->tex.length) { - OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - OUT_CB_TABLE(code->tex.inst, code->tex.length); + /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders + * will be rendered incorrectly. */ + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, + code->r390_mode ? R400_R390_MODE_ENABLE : 0); } /* Emit immediates. 
*/ @@ -384,12 +438,17 @@ static void r300_translate_fragment_shader( compiler.code = &shader->code; compiler.state = shader->compare_state; compiler.Base.is_r500 = r300->screen->caps.is_r500; + compiler.Base.is_r400 = r300->screen->caps.is_r400; compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT); compiler.Base.has_half_swizzles = TRUE; compiler.Base.has_presub = TRUE; - compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32; + compiler.Base.max_temp_regs = + compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32); compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; - compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_alu_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64; + compiler.Base.max_tex_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32; compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = &shader->inputs; @@ -414,6 +473,13 @@ static void r300_translate_fragment_shader( r300_tgsi_to_rc(&ttr, tokens); + if (ttr.error) { + fprintf(stderr, "r300 FP: Cannot translate a shader. " + "Using a dummy shader instead.\n"); + r300_dummy_fragment_shader(r300, shader); + return; + } + if (!r300->screen->caps.is_r500 || compiler.Base.Program.Constants.Count > 200) { compiler.Base.remove_unused_constants = TRUE; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index c22e307c679..873e0209d42 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -127,7 +127,7 @@ static boolean r300_can_hiz(struct r300_context *r300) z->current_func, dsa_state->z_stencil_control); return FALSE; } - } + } return TRUE; } @@ -137,9 +137,8 @@ static void r300_update_hyperz(struct r300_context* r300) (struct r300_hyperz_state*)r300->hyperz_state.state; struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_texture *zstex = - fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL; - boolean zmask_in_use = FALSE; + struct r300_resource *zstex = + fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; @@ -158,42 +157,40 @@ static void r300_update_hyperz(struct r300_context* r300) if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level]; hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - /* Z fastfill. */ - if (zmask_in_use) { - z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ - } - /* Zbuffer compression. */ - if (zmask_in_use && r300->z_compression) { - z->zb_bw_cntl |= R300_RD_COMP_ENABLE; - if (r300->z_decomp_rd == false) + if (r300->zmask_in_use && !r300->zmask_locked) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | + /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ + R300_RD_COMP_ENABLE; + + if (!r300->zmask_decompress) { z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + } } - /* RV350 and up optimizations. */ - /* The section 10.4.9 in the docs is a lie. 
*/ - if (r300->z_compression == RV350_Z_COMPRESS_88) + + if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) { z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } + + if (hiz_in_use && r300_can_hiz(r300)) { + z->zb_bw_cntl |= R300_HIZ_ENABLE | + r300_get_hiz_min(r300); - if (hiz_in_use) { - bool can_hiz = r300_can_hiz(r300); - if (can_hiz) { - z->zb_bw_cntl |= R300_HIZ_ENABLE; - z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; - z->sc_hyperz |= r300_get_sc_hz_max(r300); - z->zb_bw_cntl |= r300_get_hiz_min(r300); + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | + r300_get_sc_hz_max(r300); + + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 | + R500_HIZ_EQUAL_REJECT_ENABLE; } } /* R500-specific features and optimizations. */ if (r300->screen->caps.is_r500) { - z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; - z->zb_bw_cntl |= - R500_HIZ_EQUAL_REJECT_ENABLE | - R500_PEQ_PACKING_ENABLE | - R500_COVERED_PTR_MASKING_ENABLE; + z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; } } @@ -297,26 +294,10 @@ static void r300_update_hiz_clear(struct r300_context *r300) r300->hiz_clear.size = height * 4; } -static void r300_update_zmask_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - int mult; - - if (r300->z_compression == RV350_Z_COMPRESS_88) - mult = 8; - else - mult = 4; - - height = ALIGN_DIVUP(fb->zsbuf->height, mult); - - r300->zmask_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } @@ -324,64 +305,24 @@ void r300_update_hyperz_state(struct r300_context* r300) if (r300->hiz_clear.dirty) { r300_update_hiz_clear(r300); } - if (r300->zmask_clear.dirty) { - r300_update_zmask_clear(r300); - } } void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) { - struct r300_texture *tex; + struct r300_resource *tex; uint32_t zsize, ndw; int level = surf->base.u.tex.level; - tex = r300_texture(surf->base.texture); + tex = r300_resource(surf->base.texture); if (tex->hiz_mem[level]) return; - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); + zsize = tex->tex.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->b.b.b.format); ndw = ALIGN_DIVUP(zsize, 64); tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); - return; -} - -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) -{ - int bsize = 256; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - struct r300_texture *tex; - - tex = r300_texture(surf->base.texture); - - /* We currently don't handle decompression for 3D textures and cubemaps - * correctly. */ - if (tex->desc.b.b.target != PIPE_TEXTURE_1D && - tex->desc.b.b.target != PIPE_TEXTURE_2D && - tex->desc.b.b.target != PIPE_TEXTURE_RECT) - return; - - /* Cannot flush zmask of 16-bit zbuffers. 
*/ - if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16) - return; - - if (tex->zmask_mem[level]) - return; - - zsize = tex->desc.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->desc.b.b.format); - - /* each zmask dword represents 16 4x4 blocks - which is 256 pixels - or 16 8x8 depending on the gb peq flag = 1024 pixels */ - if (compress == RV350_Z_COMPRESS_88) - bsize = 1024; - - ndw = ALIGN_DIVUP(zsize, bsize); - tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); - return; } boolean r300_hyperz_init_mm(struct r300_context *r300) @@ -389,15 +330,9 @@ boolean r300_hyperz_init_mm(struct r300_context *r300) struct r300_screen* r300screen = r300->screen; int frag_pipes = r300screen->caps.num_frag_pipes; - r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); - if (!r300->zmask_mm) - return FALSE; - if (r300screen->caps.hiz_ram) { r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); if (!r300->hiz_mm) { - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; return FALSE; } } @@ -413,7 +348,4 @@ void r300_hyperz_destroy_mm(struct r300_context *r300) u_mmDestroy(r300->hiz_mm); r300->hiz_mm = NULL; } - - u_mmDestroy(r300->zmask_mm); - r300->zmask_mm = NULL; } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h index 30a23ec6493..d4c8e7c60a9 100644 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -28,8 +28,8 @@ struct r300_context; void r300_update_hyperz_state(struct r300_context* r300); void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); -void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); boolean r300_hyperz_init_mm(struct r300_context *r300); void r300_hyperz_destroy_mm(struct r300_context *r300); + #endif diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 6223e043210..717485f43cb 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -57,10 +57,10 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. */ - q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, + q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, q->domain); - q->cs_buffer = r300->rws->buffer_get_cs_handle(r300->rws, q->buffer); + q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf); return (struct pipe_query*)q; } @@ -68,10 +68,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, static void r300_destroy_query(struct pipe_context* pipe, struct pipe_query* query) { - struct r300_context *r300 = r300_context(pipe); struct r300_query* q = r300_query(query); - r300->rws->buffer_reference(r300->rws, &q->buffer, NULL); + r300_winsys_bo_reference(&q->buf, NULL); remove_from_list(q); FREE(query); } @@ -128,16 +127,12 @@ static boolean r300_get_query_result(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_query *q = r300_query(query); - unsigned flags, i; + unsigned i; uint32_t temp, *map; - uint64_t *result = (uint64_t*)vresult; - - if (!q->flushed) - pipe->flush(pipe, 0, NULL); - - flags = PIPE_TRANSFER_READ | (!wait ? 
PIPE_TRANSFER_DONTBLOCK : 0); - map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags); + map = r300->rws->buffer_map(q->buf, r300->cs, + PIPE_TRANSFER_READ | + (!wait ? PIPE_TRANSFER_DONTBLOCK : 0)); if (!map) return FALSE; @@ -148,9 +143,9 @@ static boolean r300_get_query_result(struct pipe_context* pipe, map++; } - r300->rws->buffer_unmap(r300->rws, q->buffer); + r300->rws->buffer_unmap(q->buf); - *result = temp; + *((uint64_t*)vresult) = temp; return TRUE; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index d1154dee40a..1d93dab2ca2 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2162,14 +2162,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* R4xx extended fragment shader registers. */ #define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ -# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01 -# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02 -# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04 +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) # define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 -# define R400_ADDR0_EXT_A_MSB_BIT 0x10 -# define R400_ADDR1_EXT_A_MSB_BIT 0x20 -# define R400_ADDR2_EXT_A_MSB_BIT 0x40 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) # define R400_ADDRD_EXT_A_MSB_BIT 0x80 + #define R400_US_CODE_BANK 0x46b8 # define R400_BANK_SHIFT 0 # define R400_BANK_MASK 0xf diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b35822c82f8..2ead8667bda 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -127,6 +127,20 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias) END_CS; } +static void r300_emit_draw_init(struct r300_context *r300, unsigned mode, + unsigned min_index, unsigned max_index) +{ + CS_LOCALS(r300); + + BEGIN_CS(5); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(max_index); + OUT_CS(min_index); + END_CS; +} + /* This function splits the index bias value into two parts: * - buffer_offset: the value that can be safely added to buffer offsets * in r300_emit_vertex_arrays (it must yield a positive offset when added to @@ -136,7 +150,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias) static void r300_split_index_bias(struct r300_context *r300, int index_bias, int *buffer_offset, int *index_offset) { - struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer; + struct pipe_vertex_buffer *vb, *vbufs = r300->vbuf_mgr->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; unsigned i, size; int max_neg_bias; @@ -235,24 +249,11 @@ static boolean r300_emit_states(struct r300_context *r300, /* Validate buffers and emit dirty state if needed. */ if (first_draw) { - if (r300->validate_buffers) { - if (!r300_emit_buffer_validate(r300, validate_vbos, - index_buffer)) { - fprintf(stderr, "r300: CS space validation failed. " - "(not enough memory?) Skipping rendering.\n"); - return FALSE; - } - - /* Consider the validation done only if everything was validated. */ - if (validate_vbos) { - r300->validate_buffers = FALSE; - if (r300->any_user_vbs) - r300->upload_vb_validated = TRUE; - if (r300->index_buffer.buffer && - r300_is_user_buffer(r300->index_buffer.buffer)) { - r300->upload_ib_validated = TRUE; - } - } + if (!r300_emit_buffer_validate(r300, validate_vbos, + index_buffer)) { + fprintf(stderr, "r300: CS space validation failed. " + "(not enough memory?) 
Skipping rendering.\n"); + return FALSE; } r300_emit_dirty_state(r300); @@ -263,9 +264,17 @@ static boolean r300_emit_states(struct r300_context *r300, r500_emit_index_bias(r300, 0); } - if (emit_vertex_arrays) + if (emit_vertex_arrays && + (r300->vertex_arrays_dirty || + r300->vertex_arrays_indexed != indexed || + r300->vertex_arrays_offset != buffer_offset)) { r300_emit_vertex_arrays(r300, buffer_offset, indexed); + r300->vertex_arrays_dirty = FALSE; + r300->vertex_arrays_indexed = indexed; + r300->vertex_arrays_offset = buffer_offset; + } + if (emit_vertex_arrays_swtcl) r300_emit_vertex_arrays_swtcl(r300, indexed); } @@ -294,7 +303,8 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300, if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) flags |= PREP_FIRST_DRAW; - return r300_emit_states(r300, flags, index_buffer, buffer_offset, index_bias); + return r300_emit_states(r300, flags, index_buffer, buffer_offset, + index_bias); } static boolean immd_is_good_idea(struct r300_context *r300, @@ -325,17 +335,12 @@ static boolean immd_is_good_idea(struct r300_context *r300, vbi = velem->vertex_buffer_index; if (!checked[vbi]) { - buf = r300->valid_vertex_buffer[vbi]; + buf = r300->vbuf_mgr->real_vertex_buffer[vbi]; - if (!(r300_buffer(buf)->domain & R300_DOMAIN_GTT)) { + if ((r300_resource(buf)->domain != R300_DOMAIN_GTT)) { return FALSE; } - if (r300_buffer_is_referenced(&r300->context, buf, - R300_REF_CS | R300_REF_HW)) { - /* It's a very bad idea to map it... */ - return FALSE; - } checked[vbi] = TRUE; } } @@ -346,10 +351,9 @@ static boolean immd_is_good_idea(struct r300_context *r300, * The HWTCL draw functions. * ****************************************************************************/ -static void r300_emit_draw_arrays_immediate(struct r300_context *r300, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, unsigned start, + unsigned count) { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; @@ -360,7 +364,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned vertex_size = r300->velems->vertex_size_dwords; /* The number of dwords for this draw operation. */ - unsigned dwords = 9 + count * vertex_size; + unsigned dwords = 4 + count * vertex_size; /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; @@ -370,9 +374,8 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ - uint32_t* map[PIPE_MAX_ATTRIBS]; + uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; uint32_t* mapelem[PIPE_MAX_ATTRIBS]; - struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0}; CS_LOCALS(r300); @@ -382,29 +385,25 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; - size[i] = r300->velems->hw_format_size[i] / 4; + size[i] = r300->velems->format_size[i] / 4; vbi = velem->vertex_buffer_index; - vbuf = &r300->vertex_buffer[vbi]; + vbuf = &r300->vbuf_mgr->vertex_buffer[vbi]; stride[i] = vbuf->stride / 4; /* Map the buffer. 
*/ - if (!transfer[vbi]) { - map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, - r300->valid_vertex_buffer[vbi], - PIPE_TRANSFER_READ, - &transfer[vbi]); + if (!map[vbi]) { + map[vbi] = (uint32_t*)r300->rws->buffer_map( + r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf, + r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; } mapelem[i] = map[vbi] + (velem->src_offset / 4); } + r300_emit_draw_init(r300, mode, 0, count-1); + BEGIN_CS(dwords); - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); @@ -421,9 +420,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, for (i = 0; i < vertex_element_count; i++) { vbi = r300->velems->velem[i].vertex_buffer_index; - if (transfer[vbi]) { - pipe_buffer_unmap(&r300->context, transfer[vbi]); - transfer[vbi] = NULL; + if (map[vbi]) { + r300->rws->buffer_unmap(r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf); + map[vbi] = NULL; } } } @@ -441,15 +440,12 @@ static void r300_emit_draw_arrays(struct r300_context *r300, return; } - BEGIN_CS(7 + (alt_num_verts ? 2 : 0)); + r300_emit_draw_init(r300, mode, 0, count-1); + + BEGIN_CS(2 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300_translate_primitive(mode) | @@ -471,22 +467,16 @@ static void r300_emit_draw_elements(struct r300_context *r300, boolean alt_num_verts = count > 65535; CS_LOCALS(r300); - if (count >= (1 << 24)) { + if (count >= (1 << 24) || maxIndex >= (1 << 24)) { fprintf(stderr, "r300: Got a huge number of vertices: %i, " - "refusing to render.\n", count); + "refusing to render (maxIndex: %i).\n", count, maxIndex); return; } DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", count, minIndex, maxIndex); - BEGIN_CS(5); - OUT_CS_REG(R300_GA_COLOR_CONTROL, - r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(maxIndex); - OUT_CS(minIndex); - END_CS; + r300_emit_draw_init(r300, mode, minIndex, maxIndex); /* If start is odd, render the first triangle with indices embedded * in the command stream. This will increase start by 3 and make it @@ -527,30 +517,105 @@ static void r300_emit_draw_elements(struct r300_context *r300, (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } - /* INDX_BUFFER is a truly special packet3. - * Unlike most other packet3, where the offset is after the count, - * the order is reversed, so the relocation ends up carrying the - * size of the indexbuf instead of the offset. 
- */ OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_BUF_RELOC(indexBuffer, count_dwords); + OUT_CS(count_dwords); + OUT_CS_RELOC(r300_resource(indexBuffer)); + END_CS; +} + +static void r300_draw_elements_immediate(struct r300_context *r300, + int indexBias, unsigned minIndex, + unsigned maxIndex, unsigned mode, + unsigned start, unsigned count) +{ + uint8_t *ptr1; + uint16_t *ptr2; + uint32_t *ptr4; + unsigned index_size = r300->index_buffer.index_size; + unsigned i, count_dwords = index_size == 4 ? count : (count + 1) / 2; + CS_LOCALS(r300); + + /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ + if (!r300_prepare_for_rendering(r300, + PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_INDEXED, NULL, 2+count_dwords, 0, indexBias)) + return; + r300_emit_draw_init(r300, mode, minIndex, maxIndex); + + BEGIN_CS(2 + count_dwords); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords); + + switch (index_size) { + case 1: + ptr1 = r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr1 += start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + r300_translate_primitive(mode)); + + if (indexBias && !r300->screen->caps.index_bias_supported) { + for (i = 0; i < count-1; i += 2) + OUT_CS(((ptr1[i+1] + indexBias) << 16) | + (ptr1[i] + indexBias)); + + if (count & 1) + OUT_CS(ptr1[i] + indexBias); + } else { + for (i = 0; i < count-1; i += 2) + OUT_CS(((ptr1[i+1]) << 16) | + (ptr1[i] )); + + if (count & 1) + OUT_CS(ptr1[i]); + } + break; + + case 2: + ptr2 = (uint16_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr2 += start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + r300_translate_primitive(mode)); + + if (indexBias && !r300->screen->caps.index_bias_supported) { + for (i = 0; i < count-1; i += 2) + OUT_CS(((ptr2[i+1] + indexBias) << 16) | + (ptr2[i] + indexBias)); + + if (count & 1) + OUT_CS(ptr2[i] + indexBias); + } else { + OUT_CS_TABLE(ptr2, count_dwords); + } + break; + + case 4: + ptr4 = (uint32_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; + ptr4 += start; + + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit | + r300_translate_primitive(mode)); + + if (indexBias && !r300->screen->caps.index_bias_supported) { + for (i = 0; i < count; i++) + OUT_CS(ptr4[i] + indexBias); + } else { + OUT_CS_TABLE(ptr4, count_dwords); + } + break; + } END_CS; } -/* This is the fast-path drawing & emission for HW TCL. */ -static void r300_draw_range_elements(struct pipe_context* pipe, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_elements(struct r300_context *r300, int indexBias, + unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count) { - struct r300_context* r300 = r300_context(pipe); struct pipe_resource *indexBuffer = r300->index_buffer.buffer; unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; @@ -570,30 +635,28 @@ static void r300_draw_range_elements(struct pipe_context* pipe, /* Fallback for misaligned ushort indices. 
*/ if (indexSize == 2 && (start & 1) && - !r300_is_user_buffer(indexBuffer)) { - struct pipe_transfer *transfer; - struct pipe_resource *userbuf; - - uint16_t *ptr = pipe_buffer_map(pipe, indexBuffer, - PIPE_TRANSFER_READ, &transfer); + !r300_resource(indexBuffer)->b.user_ptr) { + /* If we got here, then orgIndexBuffer == indexBuffer. */ + uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->buf, + r300->cs, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED); if (mode == PIPE_PRIM_TRIANGLES) { memcpy(indices3, ptr + start, 6); } else { /* Copy the mapped index buffer directly to the upload buffer. * The start index will be aligned simply from the fact that - * every sub-buffer in u_upload_mgr is aligned. */ - userbuf = pipe->screen->user_buffer_create(pipe->screen, - ptr, 0, - PIPE_BIND_INDEX_BUFFER); - indexBuffer = userbuf; - r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count); - pipe_resource_reference(&userbuf, NULL); + * every sub-buffer in the upload buffer is aligned. */ + r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, + count, (uint8_t*)ptr); } - pipe_buffer_unmap(pipe, transfer); + r300->rws->buffer_unmap(r300_resource(orgIndexBuffer)->buf); } else { - if (r300_is_user_buffer(indexBuffer)) - r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count); + if (r300_resource(indexBuffer)->b.user_ptr) + r300_upload_index_buffer(r300, &indexBuffer, indexSize, + &start, count, + r300_resource(indexBuffer)->b.user_ptr); } /* 19 dwords for emit_draw_elements. Give up if the function fails. */ @@ -607,7 +670,11 @@ static void r300_draw_range_elements(struct pipe_context* pipe, minIndex, maxIndex, mode, start, count, indices3); } else { do { - short_count = MIN2(count, 65534); + if (indexSize == 2 && (start & 1)) + short_count = MIN2(count, 65535); + else + short_count = MIN2(count, 65534); + r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, short_count, indices3); @@ -631,43 +698,38 @@ done: } } -static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, +static void r300_draw_arrays(struct r300_context *r300, unsigned mode, unsigned start, unsigned count) { - struct r300_context* r300 = r300_context(pipe); boolean alt_num_verts = r300->screen->caps.is_r500 && count > 65536 && r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; - if (immd_is_good_idea(r300, count)) { - r300_emit_draw_arrays_immediate(r300, mode, start, count); + /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ + if (!r300_prepare_for_rendering(r300, + PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, + NULL, 9, start, 0)) + return; + + if (alt_num_verts || count <= 65535) { + r300_emit_draw_arrays(r300, mode, count); } else { - /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ - if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0)) - return; + do { + short_count = MIN2(count, 65535); + r300_emit_draw_arrays(r300, mode, short_count); - if (alt_num_verts || count <= 65535) { - r300_emit_draw_arrays(r300, mode, count); - } else { - do { - short_count = MIN2(count, 65535); - r300_emit_draw_arrays(r300, mode, short_count); - - start += short_count; - count -= short_count; - - /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. 
*/ - if (count) { - if (!r300_prepare_for_rendering(r300, - PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0)) - return; - } - } while (count); - } + start += short_count; + count -= short_count; + + /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ + if (count) { + if (!r300_prepare_for_rendering(r300, + PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, + start, 0)) + return; + } + } while (count); } } @@ -676,83 +738,45 @@ static void r300_draw_vbo(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); unsigned count = info->count; - boolean translate = FALSE; + boolean buffers_updated, uploader_flushed; boolean indexed = info->indexed && r300->index_buffer.buffer; - unsigned min_index = 0; - unsigned max_index = r300->vertex_buffer_max_index; + unsigned start_indexed = info->start + r300->index_buffer.offset; + int max_index = MIN2(r300->vbuf_mgr->max_index, info->max_index); - if (r300->skip_rendering) { + if (r300->skip_rendering || + !u_trim_pipe_prim(info->mode, &count)) { return; } - if (!u_trim_pipe_prim(info->mode, &count)) { - return; + /* Start the vbuf manager and update buffers if needed. */ + u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info, + &buffers_updated, &uploader_flushed); + if (buffers_updated) { + r300->vertex_arrays_dirty = TRUE; } - if (indexed) { - int real_min_index, real_max_index; - /* Compute the start for draw_elements, taking the offset into account. */ - unsigned start_indexed = - info->start + - (r300->index_buffer.offset / r300->index_buffer.index_size); - - assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); - - /* Index buffer range checking. */ - if ((start_indexed + count) * r300->index_buffer.index_size > - r300->index_buffer.buffer->width0) { - fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); - return; - } - - min_index = MAX2(min_index, info->min_index); - max_index = MIN2(max_index, info->max_index); - real_min_index = (int)min_index - info->index_bias; - real_max_index = (int)max_index - info->index_bias; - - if (max_index >= (1 << 24) - 1) { - fprintf(stderr, "r300: Invalid max_index: %i. Skipping rendering...\n", max_index); - return; - } - - r300_update_derived_state(r300); - - /* Set up the fallback for an incompatible vertex layout if needed. */ - if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { - r300_begin_vertex_translate(r300, real_min_index, real_max_index); - translate = TRUE; - } + /* Draw. */ + r300_update_derived_state(r300); - /* Upload vertex buffers. */ - if (r300->any_user_vbs) { - r300_upload_user_buffers(r300, real_min_index, real_max_index); + if (indexed) { + if (count <= 8 && + r300_resource(r300->index_buffer.buffer)->b.user_ptr) { + r300_draw_elements_immediate(r300, info->index_bias, + info->min_index, max_index, + info->mode, start_indexed, count); + } else { + r300_draw_elements(r300, info->index_bias, info->min_index, + max_index, info->mode, start_indexed, count); } - - r300_draw_range_elements(pipe, info->index_bias, min_index, max_index, - info->mode, start_indexed, count); } else { - min_index = MAX2(min_index, info->start); - max_index = MIN2(max_index, info->start + count - 1); - - r300_update_derived_state(r300); - - /* Set up the fallback for an incompatible vertex layout if needed. */ - if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { - r300_begin_vertex_translate(r300, min_index, max_index); - translate = TRUE; - } - - /* Upload vertex buffers. 
*/ - if (r300->any_user_vbs) { - r300_upload_user_buffers(r300, min_index, max_index); + if (immd_is_good_idea(r300, count)) { + r300_draw_arrays_immediate(r300, info->mode, info->start, count); + } else { + r300_draw_arrays(r300, info->mode, info->start, count); } - - r300_draw_arrays(pipe, info->mode, info->start, count); } - if (translate) { - r300_end_vertex_translate(r300); - } + u_vbuf_mgr_draw_end(r300->vbuf_mgr); } /**************************************************************************** @@ -787,11 +811,12 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, (indexed ? PREP_INDEXED : 0), indexed ? 256 : 6); - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (r300->vertex_buffer[i].buffer) { + for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { + if (r300->vbuf_mgr->vertex_buffer[i].buffer) { void *buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, + r300->vbuf_mgr->vertex_buffer[i].buffer, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } @@ -799,7 +824,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, if (indexed) { indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, - PIPE_TRANSFER_READ, &ib_transfer); + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, &ib_transfer); } draw_set_mapped_index_buffer(r300->draw, indices); @@ -810,8 +836,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, draw_flush(r300->draw); r300->draw_vbo_locked = FALSE; - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (r300->vertex_buffer[i].buffer) { + for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) { + if (r300->vbuf_mgr->vertex_buffer[i].buffer) { pipe_buffer_unmap(pipe, vb_transfer[i]); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } @@ -874,10 +900,10 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, pipe_resource_reference(&r300->vbo, NULL); r300->vbo = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, R300_MAX_DRAW_VBO_SIZE); r300->draw_vbo_offset = 0; r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE; - r300->validate_buffers = TRUE; } r300render->vertex_size = vertex_size; @@ -896,7 +922,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context, r300->vbo, - PIPE_TRANSFER_WRITE, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED, &r300render->vbo_transfer); assert(r300render->vbo_ptr); @@ -963,7 +990,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 6, 0, 0)) + NULL, dwords, 0, 0)) return; } else { if (!r300_emit_states(r300, @@ -972,23 +999,6 @@ static void r300_render_draw_arrays(struct vbuf_render* render, return; } - /* Uncomment to dump all VBOs rendered through this interface. - * Slow and noisy! 
- ptr = pipe_buffer_map(&r300render->r300->context, - r300render->vbo, PIPE_TRANSFER_READ, - &r300render->vbo_transfer); - - for (i = 0; i < count; i++) { - printf("r300: Vertex %d\n", i); - draw_dump_emitted_vertex(&r300->vertex_info, ptr); - ptr += r300->vertex_info.size * 4; - printf("\n"); - } - - pipe_buffer_unmap(&r300render->r300->context, r300render->vbo, - r300render->vbo_transfer); - */ - BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, r300render->prim)); @@ -1250,7 +1260,7 @@ static void r300_resource_resolve(struct pipe_context* pipe, aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; - r300->aa_state.size = 12; + r300->aa_state.size = 10; r300_mark_atom_dirty(r300, &r300->aa_state); /* Resolve the surface. */ diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c index c48062c8084..f8c7558f4b4 100644 --- a/src/gallium/drivers/r300/r300_render_translate.c +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -20,207 +20,64 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - * The functions below translate vertex and index buffers to the layout - * compatible with the hardware, so that all vertex and index fetches are - * DWORD-aligned and all used vertex and index formats are supported. - * For indices, an optional index offset is added to each index. - */ - #include "r300_context.h" -#include "translate/translate.h" #include "util/u_index_modify.h" +#include "util/u_upload_mgr.h" -/* XXX Optimization: use min_index and translate only that range. */ -/* XXX Use the uploader. */ -void r300_begin_vertex_translate(struct r300_context *r300, - int min_index, int max_index) -{ - struct pipe_context *pipe = &r300->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r300_vertex_element_state *ve = r300->velems; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; - unsigned slot; - - /* Initialize the translate key, i.e. the recipe how vertices should be - * translated. */ - for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &r300->vertex_buffer[ve->velem[i].vertex_buffer_index]; - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->velem[i].src_format == ve->hw_format[i] && - (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 && - vb->stride % 4 == 0) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. 
*/ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->velem[i].vertex_buffer_index; - te->input_format = ve->velem[i].src_format; - te->input_offset = vb->buffer_offset + ve->velem[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->velem[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. */ - tr = translate_cache_find(r300->tran.translate_cache, &key); - - /* Map buffers we want to translate. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, vb_map[i], vb->stride, max_index); - } - } - - /* Create and map the output buffer. */ - num_verts = max_index + 1; - - out_buffer = pipe_buffer_create(&r300->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); - - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, vb_transfer[i]); - } - } - - pipe_buffer_unmap(pipe, out_transfer); - - /* Setup the new vertex buffer in the first free slot. */ - slot = ~0; - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&r300->valid_vertex_buffer[i], out_buffer); - vb->buffer_offset = 0; - vb->stride = key.output_stride; - slot = i; - /* XXX probably need to preserve the real count for u_blitter_save_*. */ - r300->vertex_buffer_count = MAX2(r300->vertex_buffer_count, i+1); - r300->validate_buffers = TRUE; - break; - } - } - /* XXX This may fail. */ - assert(slot != ~0); - - /* Save and replace vertex elements. */ - { - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - - r300->tran.saved_velems = r300->velems; - - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->velem[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->velem[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = slot; - } else { - memcpy(&new_velems[i], &ve->velem[i], - sizeof(struct pipe_vertex_element)); - } - } - - r300->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems); - } - - pipe_resource_reference(&out_buffer, NULL); -} -void r300_end_vertex_translate(struct r300_context *r300) -{ - struct pipe_context *pipe = &r300->context; - - /* Restore vertex elements. */ - pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems); - pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems); -} - -/* XXX Use the uploader. 
*/ void r300_translate_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned *index_size, unsigned index_offset, unsigned *start, unsigned count) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + boolean flushed; + switch (*index_size) { - case 1: - util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count); - *index_size = 2; - *start = 0; - r300->validate_buffers = TRUE; - break; + case 1: + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r300->context, *index_buffer, index_offset, + *start, count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *index_size = 2; + *start = out_offset / 2; + break; + + case 2: + if (index_offset) { + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_rebuild_ushort_elts_to_userptr(&r300->context, *index_buffer, + index_offset, *start, + count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *start = out_offset / 2; + } + break; - case 2: - if (index_offset) { - util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count); - *start = 0; - r300->validate_buffers = TRUE; - } - break; + case 4: + if (index_offset) { + u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 4, + &out_offset, &out_buffer, &flushed, &ptr); - case 4: - if (index_offset) { - util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count); - *start = 0; - r300->validate_buffers = TRUE; - } - break; + util_rebuild_uint_elts_to_userptr(&r300->context, *index_buffer, + index_offset, *start, + count, ptr); + + *index_buffer = NULL; + pipe_resource_reference(index_buffer, out_buffer); + *start = out_offset / 4; + } + break; } } diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c index dd1df970594..f3d8c5b889f 100644 --- a/src/gallium/drivers/r300/r300_resource.c +++ b/src/gallium/drivers/r300/r300_resource.c @@ -38,26 +38,22 @@ r300_resource_create(struct pipe_screen *screen, } -static struct pipe_resource * -r300_resource_from_handle(struct pipe_screen * screen, - const struct pipe_resource *templ, - struct winsys_handle *whandle) +static unsigned r300_resource_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned level, int layer) { - if (templ->target == PIPE_BUFFER) - return NULL; - else - return r300_texture_from_handle(screen, templ, whandle); + return r300_buffer_is_referenced(context, buf); } void r300_init_resource_functions(struct r300_context *r300) { r300->context.get_transfer = u_get_transfer_vtbl; r300->context.transfer_map = u_transfer_map_vtbl; - r300->context.transfer_flush_region = u_transfer_flush_region_vtbl; + r300->context.transfer_flush_region = u_default_transfer_flush_region; r300->context.transfer_unmap = u_transfer_unmap_vtbl; r300->context.transfer_destroy = u_transfer_destroy_vtbl; r300->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r300->context.is_resource_referenced = u_is_resource_referenced_vtbl; + r300->context.is_resource_referenced = r300_resource_is_referenced_by_cs; r300->context.create_surface = r300_create_surface; r300->context.surface_destroy = r300_surface_destroy; } @@ -65,8 +61,8 @@ void r300_init_resource_functions(struct r300_context *r300) void r300_init_screen_resource_functions(struct 
r300_screen *r300screen) { r300screen->screen.resource_create = r300_resource_create; - r300screen->screen.resource_from_handle = r300_resource_from_handle; - r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl; + r300screen->screen.resource_from_handle = r300_texture_from_handle; + r300screen->screen.resource_get_handle = r300_resource_get_handle; r300screen->screen.resource_destroy = u_resource_destroy_vtbl; r300screen->screen.user_buffer_create = r300_user_buffer_create; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index c75aeaa10a7..77a9c6ad86f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -34,10 +34,6 @@ #include "draw/draw_context.h" -#ifdef HAVE_LLVM -#include "gallivm/lp_bld_init.h" -#endif - /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. * @@ -87,12 +83,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen) static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { struct r300_screen* r300screen = r300_screen(pscreen); - boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (param) { /* Supported features (boolean caps). */ case PIPE_CAP_NPOT_TEXTURES: @@ -129,11 +121,13 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DUAL_SOURCE_BLEND: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ + case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_INSTANCED_DRAWING: + case PIPE_CAP_ARRAY_TEXTURES: return 0; /* Texturing. */ @@ -175,9 +169,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (shader) { case PIPE_SHADER_FRAGMENT: @@ -407,6 +398,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); util_slab_destroy(&r300screen->pool_buffers); + pipe_mutex_destroy(r300screen->num_contexts_mutex); if (rws) rws->destroy(rws); @@ -418,32 +410,28 @@ static void r300_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - struct r300_fence **oldf = (struct r300_fence**)ptr; - struct r300_fence *newf = (struct r300_fence*)fence; - - if (pipe_reference(&(*oldf)->reference, &newf->reference)) - FREE(*oldf); - - *ptr = fence; + r300_winsys_bo_reference((struct r300_winsys_bo**)ptr, + (struct r300_winsys_bo*)fence); } static int r300_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flags) { - struct r300_fence *rfence = (struct r300_fence*)fence; + struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; - return rfence->signalled ? 0 : 1; /* 0 == success */ + return !rws->buffer_is_busy(rfence) ? 
0 : 1; /* 0 == success */ } static int r300_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flags) { - struct r300_fence *rfence = (struct r300_fence*)fence; + struct r300_winsys_screen *rws = r300_screen(screen)->rws; + struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; - r300_finish(rfence->ctx); - rfence->signalled = TRUE; + rws->buffer_wait(rfence); return 0; /* 0 == success */ } @@ -463,12 +451,19 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) r300_init_debug(r300screen); r300_parse_chipset(&r300screen->caps); + if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK)) + r300screen->caps.zmask_ram = 0; + if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) + r300screen->caps.hiz_ram = 0; + r300screen->caps.index_bias_supported = r300screen->caps.is_r500 && rws->get_value(rws, R300_VID_DRM_2_3_0); + pipe_mutex_init(r300screen->num_contexts_mutex); + util_slab_create(&r300screen->pool_buffers, - sizeof(struct r300_buffer), 64, + sizeof(struct r300_resource), 64, UTIL_SLAB_SINGLETHREADED); r300screen->rws = rws; @@ -490,9 +485,5 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) util_format_s3tc_init(); -#ifdef HAVE_LLVM - lp_build_init(); -#endif - return &r300screen->screen; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 752f53b7579..576f9c1f4a9 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -52,6 +52,7 @@ struct r300_screen { /* The number of created contexts to know whether we have multiple * contexts or not. */ int num_contexts; + pipe_mutex num_contexts_mutex; }; @@ -94,6 +95,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_HYPERZ (1 << 12) #define DBG_SCISSOR (1 << 13) #define DBG_UPLOAD (1 << 14) +#define DBG_INFO (1 << 15) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) @@ -101,6 +103,8 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FAKE_OCC (1 << 19) #define DBG_NO_OPT (1 << 20) #define DBG_NO_CBZB (1 << 21) +#define DBG_NO_ZMASK (1 << 22) +#define DBG_NO_HIZ (1 << 23) /* Statistics. 
*/ #define DBG_P_STAT (1 << 25) /*@}*/ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index cc3c1d7687e..1045911f3ae 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -34,117 +34,50 @@ #include "r300_winsys.h" unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf, - enum r300_reference_domain domain) + struct pipe_resource *buf) { struct r300_context *r300 = r300_context(context); - struct r300_buffer *rbuf = r300_buffer(buf); + struct r300_resource *rbuf = r300_resource(buf); - if (r300_is_user_buffer(buf)) + if (rbuf->b.user_ptr || rbuf->constant_buffer) return PIPE_UNREFERENCED; - if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain)) + if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return PIPE_UNREFERENCED; } -static unsigned r300_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) -{ - return r300_buffer_is_referenced(context, buf, R300_REF_CS); -} - void r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned *start, - unsigned count) + unsigned count, uint8_t *ptr) { unsigned index_offset; - uint8_t *ptr = r300_buffer(*index_buffer)->user_buffer; boolean flushed; *index_buffer = NULL; - u_upload_data(r300->upload_ib, + u_upload_data(r300->vbuf_mgr->uploader, 0, count * index_size, ptr + (*start * index_size), &index_offset, index_buffer, &flushed); *start = index_offset / index_size; - - if (flushed || !r300->upload_ib_validated) { - r300->upload_ib_validated = FALSE; - r300->validate_buffers = TRUE; - } -} - -void r300_upload_user_buffers(struct r300_context *r300, - int min_index, int max_index) -{ - int i, nr = r300->velems->count; - unsigned count = max_index + 1 - min_index; - boolean flushed; - boolean uploaded[16] = {0}; - - for (i = 0; i < nr; i++) { - unsigned index = r300->velems->velem[i].vertex_buffer_index; - struct pipe_vertex_buffer *vb = &r300->vertex_buffer[index]; - struct r300_buffer *userbuf = r300_buffer(vb->buffer); - - if (userbuf && userbuf->user_buffer && !uploaded[index]) { - unsigned first, size; - - if (vb->stride) { - first = vb->stride * min_index; - size = vb->stride * count; - } else { - first = 0; - size = r300->velems->hw_format_size[i]; - } - - DBG(r300, DBG_UPLOAD, - "Uploading %i bytes, index: %i, buffer: %p, userptr: %p " - "offset: %i, stride: %i.\n", - size, index, userbuf, userbuf->user_buffer, - vb->buffer_offset, vb->stride); - - u_upload_data(r300->upload_vb, first, size, - userbuf->user_buffer + first, - &vb->buffer_offset, - &r300->valid_vertex_buffer[index], - &flushed); - - vb->buffer_offset -= first; - - r300->vertex_arrays_dirty = TRUE; - - if (flushed || !r300->upload_vb_validated) { - r300->upload_vb_validated = FALSE; - r300->validate_buffers = TRUE; - } - uploaded[index] = TRUE; - } else { - assert(r300->valid_vertex_buffer[index]); - } - } - DBG(r300, DBG_UPLOAD, "-------\n"); } static void r300_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf = r300_buffer(buf); - struct r300_winsys_screen *rws = r300screen->rws; + struct r300_resource *rbuf = r300_resource(buf); if (rbuf->constant_buffer) FREE(rbuf->constant_buffer); if (rbuf->buf) - 
rws->buffer_reference(rws, &rbuf->buf, NULL); + r300_winsys_bo_reference(&rbuf->buf, NULL); util_slab_free(&r300screen->pool_buffers, rbuf); } @@ -188,15 +121,15 @@ r300_buffer_transfer_map( struct pipe_context *pipe, struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; - struct r300_buffer *rbuf = r300_buffer(transfer->resource); + struct r300_resource *rbuf = r300_resource(transfer->resource); uint8_t *map; - if (rbuf->user_buffer) - return (uint8_t *) rbuf->user_buffer + transfer->box.x; + if (rbuf->b.user_ptr) + return (uint8_t *) rbuf->b.user_ptr + transfer->box.x; if (rbuf->constant_buffer) return (uint8_t *) rbuf->constant_buffer + transfer->box.x; - map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage); + map = rws->buffer_map(rbuf->buf, r300->cs, transfer->usage); if (map == NULL) return NULL; @@ -204,22 +137,15 @@ r300_buffer_transfer_map( struct pipe_context *pipe, return map + transfer->box.x; } -static void r300_buffer_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - /* no-op */ -} - static void r300_buffer_transfer_unmap( struct pipe_context *pipe, struct pipe_transfer *transfer ) { struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; - struct r300_buffer *rbuf = r300_buffer(transfer->resource); + struct r300_resource *rbuf = r300_resource(transfer->resource); if (rbuf->buf) { - rws->buffer_unmap(rws, rbuf->buf); + rws->buffer_unmap(rbuf->buf); } } @@ -234,32 +160,32 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_winsys_screen *rws = r300->screen->rws; - struct r300_buffer *rbuf = r300_buffer(resource); + struct r300_resource *rbuf = r300_resource(resource); uint8_t *map = NULL; if (rbuf->constant_buffer) { memcpy(rbuf->constant_buffer + box->x, data, box->width); return; } - assert(rbuf->user_buffer == NULL); + assert(rbuf->b.user_ptr == NULL); - map = rws->buffer_map(rws, rbuf->buf, r300->cs, + map = rws->buffer_map(rbuf->buf, r300->cs, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage); memcpy(map + box->x, data, box->width); - rws->buffer_unmap(rws, rbuf->buf); + rws->buffer_unmap(rbuf->buf); } -struct u_resource_vtbl r300_buffer_vtbl = +static const struct u_resource_vtbl r300_buffer_vtbl = { - u_default_resource_get_handle, /* get_handle */ + NULL, /* get_handle */ r300_buffer_destroy, /* resource_destroy */ - r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ + NULL, /* is_buffer_referenced */ r300_buffer_get_transfer, /* get_transfer */ r300_buffer_transfer_destroy, /* transfer_destroy */ r300_buffer_transfer_map, /* transfer_map */ - r300_buffer_transfer_flush_region, /* transfer_flush_region */ + NULL, /* transfer_flush_region */ r300_buffer_transfer_unmap, /* transfer_unmap */ r300_buffer_transfer_inline_write /* transfer_inline_write */ }; @@ -268,42 +194,41 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf; + struct r300_resource *rbuf; unsigned alignment = 16; rbuf = util_slab_alloc(&r300screen->pool_buffers); - rbuf->magic = R300_BUFFER_MAGIC; - - rbuf->b.b = *templ; - rbuf->b.vtbl = &r300_buffer_vtbl; - pipe_reference_init(&rbuf->b.b.reference, 1); - rbuf->b.b.screen = screen; + 
rbuf->b.b.b = *templ; + rbuf->b.b.vtbl = &r300_buffer_vtbl; + pipe_reference_init(&rbuf->b.b.b.reference, 1); + rbuf->b.b.b.screen = screen; + rbuf->b.user_ptr = NULL; rbuf->domain = R300_DOMAIN_GTT; rbuf->buf = NULL; + rbuf->buf_size = templ->width0; rbuf->constant_buffer = NULL; - rbuf->user_buffer = NULL; /* Alloc constant buffers in RAM. */ if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) { rbuf->constant_buffer = MALLOC(templ->width0); - return &rbuf->b.b; + return &rbuf->b.b.b; } rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, - rbuf->b.b.width0, alignment, - rbuf->b.b.bind, rbuf->b.b.usage, + rbuf->b.b.b.width0, alignment, + rbuf->b.b.b.bind, rbuf->b.b.b.usage, rbuf->domain); - rbuf->cs_buf = - r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf); - if (!rbuf->buf) { util_slab_free(&r300screen->pool_buffers, rbuf); return NULL; } - return &rbuf->b.b; + rbuf->cs_buf = + r300screen->rws->buffer_get_cs_handle(rbuf->buf); + + return &rbuf->b.b.b; } struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, @@ -311,27 +236,26 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, unsigned bind) { struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf; + struct r300_resource *rbuf; rbuf = util_slab_alloc(&r300screen->pool_buffers); - rbuf->magic = R300_BUFFER_MAGIC; - - pipe_reference_init(&rbuf->b.b.reference, 1); - rbuf->b.vtbl = &r300_buffer_vtbl; - rbuf->b.b.screen = screen; - rbuf->b.b.target = PIPE_BUFFER; - rbuf->b.b.format = PIPE_FORMAT_R8_UNORM; - rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE; - rbuf->b.b.bind = bind; - rbuf->b.b.width0 = ~0; - rbuf->b.b.height0 = 1; - rbuf->b.b.depth0 = 1; - rbuf->b.b.array_size = 1; - rbuf->b.b.flags = 0; + pipe_reference_init(&rbuf->b.b.b.reference, 1); + rbuf->b.b.b.screen = screen; + rbuf->b.b.b.target = PIPE_BUFFER; + rbuf->b.b.b.format = PIPE_FORMAT_R8_UNORM; + rbuf->b.b.b.usage = PIPE_USAGE_IMMUTABLE; + rbuf->b.b.b.bind = bind; + rbuf->b.b.b.width0 = ~0; + rbuf->b.b.b.height0 = 1; + rbuf->b.b.b.depth0 = 1; + rbuf->b.b.b.array_size = 1; + rbuf->b.b.b.flags = 0; + rbuf->b.b.vtbl = &r300_buffer_vtbl; + rbuf->b.user_ptr = ptr; rbuf->domain = R300_DOMAIN_GTT; rbuf->buf = NULL; + rbuf->buf_size = size; rbuf->constant_buffer = NULL; - rbuf->user_buffer = ptr; - return &rbuf->b.b; + return &rbuf->b.b.b; } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 58dec8539b6..14bee460d5b 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -35,39 +35,12 @@ #include "r300_winsys.h" #include "r300_context.h" -#define R300_BUFFER_MAGIC 0xabcd1234 -#define R300_BUFFER_MAX_RANGES 32 - -struct r300_buffer_range { - uint32_t start; - uint32_t end; -}; - -/* Vertex buffer. */ -struct r300_buffer -{ - struct u_resource b; - - uint32_t magic; - - struct r300_winsys_buffer *buf; - struct r300_winsys_cs_buffer *cs_buf; - - enum r300_buffer_domain domain; - - uint8_t *user_buffer; - uint8_t *constant_buffer; -}; - /* Functions. 
*/ -void r300_upload_user_buffers(struct r300_context *r300, - int min_index, int max_index); - void r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned *start, - unsigned count); + unsigned count, uint8_t *ptr); struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); @@ -77,8 +50,7 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, unsigned bind); unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf, - enum r300_reference_domain domain); + struct pipe_resource *buf); /* Inline functions. */ @@ -87,9 +59,4 @@ static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer) return (struct r300_buffer *)buffer; } -static INLINE boolean r300_is_user_buffer(struct pipe_resource *buffer) -{ - return r300_buffer(buffer)->user_buffer ? true : false; -} - #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3a97b76a4c8..09f18b3e624 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -28,6 +28,7 @@ #include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" @@ -442,8 +443,7 @@ static void r300_set_clip_state(struct pipe_context* pipe, OUT_CB_TABLE(state->ucp, state->nr * 4); } OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN | - (state->depth_clamp ? R300_CLIP_DISABLE : 0)); + R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CB; r300_mark_atom_dirty(r300, &r300->clip_state); @@ -617,21 +617,16 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } static void r300_tex_set_tiling_flags(struct r300_context *r300, - struct r300_texture *tex, unsigned level) + struct r300_resource *tex, + unsigned level) { /* Check if the macrotile flag needs to be changed. * Skip changing the flags otherwise. */ - if (tex->desc.macrotile[tex->surface_level] != - tex->desc.macrotile[level]) { - /* Tiling determines how DRM treats the buffer data. - * We must flush CS when changing it if the buffer is referenced. */ - if (r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_CS)) - r300->context.flush(&r300->context, 0, NULL); - - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->desc.microtile, tex->desc.macrotile[level], - tex->desc.stride_in_bytes[0]); + if (tex->tex.macrotile[tex->surface_level] != + tex->tex.macrotile[level]) { + r300->rws->buffer_set_tiling(tex->buf, r300->cs, + tex->tex.microtile, tex->tex.macrotile[level], + tex->tex.stride_in_bytes[0]); tex->surface_level = level; } @@ -646,12 +641,12 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300, /* Set tiling flags for new surfaces. 
*/ for (i = 0; i < state->nr_cbufs; i++) { r300_tex_set_tiling_flags(r300, - r300_texture(state->cbufs[i]->texture), + r300_resource(state->cbufs[i]->texture), state->cbufs[i]->u.tex.level); } if (state->zsbuf) { r300_tex_set_tiling_flags(r300, - r300_texture(state->zsbuf->texture), + r300_resource(state->zsbuf->texture), state->zsbuf->u.tex.level); } } @@ -660,7 +655,7 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, const char *binding) { struct pipe_resource *tex = surf->texture; - struct r300_texture *rtex = r300_texture(tex); + struct r300_resource *rtex = r300_resource(tex); fprintf(stderr, "r300: %s[%i] Dim: %ix%i, Firstlayer: %i, " @@ -673,9 +668,9 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level, util_format_short_name(surf->format), - rtex->desc.macrotile[0] ? "YES" : " NO", - rtex->desc.microtile ? "YES" : " NO", - rtex->desc.stride_in_pixels[0], + rtex->tex.macrotile[0] ? "YES" : " NO", + rtex->tex.microtile ? "YES" : " NO", + rtex->tex.stride_in_pixels[0], tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } @@ -695,8 +690,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, } if (change == R300_CHANGED_FB_STATE || - change == R300_CHANGED_CBZB_FLAG || - change == R300_CHANGED_ZCLEAR_FLAG) { + change == R300_CHANGED_HYPERZ_FLAG) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } @@ -720,8 +714,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, } static void - r300_set_framebuffer_state(struct pipe_context* pipe, - const struct pipe_framebuffer_state* state) +r300_set_framebuffer_state(struct pipe_context* pipe, + const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; @@ -729,7 +723,6 @@ static void boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -745,6 +738,32 @@ static void return; } + if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) { + /* There is a zmask in use, what are we gonna do? */ + if (state->zsbuf) { + if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { + /* Decompress the currently bound zbuffer before we bind another one. */ + r300_decompress_zmask(r300); + } + } else { + /* We don't bind another zbuffer, so lock the current one. */ + r300->zmask_locked = TRUE; + pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); + } + } else if (r300->zmask_locked && r300->locked_zbuffer) { + /* We have a locked zbuffer now, what are we gonna do? */ + if (state->zsbuf) { + if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { + /* We are binding some other zbuffer, so decompress the locked one, + * it gets unlocked automatically. */ + r300_decompress_zmask_locked_unsafe(r300); + } else { + /* We are binding the locked zbuffer again, so unlock it. */ + r300->zmask_locked = FALSE; + } + } + } + /* If nr_cbufs is changed from zero to non-zero or vice versa... 
*/ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300_mark_atom_dirty(r300, &r300->blend_state); @@ -759,14 +778,14 @@ static void util_copy_framebuffer_state(r300->fb_state.state, state); + if (!r300->zmask_locked) { + pipe_surface_reference(&r300->locked_zbuffer, NULL); + } + r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - r300->validate_buffers = TRUE; - r300->z_compression = false; - if (state->zsbuf) { - blocksize = util_format_get_blocksize(state->zsbuf->texture->format); - switch (blocksize) { + switch (util_format_get_blocksize(state->zsbuf->texture->format)) { case 2: zbuffer_bpp = 16; break; @@ -774,30 +793,19 @@ static void zbuffer_bpp = 24; break; } + + /* Setup Hyper-Z. */ if (can_hyperz) { struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_texture *tex; - int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; + struct r300_resource *tex = r300_resource(zs_surf->base.texture); int level = zs_surf->base.u.tex.level; - tex = r300_texture(zs_surf->base.texture); - /* work out whether we can support hiz on this buffer */ r300_hiz_alloc_block(r300, zs_surf); - - /* work out whether we can support zmask features on this buffer */ - r300_zmask_alloc_block(r300, zs_surf, compress); - - if (tex->zmask_mem[level]) { - /* compression causes hangs on 16-bit */ - if (zbuffer_bpp == 24) - r300->z_compression = compress; - } + DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, - r300->z_compression, tex->zmask_mem[level] ? 1 : 0, - tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); + "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, + tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef); } /* Polygon offset depends on the zbuffer bit depth. */ @@ -1300,7 +1308,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_textures_state* state = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture *texture; + struct r300_resource *texture; unsigned i, real_num_views = 0, view_index = 0; unsigned tex_units = r300->screen->caps.num_tex_units; boolean dirty_tex = FALSE; @@ -1329,8 +1337,8 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. 
*/ - texture = r300_texture(views[i]->texture); - if (texture->desc.is_npot) { + texture = r300_resource(views[i]->texture); + if (texture->tex.is_npot) { r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state); } @@ -1350,7 +1358,6 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, state->sampler_view_count = count; r300_mark_atom_dirty(r300, &r300->textures_state); - r300->validate_buffers = TRUE; if (dirty_tex) { r300_mark_atom_dirty(r300, &r300->texture_cache_inval); @@ -1363,7 +1370,7 @@ r300_create_sampler_view(struct pipe_context *pipe, const struct pipe_sampler_view *templ) { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); - struct r300_texture *tex = r300_texture(texture); + struct r300_resource *tex = r300_resource(texture); boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle; @@ -1465,10 +1472,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); - const struct pipe_vertex_buffer *vbo; - unsigned i, max_index = (1 << 24) - 1; - boolean any_user_buffer = FALSE; - boolean any_nonuser_buffer = FALSE; + unsigned i; struct pipe_vertex_buffer dummy_vb = {0}; /* There must be at least one vertex buffer set, otherwise it locks up. */ @@ -1478,90 +1482,20 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, count = 1; } - if (count == r300->vertex_buffer_count && - memcmp(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count) == 0) { - return; - } + u_vbuf_mgr_set_vertex_buffers(r300->vbuf_mgr, count, buffers); if (r300->screen->caps.has_tcl) { /* HW TCL. */ - r300->incompatible_vb_layout = FALSE; - - /* Check if the strides and offsets are aligned to the size of DWORD. */ - for (i = 0; i < count; i++) { - if (buffers[i].buffer) { - if (buffers[i].stride % 4 != 0 || - buffers[i].buffer_offset % 4 != 0) { - r300->incompatible_vb_layout = TRUE; - break; - } - } - } - for (i = 0; i < count; i++) { - vbo = &buffers[i]; - - /* Skip NULL buffers */ - if (!vbo->buffer) { - continue; - } - - /* User buffers have no info about maximum index, - * we will have to compute it in draw_vbo. */ - if (r300_is_user_buffer(vbo->buffer)) { - any_user_buffer = TRUE; - continue; - } - any_nonuser_buffer = TRUE; - - /* The stride of zero means we will be fetching only the first - * vertex, so don't care about max_index. */ - if (!vbo->stride) - continue; - - /* Update the maximum index. */ - { - unsigned vbo_max_index = - (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; - max_index = MIN2(max_index, vbo_max_index); + if (buffers[i].buffer && + !r300_resource(buffers[i].buffer)->b.user_ptr) { } } - - r300->any_user_vbs = any_user_buffer; - r300->vertex_buffer_max_index = max_index; r300->vertex_arrays_dirty = TRUE; - if (any_nonuser_buffer) - r300->validate_buffers = TRUE; - if (!any_user_buffer) - r300->upload_vb_validated = FALSE; } else { /* SW TCL. */ draw_set_vertex_buffers(r300->draw, count, buffers); } - - /* Common code. */ - for (i = 0; i < count; i++) { - vbo = &buffers[i]; - - /* Reference our buffer. 
*/ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer); - if (vbo->buffer && r300_is_user_buffer(vbo->buffer)) { - pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); - } else { - pipe_resource_reference(&r300->valid_vertex_buffer[i], vbo->buffer); - } - } - for (; i < r300->vertex_buffer_count; i++) { - /* Dereference any old buffers. */ - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL); - } - - memcpy(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count); - - r300->vertex_buffer_count = count; } static void r300_set_index_buffer(struct pipe_context* pipe, @@ -1570,14 +1504,11 @@ static void r300_set_index_buffer(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (ib && ib->buffer) { + assert(ib->offset % ib->index_size == 0); + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); - - if (r300->screen->caps.has_tcl && - !r300_is_user_buffer(ib->buffer)) { - r300->validate_buffers = TRUE; - r300->upload_ib_validated = FALSE; - } + r300->index_buffer.offset /= r300->index_buffer.index_size; } else { pipe_resource_reference(&r300->index_buffer.buffer, NULL); @@ -1607,7 +1538,7 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) * so PSC should just route stuff based on the vertex elements, * and not on attrib information. */ for (i = 0; i < velems->count; i++) { - format = velems->hw_format[i]; + format = velems->velem[i].src_format; type = r300_translate_vertex_data_type(format); if (type == R300_INVALID_FORMAT) { @@ -1639,16 +1570,13 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) vstream->count = (i >> 1) + 1; } -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break - static void* r300_create_vertex_elements_state(struct pipe_context* pipe, unsigned count, const struct pipe_vertex_element* attribs) { + struct r300_context *r300 = r300_context(pipe); struct r300_vertex_element_state *velems; unsigned i; - enum pipe_format *format; struct pipe_vertex_element dummy_attrib = {0}; /* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */ @@ -1660,77 +1588,26 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, assert(count <= PIPE_MAX_ATTRIBS); velems = CALLOC_STRUCT(r300_vertex_element_state); - if (velems != NULL) { - velems->count = count; - memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); - - if (r300_screen(pipe->screen)->caps.has_tcl) { - /* Set the best hw format in case the original format is not - * supported by hw. */ - for (i = 0; i < count; i++) { - velems->hw_format[i] = velems->velem[i].src_format; - format = &velems->hw_format[i]; - - /* This is basically the list of unsupported formats. - * For now we don't care about the alignment, that's going to - * be sorted out after the PSC setup. 
*/ - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_UNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_USCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SNORM, R32_FLOAT); - FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); - FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); - - FORMAT_REPLACE(R32_FIXED, R32_FLOAT); - FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); - FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT); - FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT); - - default:; - } + if (!velems) + return NULL; - velems->incompatible_layout = - velems->incompatible_layout || - velems->velem[i].src_format != velems->hw_format[i] || - velems->velem[i].src_offset % 4 != 0; - } + velems->count = count; + velems->vmgr_elements = + u_vbuf_mgr_create_vertex_elements(r300->vbuf_mgr, count, attribs, + velems->velem); - /* Now setup PSC. - * The unused components will be replaced by (..., 0, 1). */ - r300_vertex_psc(velems); - - /* Align the formats to the size of DWORD. - * We only care about the blocksizes of the formats since - * swizzles are already set up. - * Also compute the vertex size. */ - for (i = 0; i < count; i++) { - /* This is OK because we check for aligned strides too - * elsewhere. */ - velems->hw_format_size[i] = - align(util_format_get_blocksize(velems->hw_format[i]), 4); - velems->vertex_size_dwords += velems->hw_format_size[i] / 4; - } + if (r300_screen(pipe->screen)->caps.has_tcl) { + /* Setup PSC. + * The unused components will be replaced by (..., 0, 1). 
*/ + r300_vertex_psc(velems); + + for (i = 0; i < count; i++) { + velems->format_size[i] = + align(util_format_get_blocksize(velems->velem[i].src_format), 4); + velems->vertex_size_dwords += velems->format_size[i] / 4; } } + return velems; } @@ -1746,6 +1623,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; + u_vbuf_mgr_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements); + if (r300->draw) { draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; @@ -1758,7 +1637,11 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state) { - FREE(state); + struct r300_context *r300 = r300_context(pipe); + struct r300_vertex_element_state *velems = state; + + u_vbuf_mgr_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements); + FREE(state); } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -1845,7 +1728,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; - struct r300_buffer *rbuf = r300_buffer(buf); + struct r300_resource *rbuf = r300_resource(buf); uint32_t *mapped; switch (shader) { @@ -1862,8 +1745,8 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, if (buf == NULL || buf->width0 == 0) return; - if (rbuf->user_buffer) - mapped = (uint32_t*)rbuf->user_buffer; + if (rbuf->b.user_ptr) + mapped = (uint32_t*)rbuf->b.user_ptr; else if (rbuf->constant_buffer) mapped = (uint32_t*)rbuf->constant_buffer; else @@ -1947,6 +1830,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_vertex_buffers = r300_set_vertex_buffers; r300->context.set_index_buffer = r300_set_index_buffer; + r300->context.redefine_user_buffer = u_default_redefine_user_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 95be7849f8f..003fe9a58cd 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -489,7 +489,8 @@ static void r300_update_rs_block(struct r300_context *r300) for (; i < ATTR_GENERIC_COUNT; i++) { if (fs_inputs->generic[i] != ATTR_UNUSED) { fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, " - "not enough hardware slots.\n", i); + "not enough hardware slots (it's not a bug, do not " + "report it).\n", i); } } @@ -524,7 +525,8 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n"); } else { fprintf(stderr, "r300: ERROR: FS input fog unassigned, " - "not enough hardware slots.\n"); + "not enough hardware slots. (it's not a bug, " + "do not report it)\n"); } } } @@ -551,7 +553,8 @@ static void r300_update_rs_block(struct r300_context *r300) } else { if (fs_inputs->wpos != ATTR_UNUSED && tex_count >= 8) { fprintf(stderr, "r300: ERROR: FS input WPOS unassigned, " - "not enough hardware slots.\n"); + "not enough hardware slots. 
(it's not a bug, do not " + "report it)\n"); } } @@ -674,7 +677,20 @@ static uint32_t r300_get_border_color(enum pipe_format format, case 16: if (desc->nr_channels <= 2) { border_swizzled[0] = border_swizzled[2]; - util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) { + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc); + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc); + } + } else { + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + } + break; + + case 32: + if (desc->nr_channels == 1) { + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc); } else { util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); } @@ -684,6 +700,25 @@ static uint32_t r300_get_border_color(enum pipe_format format, return uc.ui; } +static boolean util_format_is_float(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned i; + + if (!format) + return FALSE; + + /* Find the first non-void channel. */ + for (i = 0; i < 4; i++) + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + + if (i == 4) + return FALSE; + + return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? TRUE : FALSE; +} + static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -691,7 +726,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_texture_sampler_state *texstate; struct r300_sampler_state *sampler; struct r300_sampler_view *view; - struct r300_texture *tex; + struct r300_resource *tex; unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); @@ -709,7 +744,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) state->tx_enable |= 1 << i; view = state->sampler_views[i]; - tex = r300_texture(view->base.texture); + tex = r300_resource(view->base.texture); sampler = state->sampler_states[i]; texstate = &state->regs[i]; @@ -725,32 +760,37 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level, - tex->desc.b.b.last_level, view->base.u.tex.last_level); + tex->b.b.b.last_level, view->base.u.tex.last_level); min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level, max_level); - if (tex->desc.is_npot && min_level > 0) { + if (tex->tex.is_npot && min_level > 0) { /* Even though we do not implement mipmapping for NPOT * textures, we should at least honor the minimum level * which is allowed to be displayed. We do this by setting up - * an i-th mipmap level as the zero level. */ - r300_texture_setup_format_state(r300->screen, &tex->desc, + * the i-th mipmap level as the zero level. */ + unsigned offset = tex->tex_offset + + tex->tex.offset_in_bytes[min_level]; + + r300_texture_setup_format_state(r300->screen, tex, min_level, &texstate->format); - texstate->format.tile_config |= - tex->desc.offset_in_bytes[min_level] & 0xffffffe0; - assert((tex->desc.offset_in_bytes[min_level] & 0x1f) == 0); + texstate->format.tile_config |= offset & 0xffffffe0; + assert((offset & 0x1f) == 0); + } else { + texstate->format.tile_config |= tex->tex_offset & 0xffffffe0; + assert((tex->tex_offset & 0x1f) == 0); } /* Assign a texture cache region. 
*/ texstate->format.format1 |= view->texcache_region; /* Depth textures are kinda special. */ - if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + if (util_format_is_depth_or_stencil(tex->b.b.b.format)) { unsigned char depth_swizzle[4]; if (!r300->screen->caps.is_r500 && - util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + util_format_get_blocksizebits(tex->b.b.b.format) == 32) { /* X24x8 is sampled as Y16X16 on r3xx-r4xx. * The depth here is at the Y component. */ for (j = 0; j < 4; j++) @@ -775,17 +815,17 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } if (r300->screen->caps.dxtc_swizzle && - util_format_is_compressed(tex->desc.b.b.format)) { + util_format_is_compressed(tex->b.b.b.format)) { texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE; } /* to emulate 1D textures through 2D ones correctly */ - if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { + if (tex->b.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->desc.is_npot) { + if (tex->tex.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; @@ -814,6 +854,32 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); } + /* Float textures only support nearest and mip-nearest filtering. */ + if (util_format_is_float(tex->b.b.b.format)) { + /* No MAG linear filtering. */ + if ((texstate->filter0 & R300_TX_MAG_FILTER_MASK) == + R300_TX_MAG_FILTER_LINEAR) { + texstate->filter0 &= ~R300_TX_MAG_FILTER_MASK; + texstate->filter0 |= R300_TX_MAG_FILTER_NEAREST; + } + /* No MIN linear filtering. */ + if ((texstate->filter0 & R300_TX_MIN_FILTER_MASK) == + R300_TX_MIN_FILTER_LINEAR) { + texstate->filter0 &= ~R300_TX_MIN_FILTER_MASK; + texstate->filter0 |= R300_TX_MIN_FILTER_NEAREST; + } + /* No mipmap linear filtering. */ + if ((texstate->filter0 & R300_TX_MIN_FILTER_MIP_MASK) == + R300_TX_MIN_FILTER_MIP_LINEAR) { + texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; + texstate->filter0 |= R300_TX_MIN_FILTER_MIP_NEAREST; + } + /* No anisotropic filtering. 
*/ + texstate->filter0 &= ~R300_TX_MAX_ANISO_MASK; + texstate->filter1 &= ~R500_TX_MAX_ANISO_MASK; + texstate->filter1 &= ~R500_TX_ANISO_HIGH_QUALITY; + } + texstate->filter0 |= i << 28; size += 16; @@ -862,11 +928,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } } -void r300_update_derived_state(struct r300_context* r300) +static void r300_decompress_depth_textures(struct r300_context *r300) { - r300_flush_depth_textures(r300); + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + struct pipe_resource *tex; + unsigned count = MIN2(state->sampler_view_count, + state->sampler_state_count); + unsigned i; + + if (!r300->zmask_locked || !r300->locked_zbuffer) { + return; + } + for (i = 0; i < count; i++) { + if (state->sampler_views[i] && state->sampler_states[i]) { + tex = state->sampler_views[i]->base.texture; + + if (tex == r300->locked_zbuffer->texture) { + r300_decompress_zmask_locked(r300); + return; + } + } + } +} + +void r300_update_derived_state(struct r300_context* r300) +{ if (r300->textures_state.dirty) { + r300_decompress_depth_textures(r300); r300_merge_textures_and_samplers(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 7e501221b1f..06da04c7ad7 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -25,13 +25,9 @@ #define R300_STATE_INLINES_H #include "draw/draw_vertex.h" - #include "pipe/p_format.h" - #include "util/u_format.h" - #include "r300_reg.h" - #include <stdio.h> /* Some maths. These should probably find their way to u_math, if needed. */ @@ -341,24 +337,6 @@ static INLINE uint32_t r500_anisotropy(unsigned max_aniso) R500_TX_ANISO_HIGH_QUALITY; } -/* Non-CSO state. (For now.) */ - -static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) -{ - switch (pipe_count) { - case 1: - return R300_GB_TILE_PIPE_COUNT_RV300; - case 2: - return R300_GB_TILE_PIPE_COUNT_R300; - case 3: - return R300_GB_TILE_PIPE_COUNT_R420_3P; - case 4: - return R300_GB_TILE_PIPE_COUNT_R420; - } - return 0; -} - - /* Translate pipe_formats into PSC vertex types. */ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 6fdc504ed54..354144cac79 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -370,14 +370,18 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) switch (format) { /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: + /*case PIPE_FORMAT_A8_SNORM:*/ case PIPE_FORMAT_I8_UNORM: + /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: + /*case PIPE_FORMAT_L8_SNORM:*/ case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ case PIPE_FORMAT_L8A8_UNORM: + /*case PIPE_FORMAT_L8A8_SNORM:*/ case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: return R300_COLOR_FORMAT_UV88; @@ -395,13 +399,21 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) /* 32-bit buffers. 
*/ case PIPE_FORMAT_B8G8R8A8_UNORM: + /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ case PIPE_FORMAT_B8G8R8X8_UNORM: + /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ case PIPE_FORMAT_A8R8G8B8_UNORM: + /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ case PIPE_FORMAT_X8R8G8B8_UNORM: + /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ case PIPE_FORMAT_A8B8G8R8_UNORM: + /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: case PIPE_FORMAT_X8B8G8R8_UNORM: + /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ case PIPE_FORMAT_R8G8B8X8_UNORM: + /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return R300_COLOR_FORMAT_ARGB8888; @@ -506,9 +518,12 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* 8-bit outputs, one channel. * COLORFORMAT_I8 stores the C2 component. */ case PIPE_FORMAT_A8_UNORM: + /*case PIPE_FORMAT_A8_SNORM:*/ return modifier | R300_C2_SEL_A; case PIPE_FORMAT_I8_UNORM: + /*case PIPE_FORMAT_I8_SNORM:*/ case PIPE_FORMAT_L8_UNORM: + /*case PIPE_FORMAT_L8_SNORM:*/ case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return modifier | R300_C2_SEL_R; @@ -516,6 +531,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* 16-bit outputs, two channels. * COLORFORMAT_UV88 stores C2 and C0. */ case PIPE_FORMAT_L8A8_UNORM: + /*case PIPE_FORMAT_L8A8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C2_SEL_R; case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8_SNORM: @@ -528,7 +544,9 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4X4_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/ case PIPE_FORMAT_B8G8R8X8_UNORM: + /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/ case PIPE_FORMAT_B10G10R10A2_UNORM: return modifier | R300_C0_SEL_B | R300_C1_SEL_G | @@ -536,20 +554,26 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) /* ARGB outputs. */ case PIPE_FORMAT_A8R8G8B8_UNORM: + /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/ case PIPE_FORMAT_X8R8G8B8_UNORM: + /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; /* ABGR outputs. */ case PIPE_FORMAT_A8B8G8R8_UNORM: + /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/ case PIPE_FORMAT_X8B8G8R8_UNORM: + /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/ return modifier | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; /* RGBA outputs. */ case PIPE_FORMAT_R8G8B8X8_UNORM: + /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/ + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: case PIPE_FORMAT_R8SG8SB8UX8U_NORM: case PIPE_FORMAT_R10G10B10A2_UNORM: @@ -585,11 +609,12 @@ boolean r300_is_sampler_format_supported(enum pipe_format format) } void r300_texture_setup_format_state(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level, struct r300_texture_format_state *out) { - struct pipe_resource *pt = &desc->b.b; + struct pipe_resource *pt = &tex->b.b.b; + struct r300_texture_desc *desc = &tex->tex; boolean is_r500 = screen->caps.is_r500; /* Mask out all the fields we change. */ @@ -632,163 +657,152 @@ void r300_texture_setup_format_state(struct r300_screen *screen, R300_TXO_MICRO_TILE(desc->microtile); } -static void r300_texture_setup_fb_state(struct r300_screen* screen, - struct r300_texture* tex) +static void r300_texture_setup_fb_state(struct r300_surface *surf) { - unsigned i; + struct r300_resource *tex = r300_resource(surf->base.texture); + unsigned level = surf->base.u.tex.level; /* Set framebuffer state. 
*/ - if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { - for (i = 0; i <= tex->desc.b.b.last_level; i++) { - tex->fb_state.pitch[i] = - tex->desc.stride_in_pixels[i] | - R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | - R300_DEPTHMICROTILE(tex->desc.microtile); - } - tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); + if (util_format_is_depth_or_stencil(surf->base.format)) { + surf->pitch = + tex->tex.stride_in_pixels[level] | + R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | + R300_DEPTHMICROTILE(tex->tex.microtile); + surf->format = r300_translate_zsformat(surf->base.format); } else { - for (i = 0; i <= tex->desc.b.b.last_level; i++) { - tex->fb_state.pitch[i] = - tex->desc.stride_in_pixels[i] | - r300_translate_colorformat(tex->desc.b.b.format) | - R300_COLOR_TILE(tex->desc.macrotile[i]) | - R300_COLOR_MICROTILE(tex->desc.microtile); - } - tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); + surf->pitch = + tex->tex.stride_in_pixels[level] | + r300_translate_colorformat(surf->base.format) | + R300_COLOR_TILE(tex->tex.macrotile[level]) | + R300_COLOR_MICROTILE(tex->tex.microtile); + surf->format = r300_translate_out_fmt(surf->base.format); } } -void r300_texture_reinterpret_format(struct pipe_screen *screen, +boolean r300_resource_set_properties(struct pipe_screen *screen, struct pipe_resource *tex, - enum pipe_format new_format) + unsigned offset, + const struct pipe_resource *new_properties) { - struct r300_screen *r300screen = r300_screen(screen); + struct r300_screen *rscreen = r300_screen(screen); + struct r300_resource *res = r300_resource(tex); - SCREEN_DBG(r300screen, DBG_TEX, - "r300: texture_reinterpret_format: %s -> %s\n", + SCREEN_DBG(rscreen, DBG_TEX, + "r300: texture_set_properties: %s -> %s\n", util_format_short_name(tex->format), - util_format_short_name(new_format)); - - tex->format = new_format; + util_format_short_name(new_properties->format)); - r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); -} - -static unsigned r300_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - struct r300_context *r300 = r300_context(context); - struct r300_texture *rtex = (struct r300_texture *)texture; - - if (r300->rws->cs_is_buffer_referenced(r300->cs, - rtex->cs_buffer, R300_REF_CS)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + if (!r300_texture_desc_init(rscreen, res, new_properties)) { + fprintf(stderr, "r300: ERROR: Cannot set texture properties.\n"); + return FALSE; + } + res->tex_offset = offset; + r300_texture_setup_format_state(rscreen, res, 0, &res->tx_format); - return PIPE_UNREFERENCED; + return TRUE; } static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { - struct r300_texture* tex = (struct r300_texture*)texture; - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; + struct r300_resource* tex = (struct r300_resource*)texture; int i; - rws->buffer_reference(rws, &tex->buffer, NULL); + r300_winsys_bo_reference(&tex->buf, NULL); for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { if (tex->hiz_mem[i]) u_mmFreeMem(tex->hiz_mem[i]); - if (tex->zmask_mem[i]) - u_mmFreeMem(tex->zmask_mem[i]); } FREE(tex); } -static boolean r300_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *texture, - struct winsys_handle *whandle) +boolean r300_resource_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle 
*whandle) { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_resource* tex = (struct r300_resource*)texture; if (!tex) { return FALSE; } - return rws->buffer_get_handle(rws, tex->buffer, - tex->desc.stride_in_bytes[0], whandle); + return rws->buffer_get_handle(tex->buf, + tex->tex.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +static const struct u_resource_vtbl r300_texture_vtbl = { - r300_texture_get_handle, /* get_handle */ - r300_texture_destroy, /* resource_destroy */ - r300_texture_is_referenced, /* is_resource_referenced */ - r300_texture_get_transfer, /* get_transfer */ - r300_texture_transfer_destroy, /* transfer_destroy */ - r300_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region, /* transfer_flush_region */ - r300_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + NULL, /* get_handle */ + r300_texture_destroy, /* resource_destroy */ + NULL, /* is_resource_referenced */ + r300_texture_get_transfer, /* get_transfer */ + r300_texture_transfer_destroy, /* transfer_destroy */ + r300_texture_transfer_map, /* transfer_map */ + NULL, /* transfer_flush_region */ + r300_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ }; /* The common texture constructor. */ -static struct r300_texture* +static struct r300_resource* r300_texture_create_object(struct r300_screen *rscreen, const struct pipe_resource *base, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride_in_bytes_override, unsigned max_buffer_size, - struct r300_winsys_buffer *buffer) + struct r300_winsys_bo *buffer) { struct r300_winsys_screen *rws = rscreen->rws; - struct r300_texture *tex = CALLOC_STRUCT(r300_texture); + struct r300_resource *tex = CALLOC_STRUCT(r300_resource); if (!tex) { if (buffer) - rws->buffer_reference(rws, &buffer, NULL); + r300_winsys_bo_reference(&buffer, NULL); return NULL; } - /* Initialize the descriptor. */ - if (!r300_texture_desc_init(rscreen, &tex->desc, base, - microtile, macrotile, - stride_in_bytes_override, - max_buffer_size)) { + pipe_reference_init(&tex->b.b.b.reference, 1); + tex->b.b.b.screen = &rscreen->screen; + tex->b.b.b.usage = base->usage; + tex->b.b.b.bind = base->bind; + tex->b.b.b.flags = base->flags; + tex->b.b.vtbl = &r300_texture_vtbl; + tex->tex.microtile = microtile; + tex->tex.macrotile[0] = macrotile; + tex->tex.stride_in_bytes_override = stride_in_bytes_override; + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? + R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buf_size = max_buffer_size; + + if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) { if (buffer) - rws->buffer_reference(rws, &buffer, NULL); + r300_winsys_bo_reference(&buffer, NULL); FREE(tex); return NULL; } - /* Initialize the hardware state. */ - r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format); - r300_texture_setup_fb_state(rscreen, tex); - - tex->desc.b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->desc.b.b.reference, 1); - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? - R300_DOMAIN_GTT : - R300_DOMAIN_VRAM | R300_DOMAIN_GTT; - tex->buffer = buffer; /* Create the backing buffer if needed. 
*/ - if (!tex->buffer) { - tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + if (!buffer) { + tex->buf_size = tex->tex.size_in_bytes; + tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, base->bind, base->usage, tex->domain); - if (!tex->buffer) { + if (!tex->buf) { FREE(tex); return NULL; } + } else { + tex->buf = buffer; } - tex->cs_buffer = rws->buffer_get_cs_handle(rws, tex->buffer); + tex->cs_buf = rws->buffer_get_cs_handle(tex->buf); - rws->buffer_set_tiling(rws, tex->buffer, - tex->desc.microtile, tex->desc.macrotile[0], - tex->desc.stride_in_bytes[0]); + rws->buffer_set_tiling(tex->buf, NULL, + tex->tex.microtile, tex->tex.macrotile[0], + tex->tex.stride_in_bytes[0]); return tex; } @@ -820,7 +834,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, { struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; struct r300_screen *rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; + struct r300_winsys_bo *buffer; enum r300_buffer_tiling microtile, macrotile; unsigned stride, size; @@ -836,7 +850,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, if (!buffer) return NULL; - rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile); + rws->buffer_get_tiling(buffer, &microtile, &macrotile); /* Enforce a microtiled zbuffer. */ if (util_format_is_depth_or_stencil(base->format) && @@ -847,7 +861,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) + if (rws->get_value(rws, R300_VID_DRM_2_1_0)) microtile = R300_BUFFER_SQUARETILED; break; } @@ -864,7 +878,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, struct pipe_resource* texture, const struct pipe_surface *surf_tmpl) { - struct r300_texture* tex = r300_texture(texture); + struct r300_resource* tex = r300_resource(texture); struct r300_surface* surface = CALLOC_STRUCT(r300_surface); unsigned level = surf_tmpl->u.tex.level; @@ -884,28 +898,27 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; - surface->buffer = tex->buffer; - surface->cs_buffer = tex->cs_buffer; + surface->buf = tex->buf; + surface->cs_buf = tex->cs_buf; /* Prefer VRAM if there are multiple domains to choose from. */ surface->domain = tex->domain; if (surface->domain & R300_DOMAIN_VRAM) surface->domain &= ~R300_DOMAIN_GTT; - surface->offset = r300_texture_get_offset(&tex->desc, level, + surface->offset = r300_texture_get_offset(tex, level, surf_tmpl->u.tex.first_layer); - surface->pitch = tex->fb_state.pitch[level]; - surface->format = tex->fb_state.format; + r300_texture_setup_fb_state(surface); /* Parameters for the CBZB clear. */ - surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; + surface->cbzb_allowed = tex->tex.cbzb_allowed[level]; surface->cbzb_width = align(surface->base.width, 64); /* Height must be aligned to the size of a tile. 
*/ - tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, - tex->desc.b.b.nr_samples, - tex->desc.microtile, - tex->desc.macrotile[level], + tile_height = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], DIM_HEIGHT, 0); surface->cbzb_height = align((surface->base.height + 1) / 2, @@ -914,7 +927,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, /* Offset must be aligned to 2K and must point at the beginning * of a scanline. */ offset = surface->offset + - tex->desc.stride_in_bytes[level] * surface->cbzb_height; + tex->tex.stride_in_bytes[level] * surface->cbzb_height; surface->cbzb_midpoint_offset = offset & ~2047; surface->cbzb_pitch = surface->pitch & 0x1ffffc; @@ -929,8 +942,8 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx, surface->cbzb_allowed ? "YES" : " NO", surface->cbzb_width, surface->cbzb_height, offset & 2047, - tex->desc.microtile ? "YES" : " NO", - tex->desc.macrotile[level] ? "YES" : " NO"); + tex->tex.microtile ? "YES" : " NO", + tex->tex.macrotile[level] ? "YES" : " NO"); } return &surface->base; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 0ab22f747e4..158a387478f 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -32,7 +32,7 @@ struct pipe_resource; struct winsys_handle; struct r300_texture_format_state; struct r300_texture_desc; -struct r300_texture; +struct r300_resource; struct r300_screen; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, @@ -46,9 +46,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, uint32_t r500_tx_format_msb_bit(enum pipe_format format); -void r300_texture_reinterpret_format(struct pipe_screen *screen, +boolean r300_resource_set_properties(struct pipe_screen *screen, struct pipe_resource *tex, - enum pipe_format new_format); + unsigned offset, + const struct pipe_resource *new_properties); boolean r300_is_colorbuffer_format_supported(enum pipe_format format); @@ -57,10 +58,14 @@ boolean r300_is_zs_format_supported(enum pipe_format format); boolean r300_is_sampler_format_supported(enum pipe_format format); void r300_texture_setup_format_state(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level, struct r300_texture_format_state *out); +boolean r300_resource_get_handle(struct pipe_screen* screen, + struct pipe_resource *texture, + struct winsys_handle *whandle); + struct pipe_resource* r300_texture_from_handle(struct pipe_screen* screen, const struct pipe_resource* base, diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 7b1739142d4..2cfeec7d751 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -90,19 +90,19 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, } /* Return true if macrotiling should be enabled on the miplevel. 
*/ -static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, +static boolean r300_texture_macro_switch(struct r300_resource *tex, unsigned level, boolean rv350_mode, enum r300_dim dim) { unsigned tile, texdim; - tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, - desc->microtile, R300_BUFFER_TILED, dim, 0); + tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples, + tex->tex.microtile, R300_BUFFER_TILED, dim, 0); if (dim == DIM_WIDTH) { - texdim = u_minify(desc->width0, level); + texdim = u_minify(tex->tex.width0, level); } else { - texdim = u_minify(desc->height0, level); + texdim = u_minify(tex->tex.height0, level); } /* See TX_FILTER1_n.MACRO_SWITCH. */ @@ -118,7 +118,7 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, * at the given level. */ static unsigned r300_texture_get_stride(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, unsigned level) { unsigned tile_width, width, stride; @@ -126,62 +126,62 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, screen->caps.family == CHIP_FAMILY_RS690 || screen->caps.family == CHIP_FAMILY_RS740); - if (desc->stride_in_bytes_override) - return desc->stride_in_bytes_override; + if (tex->tex.stride_in_bytes_override) + return tex->tex.stride_in_bytes_override; /* Check the level. */ - if (level > desc->b.b.last_level) { + if (level > tex->b.b.b.last_level) { SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, desc->b.b.last_level); + __FUNCTION__, level, tex->b.b.b.last_level); return 0; } - width = u_minify(desc->width0, level); + width = u_minify(tex->tex.width0, level); - if (util_format_is_plain(desc->b.b.format)) { - tile_width = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], + if (util_format_is_plain(tex->b.b.b.format)) { + tile_width = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], DIM_WIDTH, is_rs690); width = align(width, tile_width); - stride = util_format_get_stride(desc->b.b.format, width); + stride = util_format_get_stride(tex->b.b.b.format, width); /* The alignment to 32 bytes is sort of implied by the layout... */ return stride; } else { - return align(util_format_get_stride(desc->b.b.format, width), is_rs690 ? 64 : 32); + return align(util_format_get_stride(tex->b.b.b.format, width), is_rs690 ? 64 : 32); } } -static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, +static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, unsigned level, boolean *out_aligned_for_cbzb) { unsigned height, tile_height; - height = u_minify(desc->height0, level); + height = u_minify(tex->tex.height0, level); - if (util_format_is_plain(desc->b.b.format)) { - tile_height = r300_get_pixel_alignment(desc->b.b.format, - desc->b.b.nr_samples, - desc->microtile, - desc->macrotile[level], + /* Mipmapped and 3D textures must have their height aligned to POT. 
*/ + if ((tex->b.b.b.target != PIPE_TEXTURE_1D && + tex->b.b.b.target != PIPE_TEXTURE_2D && + tex->b.b.b.target != PIPE_TEXTURE_RECT) || + tex->b.b.b.last_level != 0) { + height = util_next_power_of_two(height); + } + + if (util_format_is_plain(tex->b.b.b.format)) { + tile_height = r300_get_pixel_alignment(tex->b.b.b.format, + tex->b.b.b.nr_samples, + tex->tex.microtile, + tex->tex.macrotile[level], DIM_HEIGHT, 0); height = align(height, tile_height); - /* This is needed for the kernel checker, unfortunately. */ - if ((desc->b.b.target != PIPE_TEXTURE_1D && - desc->b.b.target != PIPE_TEXTURE_2D && - desc->b.b.target != PIPE_TEXTURE_RECT) || - desc->b.b.last_level != 0) { - height = util_next_power_of_two(height); - } - /* See if the CBZB clear can be used on the buffer, * taking the texture size into account. */ if (out_aligned_for_cbzb) { - if (desc->macrotile[level]) { + if (tex->tex.macrotile[level]) { /* When clearing, the layer (width*height) is horizontally split * into two, and the upper and lower halves are cleared by the CB * and ZB units, respectively. Therefore, the number of macrotiles @@ -189,10 +189,10 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, /* Align the height so that there is an even number of macrotiles. * Do so for 3 or more macrotiles in the Y direction. */ - if (level == 0 && desc->b.b.last_level == 0 && - (desc->b.b.target == PIPE_TEXTURE_1D || - desc->b.b.target == PIPE_TEXTURE_2D || - desc->b.b.target == PIPE_TEXTURE_RECT) && + if (level == 0 && tex->b.b.b.last_level == 0 && + (tex->b.b.b.target == PIPE_TEXTURE_1D || + tex->b.b.b.target == PIPE_TEXTURE_2D || + tex->b.b.b.target == PIPE_TEXTURE_RECT) && height >= tile_height * 3) { height = align(height, tile_height * 2); } @@ -204,11 +204,11 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, } } - return util_format_get_nblocksy(desc->b.b.format, height); + return util_format_get_nblocksy(tex->b.b.b.format, height); } static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures * incorrectly. This is a workaround to prevent CS from being rejected. 
*/ @@ -216,17 +216,17 @@ static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, unsigned i, size; if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - desc->b.b.target == PIPE_TEXTURE_3D && - desc->b.b.last_level > 0) { + tex->b.b.b.target == PIPE_TEXTURE_3D && + tex->b.b.b.last_level > 0) { size = 0; - for (i = 0; i <= desc->b.b.last_level; i++) { - size += desc->stride_in_bytes[i] * - r300_texture_get_nblocksy(desc, i, FALSE); + for (i = 0; i <= tex->b.b.b.last_level; i++) { + size += tex->tex.stride_in_bytes[i] * + r300_texture_get_nblocksy(tex, i, FALSE); } - size *= desc->depth0; - desc->size_in_bytes = size; + size *= tex->tex.depth0; + tex->tex.size_in_bytes = size; } } @@ -239,15 +239,15 @@ static unsigned stride_to_width(enum pipe_format format, } static void r300_setup_miptree(struct r300_screen *screen, - struct r300_texture_desc *desc, + struct r300_resource *tex, boolean align_for_cbzb) { - struct pipe_resource *base = &desc->b.b; + struct pipe_resource *base = &tex->b.b.b; unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean aligned_for_cbzb; - desc->size_in_bytes = 0; + tex->tex.size_in_bytes = 0; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", @@ -255,21 +255,21 @@ static void r300_setup_miptree(struct r300_screen *screen, for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ - desc->macrotile[i] = - (desc->macrotile[0] == R300_BUFFER_TILED && - r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? + tex->tex.macrotile[i] = + (tex->tex.macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? R300_BUFFER_TILED : R300_BUFFER_LINEAR; - stride = r300_texture_get_stride(screen, desc, i); + stride = r300_texture_get_stride(screen, tex, i); /* Compute the number of blocks in Y, see if the CBZB clear can be * used on the texture. 
*/ aligned_for_cbzb = FALSE; - if (align_for_cbzb && desc->cbzb_allowed[i]) - nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + if (align_for_cbzb && tex->tex.cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb); else - nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + nblocksy = r300_texture_get_nblocksy(tex, i, NULL); layer_size = stride * nblocksy; @@ -280,75 +280,136 @@ static void r300_setup_miptree(struct r300_screen *screen, if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * u_minify(desc->depth0, i); + size = layer_size * u_minify(tex->tex.depth0, i); - desc->offset_in_bytes[i] = desc->size_in_bytes; - desc->size_in_bytes = desc->offset_in_bytes[i] + size; - desc->layer_size_in_bytes[i] = layer_size; - desc->stride_in_bytes[i] = stride; - desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); - desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; + tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes; + tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size; + tex->tex.layer_size_in_bytes[i] = layer_size; + tex->tex.stride_in_bytes[i] = stride; + tex->tex.stride_in_pixels[i] = stride_to_width(tex->b.b.b.format, stride); + tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb; SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(desc->width0, i), u_minify(desc->height0, i), - u_minify(desc->depth0, i), stride, desc->size_in_bytes, - desc->macrotile[i] ? "TRUE" : "FALSE"); + i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i), + u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes, + tex->tex.macrotile[i] ? "TRUE" : "FALSE"); } } -static void r300_setup_flags(struct r300_texture_desc *desc) +static void r300_setup_flags(struct r300_resource *tex) { - desc->uses_stride_addressing = - !util_is_power_of_two(desc->b.b.width0) || - (desc->stride_in_bytes_override && - stride_to_width(desc->b.b.format, - desc->stride_in_bytes_override) != desc->b.b.width0); - - desc->is_npot = - desc->uses_stride_addressing || - !util_is_power_of_two(desc->b.b.height0) || - !util_is_power_of_two(desc->b.b.depth0); + tex->tex.uses_stride_addressing = + !util_is_power_of_two(tex->b.b.b.width0) || + (tex->tex.stride_in_bytes_override && + stride_to_width(tex->b.b.b.format, + tex->tex.stride_in_bytes_override) != tex->b.b.b.width0); + + tex->tex.is_npot = + tex->tex.uses_stride_addressing || + !util_is_power_of_two(tex->b.b.b.height0) || + !util_is_power_of_two(tex->b.b.b.depth0); } static void r300_setup_cbzb_flags(struct r300_screen *rscreen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { unsigned i, bpp; boolean first_level_valid; - bpp = util_format_get_blocksizebits(desc->b.b.format); + bpp = util_format_get_blocksizebits(tex->b.b.b.format); /* 1) The texture must be point-sampled, * 2) The depth must be 16 or 32 bits. * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage * with certain texture sizes. Macrotiling ensures the alignment. 
*/ - first_level_valid = desc->b.b.nr_samples <= 1 && + first_level_valid = tex->b.b.b.nr_samples <= 1 && (bpp == 16 || bpp == 32) && - desc->macrotile[0]; + tex->tex.macrotile[0]; if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB)) first_level_valid = FALSE; - for (i = 0; i <= desc->b.b.last_level; i++) - desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; + for (i = 0; i <= tex->b.b.b.last_level; i++) + tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; +} + +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +static void r300_setup_zmask_flags(struct r300_screen *screen, + struct r300_resource *tex) +{ + /* The tile size of 1 DWORD is: + * + * GPU Pipes 4x4 mode 8x8 mode + * ------------------------------------------ + * R580 4P/1Z 32x32 64x64 + * RV570 3P/1Z 48x16 96x32 + * RV530 1P/2Z 32x16 64x32 + * 1P/1Z 16x16 32x32 + */ + static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + + if (util_format_is_depth_or_stencil(tex->b.b.b.format) && + util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + tex->tex.microtile) { + unsigned i, pipes; + + if (screen->caps.family == CHIP_FAMILY_RV530) { + pipes = screen->caps.num_z_pipes; + } else { + pipes = screen->caps.num_frag_pipes; + } + + for (i = 0; i <= tex->b.b.b.last_level; i++) { + unsigned numdw, compsize; + + /* The 8x8 compression mode needs macrotiling. */ + compsize = screen->caps.z_compress == R300_ZCOMP_8X8 && + tex->tex.macrotile[i] && + tex->b.b.b.nr_samples <= 1 ? 8 : 4; + + /* Get the zbuffer size (with the aligned width and height). */ + numdw = align(tex->tex.stride_in_pixels[i], + num_blocks_x_per_dw[pipes-1] * compsize) * + align(u_minify(tex->b.b.b.height0, i), + num_blocks_y_per_dw[pipes-1] * compsize); + + /* Convert pixels -> dwords. */ + numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * + num_blocks_y_per_dw[pipes-1] * compsize); + + /* Check that we have enough ZMASK memory. */ + if (numdw <= screen->caps.zmask_ram * pipes) { + tex->tex.zmask_dwords[i] = numdw; + tex->tex.zcomp8x8[i] = compsize == 8; + } else { + tex->tex.zmask_dwords[i] = 0; + tex->tex.zcomp8x8[i] = FALSE; + } + } + } } static void r300_setup_tiling(struct r300_screen *screen, - struct r300_texture_desc *desc) + struct r300_resource *tex) { struct r300_winsys_screen *rws = screen->rws; - enum pipe_format format = desc->b.b.format; + enum pipe_format format = tex->b.b.b.format; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + tex->tex.microtile = R300_BUFFER_LINEAR; + tex->tex.macrotile[0] = R300_BUFFER_LINEAR; + if (!util_format_is_plain(format)) { return; } /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + if (!is_zb && (tex->b.b.b.height0 == 1 || dbg_no_tiling)) { return; } @@ -357,12 +418,12 @@ static void r300_setup_tiling(struct r300_screen *screen, case 1: case 4: case 8: - desc->microtile = R300_BUFFER_TILED; + tex->tex.microtile = R300_BUFFER_TILED; break; case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - desc->microtile = R300_BUFFER_SQUARETILED; + if (rws->get_value(rws, R300_VID_DRM_2_1_0)) { + tex->tex.microtile = R300_BUFFER_SQUARETILED; } break; } @@ -372,104 +433,100 @@ static void r300_setup_tiling(struct r300_screen *screen, } /* Set macrotiling. 
*/ - if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { - desc->macrotile[0] = R300_BUFFER_TILED; + if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { + tex->tex.macrotile[0] = R300_BUFFER_TILED; } } -static void r300_tex_print_info(struct r300_screen *rscreen, - struct r300_texture_desc *desc, +static void r300_tex_print_info(struct r300_resource *tex, const char *func) { fprintf(stderr, "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " "LastLevel: %i, Size: %i, Format: %s\n", func, - desc->macrotile[0] ? "YES" : " NO", - desc->microtile ? "YES" : " NO", - desc->stride_in_pixels[0], - desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, - desc->b.b.last_level, desc->size_in_bytes, - util_format_short_name(desc->b.b.format)); + tex->tex.macrotile[0] ? "YES" : " NO", + tex->tex.microtile ? "YES" : " NO", + tex->tex.stride_in_pixels[0], + tex->b.b.b.width0, tex->b.b.b.height0, tex->b.b.b.depth0, + tex->b.b.b.last_level, tex->tex.size_in_bytes, + util_format_short_name(tex->b.b.b.format)); } boolean r300_texture_desc_init(struct r300_screen *rscreen, - struct r300_texture_desc *desc, - const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned stride_in_bytes_override, - unsigned max_buffer_size) + struct r300_resource *tex, + const struct pipe_resource *base) { - desc->b.b = *base; - desc->b.b.screen = &rscreen->screen; - desc->stride_in_bytes_override = stride_in_bytes_override; - desc->width0 = base->width0; - desc->height0 = base->height0; - desc->depth0 = base->depth0; - - r300_setup_flags(desc); + tex->b.b.b.target = base->target; + tex->b.b.b.format = base->format; + tex->b.b.b.width0 = base->width0; + tex->b.b.b.height0 = base->height0; + tex->b.b.b.depth0 = base->depth0; + tex->b.b.b.array_size = base->array_size; + tex->b.b.b.last_level = base->last_level; + tex->b.b.b.nr_samples = base->nr_samples; + tex->tex.width0 = base->width0; + tex->tex.height0 = base->height0; + tex->tex.depth0 = base->depth0; + + r300_setup_flags(tex); /* Align a 3D NPOT texture to POT. */ - if (base->target == PIPE_TEXTURE_3D && desc->is_npot) { - desc->width0 = util_next_power_of_two(desc->width0); - desc->height0 = util_next_power_of_two(desc->height0); - desc->depth0 = util_next_power_of_two(desc->depth0); + if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) { + tex->tex.width0 = util_next_power_of_two(tex->tex.width0); + tex->tex.height0 = util_next_power_of_two(tex->tex.height0); + tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0); } /* Setup tiling. */ - if (microtile == R300_BUFFER_SELECT_LAYOUT || - macrotile == R300_BUFFER_SELECT_LAYOUT) { - r300_setup_tiling(rscreen, desc); - } else { - desc->microtile = microtile; - desc->macrotile[0] = macrotile; - assert(desc->b.b.last_level == 0); + if (tex->tex.microtile == R300_BUFFER_SELECT_LAYOUT) { + r300_setup_tiling(rscreen, tex); } - r300_setup_cbzb_flags(rscreen, desc); + r300_setup_cbzb_flags(rscreen, tex); /* Setup the miptree description. */ - r300_setup_miptree(rscreen, desc, TRUE); + r300_setup_miptree(rscreen, tex, TRUE); /* If the required buffer size is larger the given max size, * try again without the alignment for the CBZB clear. 
*/ - if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { - r300_setup_miptree(rscreen, desc, FALSE); + if (tex->buf_size && tex->tex.size_in_bytes > tex->buf_size) { + r300_setup_miptree(rscreen, tex, FALSE); } - r300_texture_3d_fix_mipmapping(rscreen, desc); + r300_texture_3d_fix_mipmapping(rscreen, tex); + r300_setup_zmask_flags(rscreen, tex); - if (max_buffer_size) { + if (tex->buf_size) { /* Make sure the buffer we got is large enough. */ - if (desc->size_in_bytes > max_buffer_size) { + if (tex->tex.size_in_bytes > tex->buf_size) { fprintf(stderr, "r300: texture_desc_init: The buffer is not " "large enough. Got: %i, Need: %i, Info:\n", - max_buffer_size, desc->size_in_bytes); - r300_tex_print_info(rscreen, desc, "texture_desc_init"); + tex->buf_size, tex->tex.size_in_bytes); + r300_tex_print_info(tex, "texture_desc_init"); return FALSE; } - desc->buffer_size_in_bytes = max_buffer_size; + tex->tex.buffer_size_in_bytes = tex->buf_size; } else { - desc->buffer_size_in_bytes = desc->size_in_bytes; + tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes; } if (SCREEN_DBG_ON(rscreen, DBG_TEX)) - r300_tex_print_info(rscreen, desc, "texture_desc_init"); + r300_tex_print_info(tex, "texture_desc_init"); return TRUE; } -unsigned r300_texture_get_offset(struct r300_texture_desc *desc, +unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer) { - unsigned offset = desc->offset_in_bytes[level]; + unsigned offset = tex->tex.offset_in_bytes[level]; - switch (desc->b.b.target) { + switch (tex->b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - return offset + layer * desc->layer_size_in_bytes[level]; + return offset + layer * tex->tex.layer_size_in_bytes[level]; default: assert(layer == 0); diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h index 121d215b4cb..ce6e9643ec6 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.h +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -30,7 +30,7 @@ struct pipe_resource; struct r300_screen; struct r300_texture_desc; -struct r300_texture; +struct r300_resource; enum r300_dim { DIM_WIDTH = 0, @@ -44,14 +44,10 @@ unsigned r300_get_pixel_alignment(enum pipe_format format, enum r300_dim dim, boolean is_rs690); boolean r300_texture_desc_init(struct r300_screen *rscreen, - struct r300_texture_desc *desc, - const struct pipe_resource *base, - enum r300_buffer_tiling microtile, - enum r300_buffer_tiling macrotile, - unsigned stride_in_bytes_override, - unsigned max_buffer_size); - -unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + struct r300_resource *tex, + const struct pipe_resource *base); + +unsigned r300_texture_get_offset(struct r300_resource *tex, unsigned level, unsigned layer); #endif diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 15a323989b2..97ec0a1a1f2 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -191,7 +191,12 @@ static void transform_dstreg( dst->File = translate_register_file(src->Register.File); dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); dst->WriteMask = src->Register.WriteMask; - dst->RelAddr = src->Register.Indirect; + + if (src->Register.Indirect) { + ttr->error = TRUE; + fprintf(stderr, "r300: Relative addressing of destination operands " + "is unsupported.\n"); + } } static void transform_srcreg( @@ -332,6 +337,8 @@ void r300_tgsi_to_rc(struct 
tgsi_to_rc * ttr, unsigned imm_index = 0; int i; + ttr->error = FALSE; + /* Allocate constants placeholders. * * Note: What if declared constants are not contiguous? */ diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index 97641a954b9..adb044cfe56 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -47,6 +47,9 @@ struct tgsi_to_rc { /* Vertex shaders have no half swizzles, and no way to handle them, so * until rc grows proper support, indicate if they're safe to use. */ boolean use_half_swizzles; + + /* If an error occured. */ + boolean error; }; void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index ae93fab554e..30de9ec1e32 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -37,7 +37,7 @@ struct r300_transfer { unsigned offset; /* Linear texture. */ - struct r300_texture *linear_texture; + struct r300_resource *linear_texture; }; /* Convenience cast wrapper. */ @@ -54,7 +54,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; struct pipe_resource *tex = transfer->resource; - ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, 0, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->b.b.b, 0, 0, 0, 0, tex, transfer->level, &transfer->box); } @@ -70,7 +70,7 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->level, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->linear_texture->desc.b.b, 0, &src_box); + &r300transfer->linear_texture->b.b.b, 0, &src_box); ctx->flush(ctx, 0, NULL); } @@ -83,25 +83,24 @@ r300_texture_get_transfer(struct pipe_context *ctx, const struct pipe_box *box) { struct r300_context *r300 = r300_context(ctx); - struct r300_texture *tex = r300_texture(texture); + struct r300_resource *tex = r300_resource(texture); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; + const struct util_format_description *desc = + util_format_description(texture->format); referenced_cs = - r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_CS); + r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf); if (referenced_cs) { referenced_hw = TRUE; } else { referenced_hw = - r300->rws->cs_is_buffer_referenced(r300->cs, - tex->cs_buffer, R300_REF_HW); + r300->rws->buffer_is_busy(tex->buf); } - blittable = ctx->screen->is_format_supported( - ctx->screen, texture->format, texture->target, 0, - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0); + blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || + desc->layout == UTIL_FORMAT_LAYOUT_S3TC; trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -114,8 +113,13 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. 
*/ - if (tex->desc.microtile || tex->desc.macrotile[level] || - ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { + if (tex->tex.microtile || tex->tex.macrotile[level] || + (referenced_hw && blittable && !(usage & PIPE_TRANSFER_READ))) { + if (r300->blitter->running) { + fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n"); + os_break(); + } + base.target = PIPE_TEXTURE_2D; base.format = texture->format; base.width0 = box->width; @@ -140,7 +144,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->linear_texture = r300_texture( + trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, &base)); @@ -149,14 +153,14 @@ r300_texture_get_transfer(struct pipe_context *ctx, * Let's flush and try again. */ ctx->flush(ctx, 0, NULL); - trans->linear_texture = r300_texture( + trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, &base)); if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->desc.microtile && !tex->desc.macrotile[level]) { + if (!tex->tex.microtile && !tex->tex.macrotile[level]) { goto unpipelined; } @@ -168,8 +172,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->linear_texture->desc.microtile && - !trans->linear_texture->desc.macrotile[0]); + assert(!trans->linear_texture->tex.microtile && + !trans->linear_texture->tex.macrotile[0]); /* Set the stride. * @@ -179,7 +183,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * right thing internally. */ trans->transfer.stride = - trans->linear_texture->desc.stride_in_bytes[0]; + trans->linear_texture->tex.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -194,8 +198,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, unpipelined: /* Unpipelined transfer. */ - trans->transfer.stride = tex->desc.stride_in_bytes[level]; - trans->offset = r300_texture_get_offset(&tex->desc, level, box->z); + trans->transfer.stride = tex->tex.stride_in_bytes[level]; + trans->offset = r300_texture_get_offset(tex, level, box->z); if (referenced_cs) ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); @@ -227,20 +231,19 @@ void* r300_texture_transfer_map(struct pipe_context *ctx, struct r300_context *r300 = r300_context(ctx); struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); - struct r300_texture *tex = r300_texture(transfer->resource); + struct r300_resource *tex = r300_resource(transfer->resource); char *map; - enum pipe_format format = tex->desc.b.b.format; + enum pipe_format format = tex->b.b.b.format; if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ - return rws->buffer_map(rws, - r300transfer->linear_texture->buffer, + return rws->buffer_map(r300transfer->linear_texture->buf, r300->cs, transfer->usage); } else { /* Tiling is disabled. 
*/ - map = rws->buffer_map(rws, tex->buffer, r300->cs, + map = rws->buffer_map(tex->buf, r300->cs, transfer->usage); if (!map) { @@ -258,11 +261,11 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); - struct r300_texture *tex = r300_texture(transfer->resource); + struct r300_resource *tex = r300_resource(transfer->resource); if (r300transfer->linear_texture) { - rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); + rws->buffer_unmap(r300transfer->linear_texture->buf); } else { - rws->buffer_unmap(rws, tex->buffer); + rws->buffer_unmap(tex->buf); } } diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 78021e2c5d4..b319890157f 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -226,6 +226,13 @@ void r300_translate_vertex_shader(struct r300_context *r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); + if (ttr.error) { + fprintf(stderr, "r300 VP: Cannot translate a shader. " + "Using a dummy shader instead.\n"); + r300_dummy_vertex_shader(r300, vs); + return; + } + if (compiler.Base.Program.Constants.Count > 200) { compiler.Base.remove_unused_constants = TRUE; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 460da77a4fb..d5c73585c81 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -28,38 +28,45 @@ * Any winsys hosting this pipe needs to implement r300_winsys_screen and then * call r300_screen_create to start things. */ +#include "r300_defines.h" + +#include "pipebuffer/pb_bufmgr.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "r300_defines.h" - #define R300_MAX_CMDBUF_DWORDS (16 * 1024) struct winsys_handle; struct r300_winsys_screen; -struct r300_winsys_buffer; /* for map/unmap etc. */ -struct r300_winsys_cs_buffer; /* for write_reloc etc. */ +#define r300_winsys_bo pb_buffer +#define r300_winsys_bo_reference(pdst, src) pb_reference(pdst, src) + +struct r300_winsys_cs_handle; /* for write_reloc etc. */ struct r300_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t buf[R300_MAX_CMDBUF_DWORDS]; /* The command buffer. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t *buf; /* The command buffer. */ }; enum r300_value_id { R300_VID_PCI_ID, R300_VID_GB_PIPES, R300_VID_Z_PIPES, - R300_VID_SQUARE_TILING_SUPPORT, - R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ - R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ - R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer */ - R300_CAN_HYPERZ, -}; - -enum r300_reference_domain { /* bitfield */ - R300_REF_CS = 1, - R300_REF_HW = 2 + R300_VID_GART_SIZE, + R300_VID_VRAM_SIZE, + R300_VID_DRM_MAJOR, + R300_VID_DRM_MINOR, + R300_VID_DRM_PATCHLEVEL, + + /* These should probably go away: */ + R300_VID_DRM_2_1_0, /* Square tiling. */ + R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ + R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ + R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask */ + + R300_CAN_HYPERZ, /* ZMask + HiZ */ + R300_CAN_AACOMPRESS, /* CMask */ }; struct r300_winsys_screen { @@ -98,91 +105,78 @@ struct r300_winsys_screen { * \param domain A bitmask of the R300_DOMAIN_* flags. 
* \return The created buffer object. */ - struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, + struct r300_winsys_bo *(*buffer_create)(struct r300_winsys_screen *ws, unsigned size, unsigned alignment, unsigned bind, unsigned usage, enum r300_buffer_domain domain); - struct r300_winsys_cs_buffer *(*buffer_get_cs_handle)( - struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); - - /** - * Reference a buffer object (assign with reference counting). - * - * \param ws The winsys this function is called from. - * \param pdst A destination pointer to set the source buffer to. - * \param src A source buffer object. - */ - void (*buffer_reference)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer **pdst, - struct r300_winsys_buffer *src); + struct r300_winsys_cs_handle *(*buffer_get_cs_handle)( + struct r300_winsys_bo *buf); /** * Map the entire data store of a buffer object into the client's address * space. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to map. * \param cs A command stream to flush if the buffer is referenced by it. * \param usage A bitmask of the PIPE_TRANSFER_* flags. * \return The pointer at the beginning of the buffer. */ - void *(*buffer_map)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void *(*buffer_map)(struct r300_winsys_bo *buf, struct r300_winsys_cs *cs, enum pipe_transfer_usage usage); /** * Unmap a buffer object from the client's address space. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to unmap. */ - void (*buffer_unmap)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); + void (*buffer_unmap)(struct r300_winsys_bo *buf); + + /** + * Return TRUE if a buffer object is being used by the GPU. + * + * \param buf A winsys buffer object. + */ + boolean (*buffer_is_busy)(struct r300_winsys_bo *buf); /** * Wait for a buffer object until it is not used by a GPU. This is * equivalent to a fence placed after the last command using the buffer, * and synchronizing to the fence. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to wait for. */ - void (*buffer_wait)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf); + void (*buffer_wait)(struct r300_winsys_bo *buf); /** * Return tiling flags describing a memory layout of a buffer object. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to get the flags from. * \param macrotile A pointer to the return value of the microtile flag. * \param microtile A pointer to the return value of the macrotile flag. * * \note microtile and macrotile are not bitmasks! */ - void (*buffer_get_tiling)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void (*buffer_get_tiling)(struct r300_winsys_bo *buf, enum r300_buffer_tiling *microtile, enum r300_buffer_tiling *macrotile); /** * Set tiling flags describing a memory layout of a buffer object. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to set the flags for. + * \param cs A command stream to flush if the buffer is referenced by it. * \param macrotile A macrotile flag. * \param microtile A microtile flag. * \param stride A stride of the buffer in bytes, for texturing. * * \note microtile and macrotile are not bitmasks! 
*/ - void (*buffer_set_tiling)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + void (*buffer_set_tiling)(struct r300_winsys_bo *buf, + struct r300_winsys_cs *cs, enum r300_buffer_tiling microtile, enum r300_buffer_tiling macrotile, unsigned stride); @@ -197,7 +191,7 @@ struct r300_winsys_screen { * \param stride The returned buffer stride in bytes. * \param size The returned buffer size. */ - struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, + struct r300_winsys_bo *(*buffer_from_handle)(struct r300_winsys_screen *ws, struct winsys_handle *whandle, unsigned *stride, unsigned *size); @@ -206,14 +200,12 @@ struct r300_winsys_screen { * Get a winsys handle from a winsys buffer. The internal structure * of the handle is platform-specific and only a winsys should access it. * - * \param ws The winsys this function is called from. * \param buf A winsys buffer object to get the handle from. * \param whandle A winsys handle pointer. * \param stride A stride of the buffer in bytes, for texturing. * \return TRUE on success. */ - boolean (*buffer_get_handle)(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, + boolean (*buffer_get_handle)(struct r300_winsys_bo *buf, unsigned stride, struct winsys_handle *whandle); @@ -248,7 +240,7 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_add_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, + struct r300_winsys_cs_handle *buf, enum r300_buffer_domain rd, enum r300_buffer_domain wd); @@ -269,7 +261,7 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_write_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf); + struct r300_winsys_cs_handle *buf); /** * Flush a command stream. @@ -279,6 +271,13 @@ struct r300_winsys_screen { void (*cs_flush)(struct r300_winsys_cs *cs); /** + * Wait until the last flush is completed. + * + * \param cs A command stream. + */ + void (*cs_sync_flush)(struct r300_winsys_cs *cs); + + /** * Set a flush callback which is called from winsys when flush is * required. * @@ -291,16 +290,13 @@ struct r300_winsys_screen { void *user); /** - * Return TRUE if a buffer is referenced by a command stream or by hardware - * (i.e. is busy), based on the domain parameter. + * Return TRUE if a buffer is referenced by a command stream. * * \param cs A command stream. * \param buf A winsys buffer. - * \param domain A bitmask of the R300_REF_* enums. 
*/ boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_reference_domain domain); + struct r300_winsys_cs_handle *buf); }; #endif /* R300_WINSYS_H */ diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index a690b671e49..436de9c4dbd 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -22,7 +22,6 @@ C_SOURCES = \ evergreen_state.c \ eg_asm.c \ r600_translate.c \ - r600_state_common.c \ - r600_upload.c + r600_state_common.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index e51f50c5df5..5a5fa6d65fd 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -28,7 +28,6 @@ r600 = env.ConvenienceLibrary( 'r600_state_common.c', 'r600_texture.c', 'r600_translate.c', - 'r600_upload.c', 'r700_asm.c', 'evergreen_state.c', 'eg_asm.c', diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 5a39d7cdeec..b5fcc7106fe 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim) default: case PIPE_TEXTURE_1D: return V_030000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_030000_SQ_TEX_DIM_1D_ARRAY; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: return V_030000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_030000_SQ_TEX_DIM_2D_ARRAY; case PIPE_TEXTURE_3D: return V_030000_SQ_TEX_DIM_3D; case PIPE_TEXTURE_CUBE: @@ -289,10 +293,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { /* 8-bit buffers. */ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_SWAP_ALT; + case PIPE_FORMAT_A8_UNORM: return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_028C70_SWAP_STD; @@ -313,6 +321,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_028C70_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: return V_028C70_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_028C70_SWAP_STD; @@ -352,9 +361,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_SWAP_STD_REV; + return V_028C70_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_028C70_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; @@ -362,14 +373,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_028C70_COLOR_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_028C70_COLOR_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_028C70_COLOR_32_32_32_32_FLOAT; - return 0; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_SWAP_STD; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; @@ -381,9 +391,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { /* 8-bit buffers. 
*/ + case PIPE_FORMAT_L4A4_UNORM: + return V_028C70_COLOR_4_4; + case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_028C70_COLOR_8; @@ -404,6 +418,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: return V_028C70_COLOR_8_8; @@ -430,7 +445,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_028C70_COLOR_10_10_10_2; + return V_028C70_COLOR_2_10_10_10; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: @@ -471,6 +486,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_028C70_COLOR_32_32; /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_028C70_COLOR_32_32_32_32; case PIPE_FORMAT_R32G32B32_FLOAT: return V_028C70_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -501,9 +519,4 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) -{ - return r600_translate_colorformat(format) != ~0; -} - #endif diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 306ca03234f..3efdbaba0c3 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -103,7 +103,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } blend->cb_target_mask = target_mask; r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); + color_control, 0xFFFFFFFD, NULL); r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); for (int i = 0; i < 8; i++) { @@ -351,7 +351,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct r600_resource *rbuffer; unsigned format; uint32_t word4 = 0, yuv_format = 0, pitch = 0; - unsigned char swizzle[4]; + unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; if (resource == NULL) @@ -380,35 +380,42 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte if (desc == NULL) { R600_ERR("unknow format %d\n", state->format); } - tmp = (struct r600_resource_texture*)texture; + tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { + r600_texture_depth_flush(ctx, texture, TRUE); + tmp = tmp->flushed_depth_texture; + } + + if (tmp->force_int_type) { + word4 &= C_030010_NUM_FORMAT_ALL; + word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT); + } + rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { - r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - } - pitch = align(tmp->pitch_in_pixels[0], 8); + + pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; /* FIXME properly handle first level != 0 */ 
r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, S_030000_DIM(r600_tex_dim(texture->target)) | S_030000_PITCH((pitch / 8) - 1) | + S_030000_NON_DISP_TILING_ORDER(tile_type) | S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, S_030004_TEX_HEIGHT(texture->height0 - 1) | - S_030004_TEX_DEPTH(texture->depth0 - 1), + S_030004_TEX_DEPTH(texture->depth0 - 1) | + S_030004_ARRAY_MODE(array_mode), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | + word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) | S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, @@ -431,7 +438,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou for (int i = 0; i < count; i++) { if (resource[i]) { - evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); } } } @@ -446,9 +454,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -457,7 +467,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } @@ -638,11 +649,19 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned color_info; unsigned format, swap, ntype; unsigned offset; + unsigned tile_type; const struct util_format_description *desc; struct r600_bo *bo[3]; + int i; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + + if (rtex->depth && !rtex->is_flushing_texture) { + r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); + rtex = rtex->flushed_depth_texture; + } + rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; @@ -651,21 +670,43 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, level, state->cbufs[cb]->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = 
rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); + desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_028C70_NUMBER_SRGB; - format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); + format = r600_translate_colorformat(surf->base.format); + swap = r600_translate_colorswap(surf->base.format); + + /* disable when gallium grows int textures */ + if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) + ntype = 4; + color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | + S_028C70_ARRAY_MODE(rtex->array_mode[level]) | S_028C70_BLEND_CLAMP(1) | S_028C70_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_028C70_SOURCE_FORMAT(1); + + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + /* we can only set the export size if any thing is snorm/unorm component is > 11 bits, + if we aren't a float, sint or uint */ + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && + desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && + ntype != 4 && ntype != 5) + color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC); + + if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) { + tile_type = rtex->tile_type; + } else /* workaround for linear buffers */ + tile_type = 1; /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, @@ -690,7 +731,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, - S_028C74_NON_DISP_TILING_ORDER(1), + S_028C74_NON_DISP_TILING_ORDER(tile_type), 0xFFFFFFFF, bo[0]); } @@ -711,17 +752,14 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode[level] = 2; - rtex->tile_type = 1; - rtex->depth = 1; + rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, level, state->zsbuf->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); @@ -837,51 +875,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } } -static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - uint32_t offset; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. 
- */ - if (buffer == NULL) { - return; - } - - r600_upload_const_buffer(rctx, buffer, &offset); - - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } -} - void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; @@ -909,7 +902,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = evergreen_set_blend_color; rctx->context.set_clip_state = evergreen_set_clip_state; - rctx->context.set_constant_buffer = evergreen_set_constant_buffer; + rctx->context.set_constant_buffer = r600_set_constant_buffer; rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view; rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state; rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple; @@ -921,6 +914,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; rctx->context.set_viewport_state = evergreen_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void evergreen_init_config(struct r600_pipe_context *rctx) @@ -1325,216 +1319,6 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx) } } -static void evergreen_spi_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; - - rstate.nregs = 0; - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rctx->flatshade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); -} - -void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; - - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || 
!rctx->nvertex_buffer) - return; - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; - } else { - /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; - } - - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - unsigned vbuffer_index; - vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->vbuffer_offset[i]; - } else { - /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = 0; - } - if (vertex_buffer == NULL || rbuffer == NULL) - continue; - offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, - rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - S_030008_STRIDE(vertex_buffer->stride), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, - S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, - 0xC0000000, 0xFFFFFFFF, NULL); - evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); - } -} - -int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - struct r600_drawl draw; - unsigned prim; - - memset(&draw, 0, sizeof(struct r600_drawl)); - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; - r600_upload_index_buffer(rctx, &draw); - } else { - 
draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; - } - - switch (draw.index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw.index_size); - return; - } - if (r600_conv_pipe_prim(draw.mode, &prim)) - return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } - - evergreen_spi_update(rctx); - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw.count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw.index_buffer) { - rbuffer = (struct r600_resource*)draw.index_buffer; - rdraw.indices = rbuffer->bo; - rdraw.indices_bo_offset = draw.index_buffer_offset; - } - evergreen_context_draw(&rctx->ctx, &rdraw); - - pipe_resource_reference(&draw.index_buffer, NULL); -} - void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_state *rstate = &shader->rstate; @@ -1733,3 +1517,31 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028000_COPY_CENTROID(1), NULL); return rstate; } + +void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, + S_030008_STRIDE(stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, + S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + 
r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, + 0xC0000000, 0xFFFFFFFF, NULL); +} diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index e09e02ca000..f0a1ee0cd02 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -327,6 +327,9 @@ #define S_028C70_SOURCE_FORMAT(x) (((x) & 0x3) << 24) #define G_028C70_SOURCE_FORMAT(x) (((x) >> 24) & 0x3) #define C_028C70_SOURCE_FORMAT 0xFCFFFFFF +#define V_028C70_EXPORT_4C_32BPC 0x0 +#define V_028C70_EXPORT_4C_16BPC 0x1 +#define V_028C70_EXPORT_2C_32BPC 0x2 /* Do not use */ #define S_028C70_RAT(x) (((x) & 0x1) << 26) #define G_028C70_RAT(x) (((x) >> 26) & 0x1) #define C_028C70_RAT 0xFBFFFFFF @@ -427,15 +430,6 @@ #define C_028800_STENCILZFAIL_BF 0x1FFFFFFF #define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB #define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) #define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) #define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 @@ -939,6 +933,9 @@ #define V_030000_SQ_TEX_DIM_2D_ARRAY 0x00000005 #define V_030000_SQ_TEX_DIM_2D_MSAA 0x00000006 #define V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007 +#define S_030000_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 5) +#define G_030000_NON_DISP_TILING_ORDER(x) (((x) >> 5) & 0x1) +#define C_030000_NON_DISP_TILING_ORDER 0xFFFFFFDF #define S_030000_PITCH(x) (((x) & 0xFFF) << 6) #define G_030000_PITCH(x) (((x) >> 6) & 0xFFF) #define C_030000_PITCH 0xFFFC003F diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index a852bef6156..64c52bca795 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -113,6 +113,7 @@ struct r600_tiling_info { enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); +unsigned r600_get_clock_crystal_freq(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; @@ -249,6 +250,7 @@ struct r600_context { struct list_head query_list; unsigned num_query_running; struct list_head fenced_bo; + unsigned max_db; /* for OQ */ }; struct r600_draw { diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index f4ff2fc3d43..1393df88757 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -50,6 +50,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: @@ -97,6 +98,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: case 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: @@ -288,6 +290,31 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) { int r; + if (bc->cf_last && bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) && + output->type == bc->cf_last->output.type && + output->elem_size == bc->cf_last->output.elem_size && + output->swizzle_x == bc->cf_last->output.swizzle_x && + output->swizzle_y == bc->cf_last->output.swizzle_y && + output->swizzle_z == bc->cf_last->output.swizzle_z && + output->swizzle_w == bc->cf_last->output.swizzle_w && + (output->burst_count + bc->cf_last->output.burst_count) <= 16) { + + if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && + (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { + + bc->cf_last->output.gpr = output->gpr; + bc->cf_last->output.array_base = output->array_base; + bc->cf_last->output.burst_count += output->burst_count; + return 0; + + } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && + output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { + + bc->cf_last->output.burst_count += output->burst_count; + return 0; + } + } + r = r600_bc_add_cf(bc); if (r) return r; @@ -418,6 +445,20 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) } } +static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + case CHIPREV_R700: + return !alu->is_op3 && + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + case CHIPREV_EVERGREEN: + default: + return !alu->is_op3 && + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + } +} + static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { switch (bc->chiprev) { @@ -480,9 +521,9 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) case CHIPREV_EVERGREEN: default: if (!alu->is_op3) + /* Note that FLT_TO_INT* instructions are vector instructions + * on Evergreen, despite what the documentation says. 
 */ return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || - alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT || - alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT || @@ -563,7 +604,7 @@ struct alu_bank_swizzle { int hw_cfile_elem[4]; }; -const unsigned cycle_for_bank_swizzle_vec[][3] = { +static const unsigned cycle_for_bank_swizzle_vec[][3] = { [SQ_ALU_VEC_012] = { 0, 1, 2 }, [SQ_ALU_VEC_021] = { 0, 2, 1 }, [SQ_ALU_VEC_120] = { 1, 2, 0 }, @@ -572,7 +613,7 @@ const unsigned cycle_for_bank_swizzle_vec[][3] = { [SQ_ALU_VEC_210] = { 2, 1, 0 } }; -const unsigned cycle_for_bank_swizzle_scl[][3] = { +static const unsigned cycle_for_bank_swizzle_scl[][3] = { [SQ_ALU_SCL_210] = { 2, 1, 0 }, [SQ_ALU_SCL_122] = { 1, 2, 2 }, [SQ_ALU_SCL_212] = { 2, 1, 2 }, @@ -785,7 +826,8 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, for (i = 0; i < 5; ++i) { if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { gpr[i] = prev[i]->dst.sel; - if (is_alu_reduction_inst(bc, prev[i])) + /* cube writes more than PV.X */ + if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i])) chan[i] = 0; else chan[i] = prev[i]->dst.chan; @@ -865,7 +907,7 @@ static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu, for (i = 0; i < num_src; ++i) { if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { - uint32_t value = alu->src[i].value[alu->src[i].chan]; + uint32_t value = alu->src[i].value; unsigned found = 0; for (j = 0; j < *nliteral; ++j) { if (literal[j] == value) { @@ -892,7 +934,7 @@ static void r600_bc_alu_adjust_literals(struct r600_bc *bc, for (i = 0; i < num_src; ++i) { if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { - uint32_t value = alu->src[i].value[alu->src[i].chan]; + uint32_t value = alu->src[i].value; for (j = 0; j < nliteral; ++j) { if (literal[j] == value) { alu->src[i].chan = j; @@ -1195,8 +1237,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->ngpr = nalu->src[i].sel + 1; } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) - r600_bc_special_constants( - nalu->src[i].value[nalu->src[i].chan], + r600_bc_special_constants(nalu->src[i].value, &nalu->src[i].sel, &nalu->src[i].neg); } if (nalu->dst.sel >= bc->ngpr) { @@ -1308,6 +1349,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) return -ENOMEM; memcpy(ntex, tex, sizeof(struct r600_bc_tex)); + /* we can't fetch data and use it as a texture lookup address in the same TEX clause */ + if (bc->cf_last != NULL && + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { + struct r600_bc_tex *ttex; + LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { + if (ttex->dst_gpr == ntex->src_gpr) { + bc->force_add_cf = 1; + break; + } + } + } + /* a cf can contain only alu, only vtx, or only tex */ if (bc->cf_last == NULL || bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX || @@ -1374,6 +1427,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign } } bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) | + S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); @@ -2674,18 +2728,73 @@ void r600_bc_dump(struct r600_bc *bc) } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { - //TODO + 
fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", tex->inst); + fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id); + fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr); + fprintf(stderr, "REL:%d)\n", tex->src_rel); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr); + fprintf(stderr, "REL:%d ", tex->dst_rel); + fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x); + fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y); + fprintf(stderr, "SEL_Z:%d ", tex->dst_sel_z); + fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w); + fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias); + fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x); + fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y); + fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z); + fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w); + id++; + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET_X:%d ", tex->offset_x); + fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y); + fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z); + fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id); + fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x); + fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y); + fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z); + fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w); + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + id++; } LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "INST:%d ", vtx->inst); + fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type); + fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id); + id++; + /* This assumes that no semantic fetches exist */ + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); + fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); + fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); + fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr); + fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x); + fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y); + fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); + fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); + fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); + fprintf(stderr, "DATA_FORMAT:%d ", vtx->data_format); + fprintf(stderr, "NUM_FORMAT_ALL:%d ", vtx->num_format_all); + fprintf(stderr, "FORMAT_COMP_ALL:%d ", vtx->format_comp_all); + fprintf(stderr, "SRF_MODE_ALL:%d\n", vtx->srf_mode_all); + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); //TODO + id++; + fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + id++; } } fprintf(stderr, "--------------------------------------\n"); } -void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +static void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) { struct r600_pipe_state *rstate; unsigned i = 0; @@ -2721,42 +2830,6 @@ void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) 0xFFFFFFFF, ve->fetch_shader); } -void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) -{ - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - 
S_SQ_CF_WORD1_COUNT((count - 8) - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} - static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format, unsigned *format_comp) { @@ -2780,7 +2853,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, } switch (desc->channel[i].type) { - /* Half-floats, floats, doubles */ + /* Half-floats, floats, ints */ case UTIL_FORMAT_TYPE_FLOAT: switch (desc->channel[i].size) { case 16: @@ -2792,8 +2865,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_FLOAT; break; case 3: - *format = FMT_16_16_16_FLOAT; - break; case 4: *format = FMT_16_16_16_16_FLOAT; break; @@ -2833,8 +2904,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_8_8; break; case 3: - // *format = FMT_8_8_8; /* fails piglit draw-vertices test */ - // break; case 4: *format = FMT_8_8_8_8; break; @@ -2849,8 +2918,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16; break; case 3: - // *format = FMT_16_16_16; /* fails piglit draw-vertices test */ - // break; case 4: *format = FMT_16_16_16_16; break; @@ -2938,10 +3005,10 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; - r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp); - desc = util_format_description(ve->hw_format[i]); + r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); + desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { - R600_ERR("unknown format %d\n", ve->hw_format[i]); + R600_ERR("unknown format %d\n", ve->elements[i].src_format); r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); return -EINVAL; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 519245f3af2..453c29790c1 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -34,7 +34,7 @@ struct r600_bc_alu_src { unsigned neg; unsigned abs; unsigned rel; - u32 *value; + uint32_t value; }; struct r600_bc_alu_dst { @@ -201,8 +201,6 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); void r600_bc_dump(struct r600_bc *bc); -void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); -void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); diff --git a/src/gallium/drivers/r600/r600_blit.c 
b/src/gallium/drivers/r600/r600_blit.c index b9ec9592e35..9865ea17ae5 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -36,6 +36,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + rctx->blit = true; r600_context_queries_suspend(&rctx->ctx); util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); @@ -53,9 +54,9 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op if (rctx->states[R600_PIPE_STATE_CLIP]) { util_blitter_save_clip(rctx->blitter, &rctx->clip); } - util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); - - rctx->vertex_elements = NULL; + util_blitter_save_vertex_buffers(rctx->blitter, + rctx->vbuf_mgr->nr_vertex_buffers, + rctx->vbuf_mgr->vertex_buffer); if (op & (R600_CLEAR_SURFACE | R600_COPY)) util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); @@ -76,6 +77,7 @@ static void r600_blitter_end(struct pipe_context *ctx) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; r600_context_queries_resume(&rctx->ctx); + rctx->blit = false; } void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) @@ -84,13 +86,17 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t struct pipe_surface *zsurf, *cbsurf, surf_tmpl; int level = 0; float depth = 1.0f; - surf_tmpl.format = texture->resource.base.b.format; + + if (!texture->dirty_db) + return; + + surf_tmpl.format = texture->resource.b.b.b.format; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = 0; surf_tmpl.u.tex.last_layer = 0; surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL; - zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl); + zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl); surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format; surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; @@ -107,6 +113,48 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); + + texture->dirty_db = FALSE; +} + +void r600_flush_depth_textures(struct r600_pipe_context *rctx) +{ + unsigned int i; + + if (rctx->blit) return; + + /* FIXME: This handles fragment shader textures only. 
*/ + + for (i = 0; i < rctx->ps_samplers.n_views; ++i) { + struct r600_pipe_sampler_view *view; + struct r600_resource_texture *tex; + + view = rctx->ps_samplers.views[i]; + if (!view) continue; + + tex = (struct r600_resource_texture *)view->base.texture; + if (!tex->depth) + continue; + + if (tex->is_flushing_texture) + continue; + + r600_blit_uncompress_depth(&rctx->context, tex); + } + + /* also check CB here */ + for (i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + struct r600_resource_texture *tex; + tex = (struct r600_resource_texture *)rctx->framebuffer.cbufs[i]->texture; + + if (!tex->depth) + continue; + + if (tex->is_flushing_texture) + continue; + + r600_blit_uncompress_depth(&rctx->context, tex); + } } static void r600_clear(struct pipe_context *ctx, unsigned buffers, @@ -171,6 +219,52 @@ static void r600_hw_copy_region(struct pipe_context *ctx, r600_blitter_end(ctx); } +struct texture_orig_info { + unsigned format; + unsigned width0; + unsigned height0; +}; + +static void r600_s3tc_to_blittable(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; + unsigned pixsize = util_format_get_blocksize(tex->format); + int new_format; + int new_height, new_width; + + orig->format = tex->format; + orig->width0 = tex->width0; + orig->height0 = tex->height0; + + if (pixsize == 8) + new_format = PIPE_FORMAT_R16G16B16A16_UNORM; /* 64-bit block */ + else + new_format = PIPE_FORMAT_R32G32B32A32_UNORM; /* 128-bit block */ + + new_width = util_format_get_nblocksx(tex->format, orig->width0); + new_height = util_format_get_nblocksy(tex->format, orig->height0); + + rtex->force_int_type = true; + tex->width0 = new_width; + tex->height0 = new_height; + tex->format = new_format; + +} + +static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex; + rtex->force_int_type = false; + + tex->format = orig->format; + tex->width0 = orig->width0; + tex->height0 = orig->height0; +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, @@ -179,15 +273,36 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned src_level, const struct pipe_box *src_box) { - boolean is_depth; - /* there is something wrong with depth resource copies at the moment so avoid them for now */ - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - if (is_depth) - util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); - else - r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); + struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; + struct texture_orig_info orig_info[2]; + boolean restore_orig[2]; + + if (rsrc->depth && !rsrc->is_flushing_texture) + r600_texture_depth_flush(ctx, src, FALSE); + + restore_orig[0] = restore_orig[1] = FALSE; + + if (util_format_is_s3tc(src->format)) { + r600_s3tc_to_blittable(src, src_level, &orig_info[0]); + restore_orig[0] = TRUE; + } + + if (util_format_is_s3tc(dst->format)) { + r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]); + restore_orig[1] = TRUE; + /* translate the dst box as well */ + dstx = util_format_get_nblocksx(orig_info[1].format, dstx); + dsty = util_format_get_nblocksy(orig_info[1].format, dsty); + } + + r600_hw_copy_region(ctx, dst, 
dst_level, dstx, dsty, dstz, + src, src_level, src_box); + + if (restore_orig[0]) + r600_reset_blittable_to_s3tc(src, src_level, &orig_info[0]); + + if (restore_orig[1]) + r600_reset_blittable_to_s3tc(dst, dst_level, &orig_info[1]); } void r600_init_blit_functions(struct r600_pipe_context *rctx) @@ -197,3 +312,19 @@ void r600_init_blit_functions(struct r600_pipe_context *rctx) rctx->context.clear_depth_stencil = r600_clear_depth_stencil; rctx->context.resource_copy_region = r600_resource_copy_region; } + +void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) +{ + struct pipe_box sbox; + + sbox.x = sbox.y = sbox.z = 0; + sbox.width = texture->resource.b.b.b.width0; + sbox.height = texture->resource.b.b.b.height0; + /* XXX that might be wrong */ + sbox.depth = 1; + + r600_hw_copy_region(ctx, (struct pipe_resource *)texture, 0, + 0, 0, 0, + (struct pipe_resource *)texture->flushed_depth_texture, 0, + &sbox); +} diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 469c8195fe9..0c5d7133c7a 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -29,85 +29,58 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> +#include "util/u_upload_mgr.h" + #include "state_tracker/drm_driver.h" + #include <xf86drm.h> #include "radeon_drm.h" + #include "r600.h" #include "r600_pipe.h" -extern struct u_resource_vtbl r600_buffer_vtbl; - - -struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ) -{ - struct r600_resource_buffer *rbuffer; - struct r600_bo *bo; - /* XXX We probably want a different alignment for buffers and textures. */ - unsigned alignment = 4096; - - rbuffer = CALLOC_STRUCT(r600_resource_buffer); - if (rbuffer == NULL) - return NULL; - - rbuffer->magic = R600_BUFFER_MAGIC; - rbuffer->user_buffer = NULL; - rbuffer->r.base.b = *templ; - pipe_reference_init(&rbuffer->r.base.b.reference, 1); - rbuffer->r.base.b.screen = screen; - rbuffer->r.base.vtbl = &r600_buffer_vtbl; - rbuffer->r.size = rbuffer->r.base.b.width0; - rbuffer->r.bo_size = rbuffer->r.size; - rbuffer->uploaded = FALSE; - bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage); - if (bo == NULL) { - FREE(rbuffer); - return NULL; - } - rbuffer->r.bo = bo; - return &rbuffer->r.base.b; -} - -struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, - void *ptr, unsigned bytes, - unsigned bind) -{ - struct r600_resource_buffer *rbuffer; - - rbuffer = CALLOC_STRUCT(r600_resource_buffer); - if (rbuffer == NULL) - return NULL; - - rbuffer->magic = R600_BUFFER_MAGIC; - pipe_reference_init(&rbuffer->r.base.b.reference, 1); - rbuffer->r.base.vtbl = &r600_buffer_vtbl; - rbuffer->r.base.b.screen = screen; - rbuffer->r.base.b.target = PIPE_BUFFER; - rbuffer->r.base.b.format = PIPE_FORMAT_R8_UNORM; - rbuffer->r.base.b.usage = PIPE_USAGE_IMMUTABLE; - rbuffer->r.base.b.bind = bind; - rbuffer->r.base.b.width0 = bytes; - rbuffer->r.base.b.height0 = 1; - rbuffer->r.base.b.depth0 = 1; - rbuffer->r.base.b.array_size = 1; - rbuffer->r.base.b.flags = 0; - rbuffer->r.bo = NULL; - rbuffer->r.bo_size = 0; - rbuffer->user_buffer = ptr; - rbuffer->uploaded = FALSE; - return &rbuffer->r.base.b; -} - static void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { + struct r600_screen *rscreen = (struct r600_screen*)screen; struct 
r600_resource_buffer *rbuffer = r600_buffer(buf); if (rbuffer->r.bo) { r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL); } rbuffer->r.bo = NULL; - FREE(rbuffer); + util_slab_free(&rscreen->pool_buffers, rbuffer); +} + +static unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, + struct pipe_resource *buf, + unsigned level, int layer) +{ + /* FIXME */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + +static struct pipe_transfer *r600_get_transfer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx; + struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers); + + transfer->resource = resource; + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + transfer->stride = 0; + transfer->layer_stride = 0; + transfer->data = NULL; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. + */ + return transfer; } static void *r600_buffer_transfer_map(struct pipe_context *pipe, @@ -117,8 +90,8 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, int write = 0; uint8_t *data; - if (rbuffer->user_buffer) - return (uint8_t*)rbuffer->user_buffer + transfer->box.x; + if (rbuffer->r.b.user_ptr) + return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x; if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) { /* FIXME */ @@ -138,7 +111,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, { struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource); - if (rbuffer->user_buffer) + if (rbuffer->r.b.user_ptr) return; if (rbuffer->r.bo) @@ -151,128 +124,163 @@ static void r600_buffer_transfer_flush_region(struct pipe_context *pipe, { } -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) +static void r600_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *transfer) { - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx; + util_slab_free(&rctx->pool_transfers, transfer); } -struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, - struct winsys_handle *whandle) +static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { - struct radeon *rw = (struct radeon*)screen->winsys; - struct r600_resource *rbuffer; - struct r600_bo *bo = NULL; + struct radeon *ws = (struct radeon*)pipe->winsys; + struct r600_resource_buffer *rbuffer = r600_buffer(resource); + uint8_t *map = NULL; - bo = r600_bo_handle(rw, whandle->handle, NULL); - if (bo == NULL) { - return NULL; - } + assert(rbuffer->r.b.user_ptr == NULL); - rbuffer = CALLOC_STRUCT(r600_resource); - if (rbuffer == NULL) { - r600_bo_reference(rw, &bo, NULL); - return NULL; - } + map = r600_bo_map(ws, rbuffer->r.bo, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage, + pipe); - pipe_reference_init(&rbuffer->base.b.reference, 1); - rbuffer->base.b.target = PIPE_BUFFER; - rbuffer->base.b.screen = screen; - rbuffer->base.vtbl = &r600_buffer_vtbl; - rbuffer->bo = bo; - return &rbuffer->base.b; + memcpy(map + box->x, data, box->width); + + if (rbuffer->r.bo) + r600_bo_unmap(ws, 
rbuffer->r.bo); } -struct u_resource_vtbl r600_buffer_vtbl = +static const struct u_resource_vtbl r600_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ r600_buffer_destroy, /* resource_destroy */ r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + r600_get_transfer, /* get_transfer */ + r600_transfer_destroy, /* transfer_destroy */ r600_buffer_transfer_map, /* transfer_map */ r600_buffer_transfer_flush_region, /* transfer_flush_region */ r600_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + r600_buffer_transfer_inline_write /* transfer_inline_write */ }; -int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ) { - if (r600_buffer_is_user_buffer(draw->index_buffer)) { - struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); - unsigned upload_offset; - int ret = 0; - - ret = r600_upload_buffer(rctx->rupload_vb, - draw->index_buffer_offset, - draw->count * draw->index_size, - rbuffer, - &upload_offset, - &rbuffer->r.bo_size, - &rbuffer->r.bo); - if (ret) - return ret; - rbuffer->uploaded = TRUE; - draw->index_buffer_offset = upload_offset; + struct r600_screen *rscreen = (struct r600_screen*)screen; + struct r600_resource_buffer *rbuffer; + struct r600_bo *bo; + /* XXX We probably want a different alignment for buffers and textures. */ + unsigned alignment = 4096; + + rbuffer = util_slab_alloc(&rscreen->pool_buffers); + + rbuffer->magic = R600_BUFFER_MAGIC; + rbuffer->r.b.b.b = *templ; + pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); + rbuffer->r.b.b.b.screen = screen; + rbuffer->r.b.b.vtbl = &r600_buffer_vtbl; + rbuffer->r.b.user_ptr = NULL; + rbuffer->r.size = rbuffer->r.b.b.b.width0; + rbuffer->r.bo_size = rbuffer->r.size; + + bo = r600_bo((struct radeon*)screen->winsys, + rbuffer->r.b.b.b.width0, + alignment, rbuffer->r.b.b.b.bind, + rbuffer->r.b.b.b.usage); + + if (bo == NULL) { + FREE(rbuffer); + return NULL; } + rbuffer->r.bo = bo; + return &rbuffer->r.b.b.b; +} + +struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, + void *ptr, unsigned bytes, + unsigned bind) +{ + struct r600_screen *rscreen = (struct r600_screen*)screen; + struct r600_resource_buffer *rbuffer; - return 0; + rbuffer = util_slab_alloc(&rscreen->pool_buffers); + + rbuffer->magic = R600_BUFFER_MAGIC; + pipe_reference_init(&rbuffer->r.b.b.b.reference, 1); + rbuffer->r.b.b.vtbl = &r600_buffer_vtbl; + rbuffer->r.b.b.b.screen = screen; + rbuffer->r.b.b.b.target = PIPE_BUFFER; + rbuffer->r.b.b.b.format = PIPE_FORMAT_R8_UNORM; + rbuffer->r.b.b.b.usage = PIPE_USAGE_IMMUTABLE; + rbuffer->r.b.b.b.bind = bind; + rbuffer->r.b.b.b.width0 = bytes; + rbuffer->r.b.b.b.height0 = 1; + rbuffer->r.b.b.b.depth0 = 1; + rbuffer->r.b.b.b.array_size = 1; + rbuffer->r.b.b.b.flags = 0; + rbuffer->r.b.user_ptr = ptr; + rbuffer->r.bo = NULL; + rbuffer->r.bo_size = 0; + return &rbuffer->r.b.b.b; } -int r600_upload_user_buffers(struct r600_pipe_context *rctx) +struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, + struct winsys_handle *whandle) { - enum pipe_error ret = PIPE_OK; - int i, nr; - - nr = rctx->vertex_elements->count; - nr = rctx->nvertex_buffer; - - for (i = 0; i < nr; i++) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if 
(r600_buffer_is_user_buffer(vb->buffer)) { - struct r600_resource_buffer *rbuffer = r600_buffer(vb->buffer); - unsigned upload_offset; - - ret = r600_upload_buffer(rctx->rupload_vb, - 0, vb->buffer->width0, - rbuffer, - &upload_offset, - &rbuffer->r.bo_size, - &rbuffer->r.bo); - if (ret) - return ret; - rbuffer->uploaded = TRUE; - vb->buffer_offset = upload_offset; - } + struct radeon *rw = (struct radeon*)screen->winsys; + struct r600_resource *rbuffer; + struct r600_bo *bo = NULL; + + bo = r600_bo_handle(rw, whandle->handle, NULL); + if (bo == NULL) { + return NULL; + } + + rbuffer = CALLOC_STRUCT(r600_resource); + if (rbuffer == NULL) { + r600_bo_reference(rw, &bo, NULL); + return NULL; } - return ret; + + pipe_reference_init(&rbuffer->b.b.b.reference, 1); + rbuffer->b.b.b.target = PIPE_BUFFER; + rbuffer->b.b.b.screen = screen; + rbuffer->b.b.vtbl = &r600_buffer_vtbl; + rbuffer->bo = bo; + return &rbuffer->b.b.b; } +void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw) +{ + struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer); + boolean flushed; + + u_upload_data(rctx->vbuf_mgr->uploader, 0, + draw->info.count * draw->index_size, + rbuffer->r.b.user_ptr, + &draw->index_buffer_offset, + &draw->index_buffer, &flushed); +} -int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, +void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *const_offset) { - if (r600_buffer_is_user_buffer(cbuffer)) { - struct r600_resource_buffer *rbuffer = r600_buffer(cbuffer); - unsigned upload_offset; - int ret = 0; - - ret = r600_upload_buffer(rctx->rupload_const, - 0, cbuffer->width0, - rbuffer, - &upload_offset, - &rbuffer->r.bo_size, - &rbuffer->r.bo); - if (ret) - return ret; - rbuffer->uploaded = TRUE; - *const_offset = upload_offset; - return 0; - } + if ((*rbuffer)->r.b.user_ptr) { + uint8_t *ptr = (*rbuffer)->r.b.user_ptr; + unsigned size = (*rbuffer)->r.b.b.b.width0; + boolean flushed; + + *rbuffer = NULL; - *const_offset = 0; - return 0; + u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset, + (struct pipe_resource**)rbuffer, &flushed); + } else { + *const_offset = 0; + } } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 68b625cc3b4..34094001b75 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -30,11 +30,13 @@ #include <tgsi/tgsi_util.h> #include <util/u_blitter.h> #include <util/u_double_list.h> +#include <util/u_format_s3tc.h> #include <util/u_transfer.h> #include <util/u_surface.h> #include <util/u_pack_color.h> #include <util/u_memory.h> #include <util/u_inlines.h> +#include "util/u_upload_mgr.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -69,8 +71,30 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, #endif r600_context_flush(&rctx->ctx); - r600_upload_flush(rctx->rupload_vb); - r600_upload_flush(rctx->rupload_const); + /* XXX This shouldn't be really necessary, but removing it breaks some tests. + * Needless buffer reallocations may significantly increase memory consumption, + * so getting rid of this call is important. 
*/ + u_upload_flush(rctx->vbuf_mgr->uploader); +} + +static void r600_update_num_contexts(struct r600_screen *rscreen, + int diff) +{ + pipe_mutex_lock(rscreen->mutex_num_contexts); + if (diff > 0) { + rscreen->num_contexts++; + + if (rscreen->num_contexts > 1) + util_slab_set_thread_safety(&rscreen->pool_buffers, + UTIL_SLAB_MULTITHREADED); + } else { + rscreen->num_contexts--; + + if (rscreen->num_contexts <= 1) + util_slab_set_thread_safety(&rscreen->pool_buffers, + UTIL_SLAB_SINGLETHREADED); + } + pipe_mutex_unlock(rscreen->mutex_num_contexts); } static void r600_destroy_context(struct pipe_context *context) @@ -79,8 +103,6 @@ static void r600_destroy_context(struct pipe_context *context) rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush); - r600_end_vertex_translate(rctx); - r600_context_fini(&rctx->ctx); util_blitter_destroy(rctx->blitter); @@ -89,14 +111,11 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } - r600_upload_destroy(rctx->rupload_vb); - r600_upload_destroy(rctx->rupload_const); + u_vbuf_mgr_destroy(rctx->vbuf_mgr); + util_slab_destroy(&rctx->pool_transfers); - if (rctx->tran.translate_cache) - translate_cache_destroy(rctx->tran.translate_cache); + r600_update_num_contexts(rctx->screen, -1); - FREE(rctx->ps_resource); - FREE(rctx->vs_resource); FREE(rctx); } @@ -108,6 +127,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void if (rctx == NULL) return NULL; + + r600_update_num_contexts(rscreen, 1); + rctx->context.winsys = rscreen->screen.winsys; rctx->context.screen = screen; rctx->context.priv = priv; @@ -123,6 +145,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void r600_init_query_functions(rctx); r600_init_context_resource_functions(rctx); r600_init_surface_functions(rctx); + rctx->context.draw_vbo = r600_draw_vbo; switch (r600_get_family(rctx->radeon)) { case CHIP_R600: @@ -137,7 +160,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_RV730: case CHIP_RV710: case CHIP_RV740: - rctx->context.draw_vbo = r600_draw_vbo; r600_init_state_functions(rctx); if (r600_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -154,7 +176,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: - rctx->context.draw_vbo = evergreen_draw; evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -168,39 +189,23 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->rupload_vb = r600_upload_create(rctx, 128 * 1024, 16); - if (rctx->rupload_vb == NULL) { - r600_destroy_context(&rctx->context); - return NULL; - } + util_slab_create(&rctx->pool_transfers, + sizeof(struct pipe_transfer), 64, + UTIL_SLAB_SINGLETHREADED); - rctx->rupload_const = r600_upload_create(rctx, 128 * 1024, 256); - if (rctx->rupload_const == NULL) { + rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256, + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER, + U_VERTEX_FETCH_DWORD_ALIGNED); + if (!rctx->vbuf_mgr) { r600_destroy_context(&rctx->context); return NULL; } rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) { - FREE(rctx); - return NULL; - } - - rctx->tran.translate_cache = translate_cache_create(); - if 
(rctx->tran.translate_cache == NULL) { - FREE(rctx); - return NULL; - } - - rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->vs_resource) { - FREE(rctx); - return NULL; - } - - rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->ps_resource) { - FREE(rctx); + r600_destroy_context(&rctx->context); return NULL; } @@ -284,13 +289,16 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 1; /* Unsupported features (boolean caps). */ - case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ case PIPE_CAP_INSTANCED_DRAWING: return 0; + case PIPE_CAP_ARRAY_TEXTURES: + /* fix once the CS checker upstream is fixed */ + return debug_get_bool_option("R600_ARRAY_TEXTURE", FALSE); + /* Texturing. */ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: @@ -319,6 +327,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; + /* Timer queries, present when the clock frequency is non zero. */ + case PIPE_CAP_TIMER_QUERY: + return r600_get_clock_crystal_freq(rscreen->radeon) != 0; + default: R600_ERR("r600: unknown param %d\n", param); return 0; @@ -385,7 +397,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_CONSTS: return 256; //max native parameters case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 1; + return R600_MAX_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* FIXME */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: @@ -441,9 +453,14 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, retval |= PIPE_BIND_DEPTH_STENCIL; } - if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) - retval |= PIPE_BIND_VERTEX_BUFFER; + if (usage & PIPE_BIND_VERTEX_BUFFER) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + enum radeon_family family = r600_get_family(rscreen->radeon); + + if (r600_is_vertex_format_supported(format, family)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + } if (usage & PIPE_BIND_TRANSFER_READ) retval |= PIPE_BIND_TRANSFER_READ; @@ -462,6 +479,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) radeon_decref(rscreen->radeon); + util_slab_destroy(&rscreen->pool_buffers); + pipe_mutex_destroy(rscreen->mutex_num_contexts); FREE(rscreen); } @@ -489,6 +508,13 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) r600_init_screen_resource_functions(&rscreen->screen); rscreen->tiling_info = r600_get_tiling_info(radeon); + util_format_s3tc_init(); + + util_slab_create(&rscreen->pool_buffers, + sizeof(struct r600_resource_buffer), 64, + UTIL_SLAB_SINGLETHREADED); + + pipe_mutex_init(rscreen->mutex_num_contexts); return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 7f74fda0daf..8dc1f4ad5c3 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -30,12 +30,15 @@ #include <pipe/p_screen.h> #include <pipe/p_context.h> #include <util/u_math.h> -#include "translate/translate_cache.h" +#include "util/u_slab.h" +#include "util/u_vbuf_mgr.h" #include "r600.h" #include "r600_public.h" #include "r600_shader.h" #include "r600_resource.h" +#define R600_MAX_CONST_BUFFERS 1 + enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, 
R600_PIPE_STATE_BLEND_COLOR, @@ -62,6 +65,11 @@ struct r600_screen { struct pipe_screen screen; struct radeon *radeon; struct r600_tiling_info *tiling_info; + struct util_slab_mempool pool_buffers; + unsigned num_contexts; + + /* for thread-safe write accessing to num_contexts */ + pipe_mutex mutex_num_contexts; }; struct r600_pipe_sampler_view { @@ -86,9 +94,7 @@ struct r600_vertex_element { unsigned count; struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; - enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; - unsigned hw_format_size[PIPE_MAX_ATTRIBS]; - boolean incompatible_layout; + struct u_vbuf_mgr_elements *vmgr_elements; struct r600_bo *fetch_shader; unsigned fs_size; struct r600_pipe_state rstate; @@ -117,22 +123,9 @@ struct r600_textures_info { unsigned n_samplers; }; -/* vertex buffer translation context, used to translate vertex input that - * hw doesn't natively support, so far only FLOAT64 is unsupported. - */ -struct r600_translate_context { - /* Translate cache for incompatible vertex offset/stride/format fallback. */ - struct translate_cache *translate_cache; - /* The vertex buffer slot containing the translated buffer. */ - unsigned vb_slot; - void *new_velems; -}; - #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 -struct r600_upload; - struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; @@ -143,43 +136,35 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; + struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nvertex_buffer; unsigned cb_target_mask; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; - unsigned nvs_resource; - struct r600_pipe_state *vs_resource; - struct r600_pipe_state *ps_resource; struct r600_pipe_state config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; struct r600_pipe_state vs_const_buffer; + struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_state ps_const_buffer; + struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_rasterizer *rasterizer; /* shader information */ unsigned sprite_coord_enable; bool flatshade; - struct r600_upload *rupload_vb; - unsigned any_user_vbs; struct r600_textures_info ps_samplers; - unsigned vb_max_index; - struct r600_translate_context tran; - struct r600_upload *rupload_const; + + struct u_vbuf_mgr *vbuf_mgr; + struct util_slab_mempool pool_transfers; + bool blit; }; struct r600_drawl { + struct pipe_draw_info info; struct pipe_context *ctx; - unsigned mode; - unsigned min_index; - unsigned max_index; - unsigned index_bias; - unsigned start; - unsigned count; unsigned index_size; unsigned index_buffer_offset; struct pipe_resource *index_buffer; @@ -188,16 +173,20 @@ struct r600_drawl { /* evergreen_state.c */ void evergreen_init_state_functions(struct r600_pipe_context *rctx); void evergreen_init_config(struct r600_pipe_context *rctx); -void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info); void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); void 
*evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); -void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx); +void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_blit.c */ void r600_init_blit_functions(struct r600_pipe_context *rctx); void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture); +void r600_flush_depth_textures(struct r600_pipe_context *rctx); /* r600_buffer.c */ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, @@ -205,13 +194,9 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned bytes, unsigned bind); -unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer); struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); -int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); -int r600_upload_user_buffers(struct r600_pipe_context *rctx); +void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw); /* r600_query.c */ void r600_init_query_functions(struct r600_pipe_context *rctx); @@ -220,7 +205,6 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, @@ -228,11 +212,14 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, /* r600_state.c */ void r600_init_state_functions(struct r600_pipe_context *rctx); -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); +void r600_spi_update(struct r600_pipe_context *rctx); void r600_init_config(struct r600_pipe_context *rctx); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); -void r600_vertex_buffer_update(struct r600_pipe_context *rctx); +void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); @@ -247,8 +234,6 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned layer); /* r600_translate.c */ -void r600_begin_vertex_translate(struct r600_pipe_context *rctx); -void r600_end_vertex_translate(struct r600_pipe_context *rctx); void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, @@ -277,6 +262,9 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state); void r600_bind_vs_shader(struct pipe_context *ctx, void *state); void r600_delete_ps_shader(struct pipe_context *ctx, void *state); void r600_delete_vs_shader(struct 
pipe_context *ctx, void *state); +void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer); +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); /* * common helpers diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 28b3e1e5e40..836e7491f1f 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -24,6 +24,7 @@ #define R600_RESOURCE_H #include "util/u_transfer.h" +#include "util/u_vbuf_mgr.h" /* flag to indicate a resource is to be used as a transfer so should not be tiled */ #define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV @@ -43,7 +44,7 @@ struct r600_transfer { * underlying implementations. */ struct r600_resource { - struct u_resource base; + struct u_vbuf_resource b; struct r600_bo *bo; u32 size; unsigned bo_size; @@ -52,26 +53,31 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; - unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */ + unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */ unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_override; unsigned size; - unsigned tiled; unsigned tile_type; unsigned depth; - unsigned dirty; + unsigned dirty_db; struct r600_resource_texture *flushed_depth_texture; + boolean is_flushing_texture; + + /* on some cards we have to use integer 64/128-bit types + for s3tc blits, do this until gallium grows int formats */ + boolean force_int_type; }; +#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED) + #define R600_BUFFER_MAGIC 0xabcd1600 +/* XXX this could be removed */ struct r600_resource_buffer { struct r600_resource r; uint32_t magic; - void *user_buffer; - bool uploaded; }; struct r600_surface { @@ -98,14 +104,7 @@ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buf return NULL; } -static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer) -{ - if (r600_buffer(buffer)->uploaded) - return FALSE; - return r600_buffer(buffer)->user_buffer ? TRUE : FALSE; -} - -int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture); +int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture, boolean just_create); /* r600_texture.c texture transfer functions. 
*/ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, @@ -121,15 +120,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); struct r600_pipe_context; -struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx, - unsigned default_size, - unsigned alignment); -void r600_upload_flush(struct r600_upload *upload); -void r600_upload_destroy(struct r600_upload *upload); -int r600_upload_buffer(struct r600_upload *upload, unsigned offset, - unsigned size, struct r600_resource_buffer *in_buffer, - unsigned *out_offset, unsigned *out_size, - struct r600_bo **out_buffer); - -int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, uint32_t *offset); + +void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *offset); + #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c982471a04f..240c8f1ffd0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -28,6 +28,7 @@ #include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" +#include "r600_formats.h" #include "r600_opcodes.h" #include "r600d.h" #include <stdio.h> @@ -175,6 +176,13 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade R_0288CC_SQ_PGM_CF_OFFSET_PS, 0x00000000, 0xFFFFFFFF, NULL); + if (rshader->fs_write_all) { + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + S_028808_MULTIWRITE_ENABLE(1), + S_028808_MULTIWRITE_ENABLE(1), + NULL); + } + if (rshader->uses_kill) { /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, @@ -187,7 +195,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade 0xFFFFFFFF, NULL); } -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; @@ -225,12 +233,12 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) return 0; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); + int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) { static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - u32 *literals; int r; /* Would like some magic "get_bool_option_once" routine. 
@@ -243,13 +251,12 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s tgsi_dump(tokens, 0); } shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader, &literals); + r = r600_shader_from_tgsi(tokens, &shader->shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; } r = r600_bc_build(&shader->shader.bc); - free(literals); if (r) { R600_ERR("building bytecode failed !\n"); return r; @@ -274,6 +281,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader */ struct r600_shader_tgsi_instruction; +struct r600_shader_src { + unsigned sel; + unsigned swizzle[4]; + unsigned neg; + unsigned abs; + unsigned rel; + uint32_t value[4]; +}; + struct r600_shader_ctx { struct tgsi_shader_info info; struct tgsi_parse_context parse; @@ -281,9 +297,11 @@ struct r600_shader_ctx { unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; unsigned temp_reg; + unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; + struct r600_shader_src src[3]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -492,9 +510,179 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) return ctx->num_interp_gpr; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals) +static void tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + struct r600_shader_src *r600_src) +{ + memset(r600_src, 0, sizeof(*r600_src)); + r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; + r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; + r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; + r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; + r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + int index; + if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { + + index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; + r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) + return; + } + index = tgsi_src->Register.Index; + r600_src->sel = V_SQ_ALU_SRC_LITERAL; + memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); + } else { + if (tgsi_src->Register.Indirect) + r600_src->rel = V_SQ_REL_RELATIVE; + r600_src->sel = tgsi_src->Register.Index; + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + } +} + +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) +{ + struct r600_bc_vtx vtx; + unsigned int ar_reg; + int r; + + if (offset) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.src[0].sel = ctx->ar_reg; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = offset; + + alu.dst.sel = dst_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + ar_reg = dst_reg; + } else { + ar_reg = ctx->ar_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = ar_reg; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + 
vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.data_format = FMT_32_32_32_32_FLOAT; + vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + return r; + + return 0; +} + +static int tgsi_split_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nconst, r; + + for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + nconst++; + } + tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); + } + for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { + continue; + } + + if (ctx->src[i].rel) { + int treg = r600_get_temp(ctx); + if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) + return r; + + ctx->src[i].sel = treg; + ctx->src[i].rel = 0; + j--; + } else if (j > 0) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].rel = ctx->src[i].rel; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + ctx->src[i].rel =0; + j--; + } + } + return 0; +} + +/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nliteral, r; + + for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + nliteral++; + } + } + for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].value = ctx->src[i].value[k]; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + j--; + } + } + return 0; +} + +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; + struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned noutput; @@ -558,12 +746,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.file_offset[TGSI_FILE_CONSTANT] = 512; ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; - ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; ctx.literals = NULL; - + shader->fs_write_all = FALSE; while (!tgsi_parse_end_of_tokens(&ctx.parse)) { tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { @@ -592,7 +781,12 @@ int r600_shader_from_tgsi(const struct tgsi_token 
*tokens, struct r600_shader *s ctx.max_driver_temp_used = 0; /* reserve first tmp for everyone */ r600_get_temp(&ctx); + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; if (ctx.bc->chiprev == CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else @@ -602,6 +796,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s goto out_err; break; case TGSI_TOKEN_TYPE_PROPERTY: + property = &ctx.parse.FullToken.FullProperty; + if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + if (property->u[0].Data == 1) + shader->fs_write_all = TRUE; + } break; default: R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); @@ -619,6 +818,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = i - pos0; switch (ctx.type) { @@ -680,6 +880,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; noutput++; @@ -694,6 +895,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[0].swizzle_y = 7; output[0].swizzle_z = 7; output[0].swizzle_w = 7; + output[0].burst_count = 1; output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; output[0].array_base = 0; noutput++; @@ -704,7 +906,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (r) goto out_err; } - *literals = ctx.literals; + free(ctx.literals); tgsi_parse_free(&ctx.parse); return 0; out_err: @@ -724,40 +926,22 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } -static int tgsi_src(struct r600_shader_ctx *ctx, - const struct tgsi_full_src_register *tgsi_src, - struct r600_bc_alu_src *r600_src) +static void r600_bc_src(struct r600_bc_alu_src *bc_src, + const struct r600_shader_src *shader_src, + unsigned chan) { - memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); - r600_src->neg = tgsi_src->Register.Negate; - r600_src->abs = tgsi_src->Register.Absolute; - if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { - int index; - if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && - (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && - (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { - - index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); - if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) - return 0; - } - index = tgsi_src->Register.Index; - r600_src->sel = V_SQ_ALU_SRC_LITERAL; - r600_src->value = ctx->literals + index * 4; - } else { - if (tgsi_src->Register.Indirect) - r600_src->rel = V_SQ_REL_RELATIVE; - r600_src->sel = tgsi_src->Register.Index; - r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; - } - return 0; + bc_src->sel = shader_src->sel; + bc_src->chan = shader_src->swizzle[chan]; + bc_src->neg = shader_src->neg; + bc_src->abs = shader_src->abs; + bc_src->rel = shader_src->rel; + bc_src->value = shader_src->value[bc_src->chan]; } -static int tgsi_dst(struct 
r600_shader_ctx *ctx, - const struct tgsi_full_dst_register *tgsi_dst, - unsigned swizzle, - struct r600_bc_alu_dst *r600_dst) +static void tgsi_dst(struct r600_shader_ctx *ctx, + const struct tgsi_full_dst_register *tgsi_dst, + unsigned swizzle, + struct r600_bc_alu_dst *r600_dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -770,101 +954,6 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, if (inst->Instruction.Saturate) { r600_dst->clamp = 1; } - return 0; -} - -static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) -{ - switch (swizzle) { - case 0: - return tgsi_src->Register.SwizzleX; - case 1: - return tgsi_src->Register.SwizzleY; - case 2: - return tgsi_src->Register.SwizzleZ; - case 3: - return tgsi_src->Register.SwizzleW; - default: - return 0; - } -} - -static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nconst, r; - - for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - nconst++; - } - r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); - if (r) { - return r; - } - } - for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; - alu.src[0].chan = k; - alu.src[0].rel = r600_src[i].rel; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - r600_src[i].sel = treg; - r600_src[i].rel =0; - j--; - } - } - return 0; -} - -/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ -static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nliteral, r; - - for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) { - nliteral++; - } - } - for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; - alu.src[0].chan = k; - alu.src[0].value = r600_src[i].value; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - r600_src[i].sel = treg; - j--; - } - } - return 0; } static int tgsi_last_instruction(unsigned writemask) @@ -882,38 +971,25 @@ static int tgsi_last_instruction(unsigned writemask) static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if 
(r) - return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } } else { - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { @@ -951,24 +1027,15 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) * r700 - normalize by dividing by 2PI * see fdo bug 27901 */ -static int tgsi_setup_trig(struct r600_shader_ctx *ctx, - struct r600_bc_alu_src r600_src[3]) +static int tgsi_setup_trig(struct r600_shader_ctx *ctx) { static float half_inv_pi = 1.0 /(3.1415926535 * 2); static float double_pi = 3.1415926535 * 2; static float neg_pi = -3.1415926535; - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int r; struct r600_bc_alu alu; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -977,12 +1044,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[1].value = (uint32_t *)&half_inv_pi; + alu.src[1].value = *(uint32_t *)&half_inv_pi; alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 0; alu.last = 1; @@ -1021,8 +1087,8 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[2].chan = 0; if (ctx->bc->chiprev == CHIPREV_R600) { - alu.src[1].value = (uint32_t *)&double_pi; - alu.src[2].value = (uint32_t *)&neg_pi; + alu.src[1].value = *(uint32_t *)&double_pi; + alu.src[2].value = *(uint32_t *)&neg_pi; } else { alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[2].sel = V_SQ_ALU_SRC_0_5; @@ -1039,12 +1105,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; @@ -1070,9 +1135,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->temp_reg; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1085,7 +1148,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int r; @@ -1093,7 +1155,7 @@ static 
int tgsi_scs(struct r600_shader_ctx *ctx) * X or Y components of the destination vector. */ if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; } @@ -1102,9 +1164,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1118,9 +1178,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1136,9 +1194,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = 0; @@ -1156,9 +1212,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -1175,7 +1229,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) static int tgsi_kill(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int i, r; @@ -1191,10 +1244,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } if (i == 3) { alu.last = 1; @@ -1214,24 +1264,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* dst.x, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1240,12 +1280,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.y = max(src.x, 0.0) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - alu.src[0] = r600_src[0]; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1256,9 +1294,7 @@ static int 
tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1273,11 +1309,8 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1289,13 +1322,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -1310,9 +1341,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1336,10 +1365,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); alu.src[i].abs = 1; } alu.dst.sel = ctx->temp_reg; @@ -1363,9 +1389,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.dst.chan = i; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == 3) alu.last = 1; @@ -1385,10 +1409,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1402,17 +1423,13 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) static int tgsi_pow(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; /* LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 
0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; @@ -1422,10 +1439,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1450,16 +1464,8 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int i, r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1469,13 +1475,10 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; + r600_bc_src(&alu.src[2], &ctx->src[0], i); - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], i); if (i == 3) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1488,9 +1491,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; @@ -1523,9 +1524,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru alu.dst.chan = i; } else { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; } @@ -1542,17 +1541,10 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -1560,14 +1552,10 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -1584,28 +1572,17 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; 
int i, j, r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ @@ -1661,11 +1638,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* Add perspective divide */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 3; alu.last = 1; @@ -1679,10 +1653,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; @@ -1735,14 +1706,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src2_chan = 0; break; } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); + r600_bc_src(&alu.src[0], &ctx->src[0], src_chan); + r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1782,7 +1747,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value = (u32*)&one_point_five; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1803,7 +1768,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value = (u32*)&one_point_five; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -1822,10 +1787,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1846,7 +1808,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.resource_id = tex.sampler_id; + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 
0 : 7; @@ -1872,6 +1834,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.coord_type_w = 1; } + if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { + tex.coord_type_z = 0; + tex.src_sel_z = 1; + } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) + tex.coord_type_z = 0; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) tex.src_sel_w = 2; @@ -1886,36 +1854,23 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* optimize if it's just an equal balance */ - if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) { + if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); alu.omod = 3; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; @@ -1936,8 +1891,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1959,8 +1913,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { @@ -1980,17 +1933,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - alu.src[1] = r600_src[1]; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; @@ -2005,37 +1953,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - for (i = 0; i < lasti + 1; 
i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); - - alu.src[2] = r600_src[1]; - alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bc_src(&alu.src[2], &ctx->src[1], i); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -2051,7 +1982,6 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) static int tgsi_xpd(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; uint32_t use_temp = 0; int i, r; @@ -2059,43 +1989,34 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - alu.src[0] = r600_src[0]; switch (i) { case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); + r600_bc_src(&alu.src[0], &ctx->src[0], 2); break; case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); break; case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); break; case 3: alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; } - alu.src[1] = r600_src[1]; switch (i) { case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); + r600_bc_src(&alu.src[1], &ctx->src[1], 1); break; case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); + r600_bc_src(&alu.src[1], &ctx->src[1], 2); break; case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[1], &ctx->src[1], 0); break; case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -2117,32 +2038,30 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); - alu.src[0] = r600_src[0]; switch (i) { case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); break; case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); + r600_bc_src(&alu.src[0], &ctx->src[0], 2); break; case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); break; case 3: alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; } - alu.src[1] = r600_src[1]; switch (i) { case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); + r600_bc_src(&alu.src[1], &ctx->src[1], 2); break; case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[1], &ctx->src[1], 0); break; case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); + r600_bc_src(&alu.src[1], &ctx->src[1], 1); break; case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -2155,11 +2074,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (use_temp) alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + else + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; 
alu.is_op3 = 1; @@ -2177,7 +2093,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3] = { { 0 } }; struct r600_bc_alu alu; int r; @@ -2186,11 +2101,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2218,11 +2129,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - alu.src[0] = r600_src[0]; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2242,10 +2149,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2288,11 +2192,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2321,11 +2221,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2385,11 +2281,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2409,11 +2301,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2451,6 +2339,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { @@ -2465,23 +2354,23 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = 
tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; - alu.dst.chan = 0; - alu.dst.sel = ctx->temp_reg; + alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; + + /* TODO: Note that the MOVA can be avoided if we never use AR for + * indexing non-CB registers in the current ALU clause. Similarly, we + * need to load AR from ar_reg again if we started a new clause + * between ARL and AR usage. The easy way to do that is to remove + * the MOVA here, and load it for the first AR access after ar_reg + * has been modified in each clause. */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].sel = ctx->temp_reg; + alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -2495,26 +2384,48 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.src[0].sel = ctx->ar_reg; + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; default: assert(0); return -1; } - - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + alu.src[0].sel = ctx->ar_reg; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -2534,26 +2445,18 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); } - if (i == 0 || i == 2) { + if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); } if (i == 3) alu.last = 1; @@ -2566,7 +2469,6 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; @@ -2578,10 +2480,7 @@ static int 
emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 935dd6fe3ab..8f96ce5085c 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -45,8 +45,7 @@ struct r600_shader { struct r600_shader_io output[32]; enum radeon_family family; boolean uses_kill; + boolean fs_write_all; }; -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); - #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index de2668cee16..576067ae81e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -37,6 +37,7 @@ #include <util/u_memory.h> #include <util/u_inlines.h> #include <util/u_framebuffer.h> +#include "util/u_transfer.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -94,221 +95,6 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx) } } -/* FIXME optimize away spi update when it's not needed */ -static void r600_spi_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; - - rstate.nregs = 0; - for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || - rshader->input[i].name == TGSI_SEMANTIC_POSITION) { - tmp |= S_028644_FLAT_SHADE(rctx->flatshade); - } - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { - tmp |= S_028644_PT_SPRITE_TEX(1); - } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); -} - -void r600_vertex_buffer_update(struct r600_pipe_context *rctx) -{ - struct r600_pipe_state *rstate; - struct r600_resource *rbuffer; - struct pipe_vertex_buffer *vertex_buffer; - unsigned i, offset; - - /* we don't update until we know vertex elements */ - if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer) - return; - - if (rctx->vertex_elements->incompatible_layout) { - /* translate rebind new vertex elements so - * return once translated - */ - r600_begin_vertex_translate(rctx); - return; - } - - if (rctx->any_user_vbs) { - r600_upload_user_buffers(rctx); - rctx->any_user_vbs = FALSE; - } - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - rctx->nvs_resource = rctx->vertex_elements->count; - } else { - /* bind vertex buffer once */ - rctx->nvs_resource = rctx->nvertex_buffer; - } - - for (i = 0 ; i < rctx->nvs_resource; i++) { - rstate = &rctx->vs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - - if (rctx->vertex_elements->vbuffer_need_offset) { - /* one resource per vertex elements */ - unsigned vbuffer_index; - 
vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; - vertex_buffer = &rctx->vertex_buffer[vbuffer_index]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = rctx->vertex_elements->vbuffer_offset[i]; - } else { - /* bind vertex buffer once */ - vertex_buffer = &rctx->vertex_buffer[i]; - rbuffer = (struct r600_resource*)vertex_buffer->buffer; - offset = 0; - } - if (vertex_buffer == NULL || rbuffer == NULL) - continue; - offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(vertex_buffer->stride), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, - 0xC0000000, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); - } -} - -static void r600_draw_common(struct r600_drawl *draw) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx; - struct r600_resource *rbuffer; - unsigned prim; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; - struct r600_draw rdraw; - struct r600_pipe_state vgt; - - switch (draw->index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw->index_size); - return; - } - if (r600_conv_pipe_prim(draw->mode, &prim)) - return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } - - r600_spi_update(rctx); - - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } - - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); - - rdraw.vgt_num_indices = draw->count; - rdraw.vgt_num_instances = 1; - rdraw.vgt_index_type = vgt_dma_index_type; - rdraw.vgt_draw_initiator = vgt_draw_initiator; - rdraw.indices = NULL; - if (draw->index_buffer) { - rbuffer = (struct 
r600_resource*)draw->index_buffer; - rdraw.indices = rbuffer->bo; - rdraw.indices_bo_offset = draw->index_buffer_offset; - } - r600_context_draw(&rctx->ctx, &rdraw); -} - -void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_drawl draw; - - memset(&draw, 0, sizeof(struct r600_drawl)); - draw.ctx = ctx; - draw.mode = info->mode; - draw.start = info->start; - draw.count = info->count; - if (info->indexed && rctx->index_buffer.buffer) { - draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->index_bias; - - r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer, - &rctx->index_buffer.index_size, - &draw.start, - info->count); - - draw.index_size = rctx->index_buffer.index_size; - pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); - draw.index_buffer_offset = draw.start * draw.index_size; - draw.start = 0; - r600_upload_index_buffer(rctx, &draw); - } else { - draw.index_size = 0; - draw.index_buffer = NULL; - draw.min_index = info->min_index; - draw.max_index = info->max_index; - draw.index_bias = info->start; - } - r600_draw_common(&draw); - - pipe_resource_reference(&draw.index_buffer, NULL); -} - static void r600_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) { @@ -616,6 +402,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; + unsigned height, depth; if (resource == NULL) return NULL; @@ -643,22 +430,30 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c if (desc == NULL) { R600_ERR("unknow format %d\n", state->format); } - tmp = (struct r600_resource_texture*)texture; + tmp = (struct r600_resource_texture *)texture; + if (tmp->depth && !tmp->is_flushing_texture) { + r600_texture_depth_flush(ctx, texture, TRUE); + tmp = tmp->flushed_depth_texture; + } + + if (tmp->force_int_type) { + word4 &= C_038010_NUM_FORMAT_ALL; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); + } rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - /* FIXME depth texture decompression */ - if (tmp->depth) { - r600_texture_depth_flush(ctx, texture); - tmp = (struct r600_resource_texture*)texture; - rbuffer = &tmp->flushed_depth_texture->resource; - bo[0] = rbuffer->bo; - bo[1] = rbuffer->bo; - } - pitch = align(tmp->pitch_in_pixels[0], 8); - if (tmp->tiled) { - array_mode = tmp->array_mode[0]; - tile_type = tmp->tile_type; + pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); + array_mode = tmp->array_mode[0]; + tile_type = tmp->tile_type; + + height = texture->height0; + depth = texture->depth0; + if (texture->target == PIPE_TEXTURE_1D_ARRAY) { + height = 1; + depth = texture->array_size; + } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { + depth = texture->array_size; } /* FIXME properly handle first level != 0 */ @@ -669,22 +464,22 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038000_PITCH((pitch / 8) - 1) | S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - S_038004_TEX_HEIGHT(texture->height0 - 1) | - S_038004_TEX_DEPTH(texture->depth0 - 1) | + 
S_038004_TEX_HEIGHT(height - 1) | + S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) | + word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, S_038014_LAST_LEVEL(state->u.tex.last_level) | - S_038014_BASE_ARRAY(0) | - S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL); + S_038014_BASE_ARRAY(state->u.tex.first_layer) | + S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); @@ -714,9 +509,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { if (resource[i]) - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); else - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], @@ -726,7 +523,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, } for (i = count; i < NUM_TEX_UNITS; i++) { if (rctx->ps_samplers.views[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, + i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } @@ -908,21 +706,28 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; + int i; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + + if (rtex->depth && !rtex->is_flushing_texture) { + r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); + rtex = rtex->flushed_depth_texture; + } + rbuffer = &rtex->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; /* XXX quite sure for dx10+ hw don't need any offset hacks */ - offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture, + offset = r600_texture_get_offset(rtex, level, state->cbufs[cb]->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; ntype = 0; - desc = util_format_description(rtex->resource.base.b.format); + desc = util_format_description(surf->base.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_0280A0_NUMBER_SRGB; else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { @@ -937,15 +742,30 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta } } - 
format = r600_translate_colorformat(rtex->resource.base.b.format); - swap = r600_translate_colorswap(rtex->resource.base.b.format); + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + format = r600_translate_colorformat(surf->base.format); + swap = r600_translate_colorswap(surf->base.format); + + /* disable when gallium grows int textures */ + if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) + ntype = 4; + color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); - if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) - color_info |= S_0280A0_SOURCE_FORMAT(1); + + /* on R600 this can't be set if BLEND_CLAMP isn't set, + if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */ + if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && + desc->channel[i].size < 12) + color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM); r600_pipe_state_add_reg(rstate, R_028040_CB_COLOR0_BASE + cb * 4, @@ -989,17 +809,14 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; - rtex->tiled = 1; - rtex->array_mode[level] = 2; - rtex->tile_type = 1; - rtex->depth = 1; + rbuffer = &rtex->resource; /* XXX quite sure for dx10+ hw don't need any offset hacks */ offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture, level, state->zsbuf->u.tex.first_layer); - pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; + pitch = rtex->pitch_in_blocks[level] / 8 - 1; + slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, @@ -1115,51 +932,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } } -static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_resource *buffer) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_resource *rbuffer = (struct r600_resource*)buffer; - uint32_t offset; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. 
- */ - if (buffer == NULL) { - return; - } - - r600_upload_const_buffer(rctx, buffer, &offset); - - switch (shader) { - case PIPE_SHADER_VERTEX: - rctx->vs_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028180_ALU_CONST_BUFFER_SIZE_VS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->vs_const_buffer, - R_028980_ALU_CONST_CACHE_VS_0, - (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); - break; - case PIPE_SHADER_FRAGMENT: - rctx->ps_const_buffer.nregs = 0; - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028140_ALU_CONST_BUFFER_SIZE_PS_0, - ALIGN_DIVUP(buffer->width0 >> 4, 16), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&rctx->ps_const_buffer, - R_028940_ALU_CONST_CACHE_PS_0, - (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo); - r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); - break; - default: - R600_ERR("unsupported %d\n", shader); - return; - } -} - void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; @@ -1199,6 +971,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view; rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; + rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; } void r600_init_config(struct r600_pipe_context *rctx) @@ -1489,3 +1262,25 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) S_028D0C_COPY_CENTROID(1), NULL); return rstate; } + +void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, + offset, 0xFFFFFFFF, rbuffer->bo); + r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, + rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, + S_038008_STRIDE(stride), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, + 0xC0000000, 0xFFFFFFFF, NULL); +} diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 3603376f738..72707fbd8b8 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -27,7 +27,9 @@ #include <util/u_memory.h> #include <util/u_format.h> #include <pipebuffer/pb_buffer.h> +#include "pipe/p_shader_tokens.h" #include "r600_pipe.h" +#include "r600d.h" /* common state between evergreen and r600 */ void r600_bind_blend_state(struct pipe_context *ctx, void *state) @@ -119,24 +121,13 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = (struct r600_vertex_element*)state; - /* delete previous translated vertex elements */ - if (rctx->tran.new_velems) { - r600_end_vertex_translate(rctx); - } - rctx->vertex_elements = v; if (v) { + 
u_vbuf_mgr_bind_vertex_elements(rctx->vbuf_mgr, state, + v->vmgr_elements); + rctx->states[v->rstate.id] = &v->rstate; r600_context_pipe_state_set(&rctx->ctx, &v->rstate); - if (rctx->family >= CHIP_CEDAR) { - evergreen_vertex_buffer_update(rctx); - } else { - r600_vertex_buffer_update(rctx); - } - } - - if (v) { -// rctx->vs_rebuild = TRUE; } } @@ -152,6 +143,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state) rctx->vertex_elements = NULL; r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); + u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements); FREE(state); } @@ -176,88 +168,44 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_buffer *buffers) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_vertex_buffer *vbo; - unsigned max_index = (unsigned)-1; + int i; - if (rctx->family >= CHIP_CEDAR) { - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + /* Zero states. */ + for (i = 0; i < count; i++) { + if (!buffers[i].buffer) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } else { + r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } } - } else { - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); + } else { r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i); } } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - - for (int i = 0; i < count; i++) { - vbo = (struct pipe_vertex_buffer*)&buffers[i]; - - rctx->vertex_buffer[i].buffer = NULL; - if (buffers[i].buffer == NULL) - continue; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); - /* The stride of zero means we will be fetching only the first - * vertex, so don't care about max_index. 
*/ - if (!vbo->stride) - continue; - - if (vbo->max_index == ~0) { - vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; - } - max_index = MIN2(vbo->max_index, max_index); - } - rctx->nvertex_buffer = count; - rctx->vb_max_index = max_index; - if (rctx->family >= CHIP_CEDAR) { - evergreen_vertex_buffer_update(rctx); - } else { - r600_vertex_buffer_update(rctx); - } + u_vbuf_mgr_set_vertex_buffers(rctx->vbuf_mgr, count, buffers); } - -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break - void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - enum pipe_format *format; - int i; assert(count < 32); if (!v) return NULL; v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - - for (i = 0; i < count; i++) { - v->hw_format[i] = v->elements[i].src_format; - format = &v->hw_format[i]; - - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - default:; - } - v->incompatible_layout = - v->incompatible_layout || - v->elements[i].src_format != v->hw_format[i]; - - v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4); - } + v->vmgr_elements = + u_vbuf_mgr_create_vertex_elements(rctx->vbuf_mgr, count, + elements, v->elements); if (r600_vertex_elements_build_fetch_shader(rctx, v)) { FREE(v); @@ -327,3 +275,274 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) r600_pipe_shader_destroy(ctx, shader); free(shader); } + +/* FIXME optimize away spi update when it's not needed */ +void r600_spi_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_shader *shader = rctx->ps_shader; + struct r600_pipe_state rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, tmp; + + rstate.nregs = 0; + for (i = 0; i < rshader->ninput; i++) { + tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + + if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { + tmp |= S_028644_FLAT_SHADE(rctx->flatshade); + } + + if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && + rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { + tmp |= S_028644_PT_SPRITE_TEX(1); + } + + if (rctx->family < CHIP_CEDAR) { + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); + + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + } + + r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + } + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + +void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, + struct pipe_resource *buffer) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource_buffer *rbuffer = r600_buffer(buffer); + struct r600_pipe_state *rstate; + uint32_t offset; + + /* Note that the state tracker can unbind constant buffers by + * passing NULL here. 
+ */ + if (buffer == NULL) { + return; + } + + r600_upload_const_buffer(rctx, &rbuffer, &offset); + offset += r600_bo_offset(rbuffer->r.bo); + + switch (shader) { + case PIPE_SHADER_VERTEX: + rctx->vs_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028180_ALU_CONST_BUFFER_SIZE_VS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vs_const_buffer, + R_028980_ALU_CONST_CACHE_VS_0, + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); + + rstate = &rctx->vs_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); + } + break; + case PIPE_SHADER_FRAGMENT: + rctx->ps_const_buffer.nregs = 0; + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028140_ALU_CONST_BUFFER_SIZE_PS_0, + ALIGN_DIVUP(buffer->width0 >> 4, 16), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->ps_const_buffer, + R_028940_ALU_CONST_CACHE_PS_0, + offset >> 8, 0xFFFFFFFF, rbuffer->r.bo); + r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); + + rstate = &rctx->ps_const_buffer_resource[index]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); + } + break; + default: + R600_ERR("unsupported %d\n", shader); + return; + } + + if (buffer != &rbuffer->r.b.b.b) + pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL); +} + +static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate; + struct r600_resource *rbuffer; + struct pipe_vertex_buffer *vertex_buffer; + unsigned i, count, offset; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + count = rctx->vertex_elements->count; + } else { + /* bind vertex buffer once */ + count = rctx->vbuf_mgr->nr_real_vertex_buffers; + } + + for (i = 0 ; i < count; i++) { + rstate = &rctx->fs_resource[i]; + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; + + if (rctx->vertex_elements->vbuffer_need_offset) { + /* one resource per vertex elements */ + unsigned vbuffer_index; + vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index; + vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[vbuffer_index]; + rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[vbuffer_index]; + offset = rctx->vertex_elements->vbuffer_offset[i]; + } else { + /* bind vertex buffer once */ + vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[i]; + rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[i]; + offset = 0; + } + if (vertex_buffer == NULL || rbuffer == NULL) + continue; + offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + 
evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } else { + r600_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); + } + } +} + +void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_resource *rbuffer; + u32 vgt_dma_index_type, vgt_draw_initiator, mask; + struct r600_draw rdraw; + struct r600_pipe_state vgt; + struct r600_drawl draw = {}; + unsigned prim; + + r600_flush_depth_textures(rctx); + u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL); + r600_vertex_buffer_update(rctx); + + draw.info = *info; + draw.ctx = ctx; + if (info->indexed && rctx->index_buffer.buffer) { + draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; + pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); + + r600_translate_index_buffer(rctx, &draw.index_buffer, + &rctx->index_buffer.index_size, + &draw.info.start, + info->count); + + draw.index_size = rctx->index_buffer.index_size; + draw.index_buffer_offset = draw.info.start * draw.index_size; + draw.info.start = 0; + + if (u_vbuf_resource(draw.index_buffer)->user_ptr) { + r600_upload_index_buffer(rctx, &draw); + } + } else { + draw.info.index_bias = info->start; + } + + switch (draw.index_size) { + case 2: + vgt_draw_initiator = 0; + vgt_dma_index_type = 0; + break; + case 4: + vgt_draw_initiator = 0; + vgt_dma_index_type = 1; + break; + case 0: + vgt_draw_initiator = 2; + vgt_dma_index_type = 0; + break; + default: + R600_ERR("unsupported index size %d\n", draw.index_size); + return; + } + if (r600_conv_pipe_prim(draw.info.mode, &prim)) + return; + if (unlikely(rctx->ps_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + if (unlikely(rctx->vs_shader == NULL)) { + R600_ERR("missing vertex shader\n"); + return; + } + /* there should be enough input */ + if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { + R600_ERR("%d resources provided, expecting %d\n", + rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); + return; + } + + r600_spi_update(rctx); + + mask = 0; + for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { + mask |= (0xF << (i * 4)); + } + + vgt.id = R600_PIPE_STATE_VGT; + vgt.nregs = 0; + r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL); + r600_context_pipe_state_set(&rctx->ctx, &vgt); + + rdraw.vgt_num_indices = draw.info.count; + rdraw.vgt_num_instances = 1; + rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.indices = NULL; + if (draw.index_buffer) { + rbuffer = (struct r600_resource*)draw.index_buffer; + rdraw.indices = rbuffer->bo; + rdraw.indices_bo_offset = draw.index_buffer_offset; + } + + if (rctx->family >= CHIP_CEDAR) { + evergreen_context_draw(&rctx->ctx, &rdraw); 
+ } else { + r600_context_draw(&rctx->ctx, &rdraw); + } + + if (rctx->framebuffer.zsbuf) + { + struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture; + ((struct r600_resource_texture *)tex)->dirty_db = TRUE; + } + + pipe_resource_reference(&draw.index_buffer, NULL); + + u_vbuf_mgr_draw_end(rctx->vbuf_mgr); +} diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index a0ec493fc85..3dd54f45202 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim) default: case PIPE_TEXTURE_1D: return V_038000_SQ_TEX_DIM_1D; + case PIPE_TEXTURE_1D_ARRAY: + return V_038000_SQ_TEX_DIM_1D_ARRAY; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: return V_038000_SQ_TEX_DIM_2D; + case PIPE_TEXTURE_2D_ARRAY: + return V_038000_SQ_TEX_DIM_2D_ARRAY; case PIPE_TEXTURE_3D: return V_038000_SQ_TEX_DIM_3D; case PIPE_TEXTURE_CUBE: @@ -285,10 +289,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_0280A0_SWAP_STD; + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_SWAP_ALT; + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_SWAP_STD_REV; @@ -305,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_STD; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_SWAP_STD; @@ -328,6 +337,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_X8R8G8B8_UNORM: return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: return V_0280A0_SWAP_STD; @@ -346,9 +356,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10X2_SNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_SWAP_STD_REV; + return V_0280A0_SWAP_STD; + + case PIPE_FORMAT_B10G10R10A2_UNORM: + return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: return V_0280A0_SWAP_STD; @@ -356,14 +368,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ - //case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return FMT_32_32_32_32_FLOAT; - return 0; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_SWAP_STD; default: R600_ERR("unsupported colorswap format %d\n", format); return ~0; @@ -374,10 +385,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { + case PIPE_FORMAT_L4A4_UNORM: + return V_0280A0_COLOR_4_4; + /* 8-bit buffers. 
*/ case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_L8_SRGB: case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8_SNORM: return V_0280A0_COLOR_8; @@ -398,6 +413,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_16; case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_L8A8_SRGB: case PIPE_FORMAT_R8G8_UNORM: return V_0280A0_COLOR_8_8; @@ -425,7 +441,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R10G10B10X2_SNORM: case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10SG10SB10SA2U_NORM: - return V_0280A0_COLOR_10_10_10_2; + return V_0280A0_COLOR_2_10_10_10; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: @@ -467,10 +483,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) return V_0280A0_COLOR_32_32; /* 128-bit buffers. */ - //case PIPE_FORMAT_R32G32B32_FLOAT: - // return V_0280A0_COLOR_32_32_32_FLOAT; - //case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return V_0280A0_COLOR_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return V_0280A0_COLOR_32_32_32_32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + return V_0280A0_COLOR_32_32_32_32; /* YUV buffers. */ case PIPE_FORMAT_UYVY: @@ -497,9 +516,37 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) return r600_translate_dbformat(format) != ~0; } -static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format) +static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format, + enum radeon_family family) { - return r600_translate_colorformat(format) != ~0; + unsigned i; + const struct util_format_description *desc = util_format_description(format); + if (!desc) + return FALSE; + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + if (i == 4) + return FALSE; + + /* No fixed, no double. */ + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED || + (desc->channel[i].size == 64 && + desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)) + return FALSE; + + /* No scaled/norm formats with 32 bits per channel. */ + if (desc->channel[i].size == 32 && + (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)) + return FALSE; + + return TRUE; } #endif diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 1f4f453c091..03af367401d 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -27,6 +27,7 @@ #include <errno.h> #include <pipe/p_screen.h> #include <util/u_format.h> +#include <util/u_format_s3tc.h> #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> @@ -38,8 +39,6 @@ #include "r600d.h" #include "r600_formats.h" -extern struct u_resource_vtbl r600_texture_vtbl; - /* Copy from a full GPU texture to a transfer's staging one. 
*/ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) { @@ -77,17 +76,15 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, { unsigned offset = rtex->offset[level]; - switch (rtex->resource.base.b.target) { + switch (rtex->resource.b.b.b.target) { case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - return offset + layer * rtex->layer_size[level]; default: - assert(layer == 0); - return offset; + return offset + layer * rtex->layer_size[level]; } } -static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, +static unsigned r600_get_block_alignment(struct pipe_screen *screen, enum pipe_format format, unsigned array_mode) { @@ -105,6 +102,9 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, (((rscreen->tiling_info->group_bytes / 8 / pixsize)) * rscreen->tiling_info->num_banks)) * 8; break; + case V_038000_ARRAY_LINEAR_ALIGNED: + p_align = MAX2(64, rscreen->tiling_info->group_bytes / pixsize); + break; case V_038000_ARRAY_LINEAR_GENERAL: default: p_align = rscreen->tiling_info->group_bytes / pixsize; @@ -124,8 +124,10 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen, h_align = rscreen->tiling_info->num_channels * 8; break; case V_038000_ARRAY_1D_TILED_THIN1: + case V_038000_ARRAY_LINEAR_ALIGNED: h_align = 8; break; + case V_038000_ARRAY_LINEAR_GENERAL: default: h_align = 1; break; @@ -139,7 +141,7 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen, { struct r600_screen* rscreen = (struct r600_screen *)screen; unsigned pixsize = util_format_get_blocksize(format); - int p_align = r600_get_pixel_alignment(screen, format, array_mode); + int p_align = r600_get_block_alignment(screen, format, array_mode); int h_align = r600_get_height_alignment(screen, array_mode); int b_align; @@ -149,6 +151,8 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen, p_align * pixsize * h_align); break; case V_038000_ARRAY_1D_TILED_THIN1: + case V_038000_ARRAY_LINEAR_ALIGNED: + case V_038000_ARRAY_LINEAR_GENERAL: default: b_align = rscreen->tiling_info->group_bytes; break; @@ -165,55 +169,46 @@ static unsigned mip_minify(unsigned size, unsigned level) return val; } -static unsigned r600_texture_get_stride(struct pipe_screen *screen, - struct r600_resource_texture *rtex, - unsigned level) +static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) { - struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned width, stride, tile_width; + struct pipe_resource *ptex = &rtex->resource.b.b.b; + unsigned nblocksx, block_align, width; + unsigned blocksize = util_format_get_blocksize(ptex->format); if (rtex->pitch_override) - return rtex->pitch_override; + return rtex->pitch_override / blocksize; width = mip_minify(ptex->width0, level); - if (util_format_is_plain(ptex->format)) { - tile_width = r600_get_pixel_alignment(screen, ptex->format, - rtex->array_mode[level]); - width = align(width, tile_width); - } - stride = util_format_get_stride(ptex->format, width); + nblocksx = util_format_get_nblocksx(ptex->format, width); - return stride; + block_align = r600_get_block_alignment(screen, ptex->format, + rtex->array_mode[level]); + nblocksx = align(nblocksx, block_align); + return nblocksx; } static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = 
&rtex->resource.b.b.b; unsigned height, tile_height; height = mip_minify(ptex->height0, level); - if (util_format_is_plain(ptex->format)) { - tile_height = r600_get_height_alignment(screen, - rtex->array_mode[level]); - height = align(height, tile_height); - } - return util_format_get_nblocksy(ptex->format, height); -} - -/* Get a width in pixels from a stride in bytes. */ -static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes) -{ - return (pitch_in_bytes / util_format_get_blocksize(format)) * - util_format_get_blockwidth(format); + height = util_format_get_nblocksy(ptex->format, height); + tile_height = r600_get_height_alignment(screen, + rtex->array_mode[level]); + height = align(height, tile_height); + return height; } static void r600_texture_set_array_mode(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level, unsigned array_mode) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; switch (array_mode) { case V_0280A0_ARRAY_LINEAR_GENERAL: @@ -227,7 +222,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, unsigned w, h, tile_height, tile_width; tile_height = r600_get_height_alignment(screen, array_mode); - tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode); + tile_width = r600_get_block_alignment(screen, ptex->format, array_mode); w = mip_minify(ptex->width0, level); h = mip_minify(ptex->height0, level); @@ -244,40 +239,128 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned array_mode) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); - unsigned pitch, size, layer_size, i, offset; - unsigned nblocksy; + unsigned size, layer_size, i, offset; + unsigned nblocksx, nblocksy; for (i = 0, offset = 0; i <= ptex->last_level; i++) { + unsigned blocksize = util_format_get_blocksize(ptex->format); + r600_texture_set_array_mode(screen, rtex, i, array_mode); - pitch = r600_texture_get_stride(screen, rtex, i); + nblocksx = r600_texture_get_nblocksx(screen, rtex, i); nblocksy = r600_texture_get_nblocksy(screen, rtex, i); - layer_size = pitch * nblocksy; - + layer_size = nblocksx * nblocksy * blocksize; if (ptex->target == PIPE_TEXTURE_CUBE) { if (chipc >= R700) size = layer_size * 8; else size = layer_size * 6; } - else + else if (ptex->target == PIPE_TEXTURE_3D) size = layer_size * u_minify(ptex->depth0, i); + else + size = layer_size * ptex->array_size; + /* align base image and start of miptree */ if ((i == 0) || (i == 1)) offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch_in_bytes[i] = pitch; - rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); + rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */ + rtex->pitch_in_bytes[i] = nblocksx * blocksize; + offset += size; } rtex->size = offset; } +/* Figure out whether u_blitter will fallback to a transfer operation. + * If so, don't use a staging resource. 
+ */ +static boolean permit_hardware_blit(struct pipe_screen *screen, + const struct pipe_resource *res) +{ + unsigned bind; + + if (util_format_is_depth_or_stencil(res->format)) + bind = PIPE_BIND_DEPTH_STENCIL; + else + bind = PIPE_BIND_RENDER_TARGET; + + /* hackaround for S3TC */ + if (util_format_is_s3tc(res->format)) + return TRUE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + bind, 0)) + return FALSE; + + if (!screen->is_format_supported(screen, + res->format, + res->target, + res->nr_samples, + PIPE_BIND_SAMPLER_VIEW, 0)) + return FALSE; + + return TRUE; +} + +static boolean r600_texture_get_handle(struct pipe_screen* screen, + struct pipe_resource *ptex, + struct winsys_handle *whandle) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct radeon *radeon = (struct radeon *)screen->winsys; + + return r600_bo_get_winsys_handle(radeon, resource->bo, + rtex->pitch_in_bytes[0], whandle); +} + +static void r600_texture_destroy(struct pipe_screen *screen, + struct pipe_resource *ptex) +{ + struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + struct radeon *radeon = (struct radeon *)screen->winsys; + + if (rtex->flushed_depth_texture) + pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + + if (resource->bo) { + r600_bo_reference(radeon, &resource->bo, NULL); + } + FREE(rtex); +} + +static unsigned int r600_texture_is_referenced(struct pipe_context *context, + struct pipe_resource *texture, + unsigned level, int layer) +{ + /* FIXME */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + +static const struct u_resource_vtbl r600_texture_vtbl = +{ + r600_texture_get_handle, /* get_handle */ + r600_texture_destroy, /* resource_destroy */ + r600_texture_is_referenced, /* is_resource_referenced */ + r600_texture_get_transfer, /* get_transfer */ + r600_texture_transfer_destroy, /* transfer_destroy */ + r600_texture_transfer_map, /* transfer_map */ + u_default_transfer_flush_region,/* transfer_flush_region */ + r600_texture_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + static struct r600_resource_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, @@ -295,21 +378,22 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; resource = &rtex->resource; - resource->base.b = *base; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = screen; + resource->b.b.b = *base; + resource->b.b.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->b.b.b.reference, 1); + resource->b.b.b.screen = screen; resource->bo = bo; rtex->pitch_override = pitch_in_bytes_override; + /* only mark depth textures the HW can hit as depth textures */ + if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base)) + rtex->depth = 1; - if (array_mode) - rtex->tiled = 1; r600_setup_miptree(screen, rtex, array_mode); resource->size = rtex->size; if (!resource->bo) { - struct pipe_resource *ptex = &rtex->resource.base.b; + struct pipe_resource *ptex = &rtex->resource.b.b.b; int base_align = r600_get_base_alignment(screen, ptex->format, array_mode); resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage); @@ -321,48 +405,6 @@ 
r600_texture_create_object(struct pipe_screen *screen, return rtex; } -/* Figure out whether u_blitter will fallback to a transfer operation. - * If so, don't use a staging resource. - */ -static boolean permit_hardware_blit(struct pipe_screen *screen, - const struct pipe_resource *res) -{ - unsigned bind; - - if (util_format_is_depth_or_stencil(res->format)) - bind = PIPE_BIND_DEPTH_STENCIL; - else - bind = PIPE_BIND_RENDER_TARGET; - - /* See r600_resource_copy_region: there is something wrong - * with depth resource copies at the moment so avoid them for - * now. - */ - if (util_format_get_component_bits(res->format, - UTIL_FORMAT_COLORSPACE_ZS, - 0) != 0) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - bind, 0)) - return FALSE; - - if (!screen->is_format_supported(screen, - res->format, - res->target, - res->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0)) - return FALSE; - - if (res->usage == PIPE_USAGE_STREAM) - return FALSE; - - return TRUE; -} - struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ) { @@ -381,46 +423,21 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, } } + if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && + util_format_is_s3tc(templ->format)) + array_mode = V_038000_ARRAY_1D_TILED_THIN1; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, 0, 0, NULL); } -static void r600_texture_destroy(struct pipe_screen *screen, - struct pipe_resource *ptex) -{ - struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; - - if (rtex->flushed_depth_texture) - pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); - - if (resource->bo) { - r600_bo_reference(radeon, &resource->bo, NULL); - } - FREE(rtex); -} - -static boolean r600_texture_get_handle(struct pipe_screen* screen, - struct pipe_resource *ptex, - struct winsys_handle *whandle) -{ - struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex; - struct r600_resource *resource = &rtex->resource; - struct radeon *radeon = (struct radeon *)screen->winsys; - - return r600_bo_get_winsys_handle(radeon, resource->bo, - rtex->pitch_in_bytes[0], whandle); -} - static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_surface *surf_tmpl) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct r600_surface *surface = CALLOC_STRUCT(r600_surface); - unsigned tile_height; unsigned level = surf_tmpl->u.tex.level; assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); @@ -440,8 +457,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; surface->base.u.tex.level = level; - tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]); - surface->aligned_height = align(surface->base.height, tile_height); + surface->aligned_height = r600_texture_get_nblocksy(pipe->screen, + rtex, level); return &surface->base; } @@ -477,16 +494,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, bo); } -static unsigned int r600_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - /* FIXME */ - return 
PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - int r600_texture_depth_flush(struct pipe_context *ctx, - struct pipe_resource *texture) + struct pipe_resource *texture, boolean just_create) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_resource resource; @@ -499,7 +508,8 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.width0 = texture->width0; resource.height0 = texture->height0; resource.depth0 = 1; - resource.last_level = 0; + resource.array_size = 1; + resource.last_level = texture->last_level; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; @@ -513,7 +523,11 @@ int r600_texture_depth_flush(struct pipe_context *ctx, return -ENOMEM; } + ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE; out: + if (just_create) + return 0; + /* XXX: only do this if the depth texture has actually changed: */ r600_blit_uncompress_depth(ctx, rtex); @@ -546,7 +560,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, * the CPU is much happier reading out of cached system memory * than uncached VRAM. */ - if (rtex->tiled) + if (R600_TEX_IS_TILED(rtex, level)) use_staging_texture = TRUE; if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024) @@ -579,13 +593,16 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, */ /* XXX: when discard is true, no need to read back from depth texture */ - r = r600_texture_depth_flush(ctx, texture); + r = r600_texture_depth_flush(ctx, texture, FALSE); if (r < 0) { R600_ERR("failed to create temporary texture to hold untiled copy\n"); pipe_resource_reference(&trans->transfer.resource, NULL); FREE(trans); return NULL; } + trans->transfer.stride = rtex->flushed_depth_texture->pitch_in_bytes[level]; + trans->offset = r600_texture_get_offset(rtex->flushed_depth_texture, level, box->z); + return &trans->transfer; } else if (use_staging_texture) { resource.target = PIPE_TEXTURE_2D; resource.format = texture->format; @@ -627,6 +644,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, return &trans->transfer; } trans->transfer.stride = rtex->pitch_in_bytes[level]; + trans->transfer.layer_stride = rtex->layer_size[level]; trans->offset = r600_texture_get_offset(rtex, level, box->z); return &trans->transfer; } @@ -635,7 +653,8 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; + struct pipe_resource *texture = transfer->resource; + struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; if (rtransfer->staging_texture) { if (transfer->usage & PIPE_TRANSFER_WRITE) { @@ -643,9 +662,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, } pipe_resource_reference(&rtransfer->staging_texture, NULL); } - if (rtex->flushed_depth_texture) { - pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); + + if (rtex->depth && !rtex->is_flushing_texture) { + if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtex->flushed_depth_texture) + r600_blit_push_depth(ctx, rtex); } + pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); } @@ -727,19 +749,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx, r600_bo_unmap(radeon, bo); } -struct u_resource_vtbl r600_texture_vtbl = -{ - 
r600_texture_get_handle, /* get_handle */ - r600_texture_destroy, /* resource_destroy */ - r600_texture_is_referenced, /* is_resource_referenced */ - r600_texture_get_transfer, /* get_transfer */ - r600_texture_transfer_destroy, /* transfer_destroy */ - r600_texture_transfer_map, /* transfer_map */ - u_default_transfer_flush_region,/* transfer_flush_region */ - r600_texture_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ -}; - void r600_init_surface_functions(struct r600_pipe_context *r600) { r600->context.create_surface = r600_create_surface; @@ -802,6 +811,8 @@ uint32_t r600_translate_texformat(enum pipe_format format, uint32_t result = 0, word4 = 0, yuv_format = 0; const struct util_format_description *desc; boolean uniform = TRUE; + static int r600_enable_s3tc = -1; + int i; const uint32_t sign_bit[4] = { S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED), @@ -853,34 +864,55 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_SRGB: word4 |= S_038010_FORCE_DEGAMMA(1); - if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB) - goto out_unknown; /* fails for some reason - TODO */ break; default: break; } - /* S3TC formats. TODO */ - if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - static int r600_enable_s3tc = -1; + if (r600_enable_s3tc == -1) + r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + + if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + if (!r600_enable_s3tc) + goto out_unknown; + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + result = FMT_BC4; + goto out_word4; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + result = FMT_BC5; + goto out_word4; + default: + goto out_unknown; + } + } - if (r600_enable_s3tc == -1) - r600_enable_s3tc = - debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { if (!r600_enable_s3tc) goto out_unknown; + if (!util_format_s3tc_enabled) { + goto out_unknown; + } + switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: result = FMT_BC1; goto out_word4; case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: result = FMT_BC2; goto out_word4; case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: result = FMT_BC3; goto out_word4; default: @@ -897,8 +929,6 @@ uint32_t r600_translate_texformat(enum pipe_format format, /* R8G8Bx_SNORM - TODO CxV8U8 */ - /* RGTC - TODO */ - /* See whether the components are of the same size. 
*/ for (i = 1; i < desc->nr_channels; i++) { uniform = uniform && desc->channel[0].size == desc->channel[i].size; @@ -927,7 +957,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = FMT_10_10_10_2; + result = FMT_2_10_10_10; goto out_word4; } goto out_unknown; @@ -990,6 +1020,19 @@ uint32_t r600_translate_texformat(enum pipe_format format, result = FMT_16_16_16_16; goto out_word4; } + goto out_unknown; + case 32: + switch (desc->nr_channels) { + case 1: + result = FMT_32; + goto out_word4; + case 2: + result = FMT_32_32; + goto out_word4; + case 4: + result = FMT_32_32_32_32; + goto out_word4; + } } goto out_unknown; diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index f80fa7af941..7482d15e12f 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -22,178 +22,34 @@ * * Authors: Dave Airlie <[email protected]> */ -#include "translate/translate_cache.h" -#include "translate/translate.h" -#include <pipebuffer/pb_buffer.h> + #include <util/u_index_modify.h> +#include "util/u_inlines.h" +#include "util/u_upload_mgr.h" #include "r600_pipe.h" -void r600_begin_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r600_vertex_element *ve = rctx->vertex_elements; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - void *tmp; - - /* Initialize the translate key, i.e. the recipe how vertices should be - * translated. */ - for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->elements[i].src_format == ve->hw_format[i]) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. */ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->elements[i].vertex_buffer_index; - te->input_format = ve->elements[i].src_format; - te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. 
*/ - tr = translate_cache_find(rctx->tran.translate_cache, &key); - - /* Map buffers we want to translate. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); - } - } - - /* Create and map the output buffer. */ - num_verts = rctx->vb_max_index + 1; - - out_buffer = pipe_buffer_create(&rctx->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); - - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, vb_transfer[i]); - } - } - - pipe_buffer_unmap(pipe, out_transfer); - - /* Setup the new vertex buffer in the first free slot. */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&vb->buffer, out_buffer); - vb->buffer_offset = 0; - vb->max_index = num_verts - 1; - vb->stride = key.output_stride; - rctx->tran.vb_slot = i; - break; - } - } - - /* Save and replace vertex elements. */ - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->elements[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->elements[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->elements[i], - sizeof(struct pipe_vertex_element)); - } - } - - tmp = pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, tmp); - rctx->tran.new_velems = tmp; - - pipe_resource_reference(&out_buffer, NULL); -} - -void r600_end_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - - if (rctx->tran.new_velems == NULL) { - return; - } - /* Restore vertex elements. */ - pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); - rctx->tran.new_velems = NULL; - - /* Delete the now-unused VBO. 
*/ - pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, NULL); -} void r600_translate_index_buffer(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count) + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) { + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + boolean flushed; + switch (*index_size) { case 1: - util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + u_upload_alloc(r600->vbuf_mgr->uploader, 0, count * 2, + &out_offset, &out_buffer, &flushed, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r600->context, *index_buffer, 0, *start, count, ptr); + + pipe_resource_reference(index_buffer, out_buffer); *index_size = 2; - *start = 0; - break; - case 2: - case 4: + *start = out_offset / 2; break; } } diff --git a/src/gallium/drivers/r600/r600_upload.c b/src/gallium/drivers/r600/r600_upload.c deleted file mode 100644 index 44102ff55b6..00000000000 --- a/src/gallium/drivers/r600/r600_upload.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
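In the rewritten r600_translate_index_buffer above, 8-bit index buffers are widened to 16-bit on the CPU (util_shorten_ubyte_elts_to_userptr) into space obtained from the upload manager, since the hardware path expects at least 16-bit indices. A minimal standalone sketch of that widening step, assuming a plain caller-provided destination rather than u_upload_alloc; helper and variable names are illustrative.

#include <stdint.h>
#include <stdio.h>

static void widen_ubyte_indices(const uint8_t *in, uint16_t *out,
                                unsigned start, unsigned count)
{
   unsigned i;
   for (i = 0; i < count; i++)
      out[i] = (uint16_t)in[start + i];      /* zero-extend each index */
}

int main(void)
{
   const uint8_t src[6] = { 0, 1, 2, 2, 1, 3 };
   uint16_t dst[6];
   unsigned i;

   widen_ubyte_indices(src, dst, 0, 6);
   for (i = 0; i < 6; i++)
      printf("%u ", dst[i]);
   printf("\n");
   return 0;
}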
- * - * Authors: - * Jerome Glisse <[email protected]> - */ -#include <errno.h> -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "r600.h" -#include "r600_pipe.h" -#include "r600_resource.h" - -struct r600_upload { - struct r600_pipe_context *rctx; - struct r600_bo *buffer; - char *ptr; - unsigned size; - unsigned default_size; - unsigned total_alloc_size; - unsigned offset; - unsigned alignment; -}; - -struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx, - unsigned default_size, - unsigned alignment) -{ - struct r600_upload *upload = CALLOC_STRUCT(r600_upload); - - if (upload == NULL) - return NULL; - - upload->rctx = rctx; - upload->size = 0; - upload->default_size = default_size; - upload->alignment = alignment; - upload->ptr = NULL; - upload->buffer = NULL; - upload->total_alloc_size = 0; - - return upload; -} - -void r600_upload_flush(struct r600_upload *upload) -{ - if (upload->buffer) { - r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL); - } - upload->default_size = MAX2(upload->total_alloc_size, upload->default_size); - upload->total_alloc_size = 0; - upload->size = 0; - upload->offset = 0; - upload->ptr = NULL; - upload->buffer = NULL; -} - -void r600_upload_destroy(struct r600_upload *upload) -{ - r600_upload_flush(upload); - FREE(upload); -} - -int r600_upload_buffer(struct r600_upload *upload, unsigned offset, - unsigned size, struct r600_resource_buffer *in_buffer, - unsigned *out_offset, unsigned *out_size, - struct r600_bo **out_buffer) -{ - unsigned alloc_size = align(size, upload->alignment); - const void *in_ptr = NULL; - - if (upload->offset + alloc_size > upload->size) { - if (upload->size) { - r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL); - } - upload->size = align(MAX2(upload->default_size, alloc_size), 4096); - upload->total_alloc_size += upload->size; - upload->offset = 0; - upload->buffer = r600_bo(upload->rctx->radeon, upload->size, 4096, PIPE_BIND_VERTEX_BUFFER, 0); - if (upload->buffer == NULL) { - return -ENOMEM; - } - upload->ptr = r600_bo_map(upload->rctx->radeon, upload->buffer, 0, NULL); - } - - in_ptr = in_buffer->user_buffer; - memcpy(upload->ptr + upload->offset, (uint8_t *) in_ptr + offset, size); - *out_offset = upload->offset; - *out_size = upload->size; - *out_buffer = NULL; - r600_bo_reference(upload->rctx->radeon, out_buffer, upload->buffer); - upload->offset += alloc_size; - - return 0; -} diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 8c391936db0..e8558c49a7c 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -248,6 +248,8 @@ #define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27) #define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1) #define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF +#define V_0280A0_EXPORT_FULL 0 +#define V_0280A0_EXPORT_NORM 1 #define R_028060_CB_COLOR0_SIZE 0x028060 #define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0) #define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF) @@ -2332,31 +2334,6 @@ #define R_0280D4_CB_COLOR5_TILE 0x0280D4 #define R_0280D8_CB_COLOR6_TILE 0x0280D8 #define R_0280DC_CB_COLOR7_TILE 0x0280DC -#define R_028808_CB_COLOR_CONTROL 0x028808 -#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) -#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) -#define C_028808_FOG_ENABLE 0xFFFFFFFE -#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1) -#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1) -#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD -#define S_028808_DITHER_ENABLE(x) 
(((x) & 0x1) << 2) -#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1) -#define C_028808_DITHER_ENABLE 0xFFFFFFFB -#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3) -#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1) -#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7 -#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4) -#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7) -#define C_028808_SPECIAL_OP 0xFFFFFF8F -#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7) -#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1) -#define C_028808_PER_MRT_BLEND 0xFFFFFF7F -#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8) -#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF) -#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF -#define S_028808_ROP3(x) (((x) & 0xFF) << 16) -#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) -#define C_028808_ROP3 0xFF00FFFF #define R_028614_SPI_VS_OUT_ID_0 0x028614 #define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0) #define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF) diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 94e57e40f86..3aefb5b3bb5 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -987,6 +987,19 @@ rbug_context_transfer_inline_write(struct pipe_context *_context, } +static void rbug_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct rbug_context *rb_pipe = rbug_context(_context); + struct rbug_resource *rb_resource = rbug_resource(_resource); + struct pipe_context *context = rb_pipe->pipe; + struct pipe_resource *resource = rb_resource->resource; + + context->redefine_user_buffer(context, resource, offset, size); +} + + struct pipe_context * rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -1072,6 +1085,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.transfer_unmap = rbug_context_transfer_unmap; rb_pipe->base.transfer_flush_region = rbug_context_transfer_flush_region; rb_pipe->base.transfer_inline_write = rbug_context_transfer_inline_write; + rb_pipe->base.redefine_user_buffer = rbug_redefine_user_buffer; rb_pipe->pipe = pipe; diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c index 7d7cc482ae6..15f5db40093 100644 --- a/src/gallium/drivers/rbug/rbug_objects.c +++ b/src/gallium/drivers/rbug/rbug_objects.c @@ -98,8 +98,9 @@ rbug_surface_create(struct rbug_context *rb_context, pipe_reference_init(&rb_surface->base.reference, 1); rb_surface->base.texture = NULL; + rb_surface->base.context = &rb_context->base; + rb_surface->surface = surface; /* we own the surface already */ pipe_resource_reference(&rb_surface->base.texture, &rb_resource->base); - rb_surface->surface = surface; return &rb_surface->base; @@ -113,8 +114,7 @@ rbug_surface_destroy(struct rbug_context *rb_context, struct rbug_surface *rb_surface) { pipe_resource_reference(&rb_surface->base.texture, NULL); - rb_context->pipe->surface_destroy(rb_context->pipe, - rb_surface->surface); + pipe_surface_reference(&rb_surface->surface, NULL); FREE(rb_surface); } diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index fe54f92addf..70fdfb7ddf3 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -230,7 +230,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->use_sse = FALSE; #endif - 
softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE ); softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); softpipe->pipe.winsys = NULL; @@ -315,7 +315,7 @@ softpipe_create_context( struct pipe_screen *screen, (struct tgsi_sampler **) softpipe->tgsi.geom_samplers_list); - if (debug_get_bool_option( "SP_NO_RAST", FALSE )) + if (debug_get_bool_option( "SOFTPIPE_NO_RAST", FALSE )) softpipe->no_rast = TRUE; softpipe->vbuf_backend = sp_create_vbuf_backend(softpipe); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 035d712d17c..c91709aef06 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -58,7 +58,7 @@ struct softpipe_context { /** Constant state objects */ struct pipe_blend_state *blend; - struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *fragment_samplers[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_sampler_state *geometry_samplers[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_depth_stencil_alpha_state *depth_stencil; @@ -91,8 +91,8 @@ struct softpipe_context { } so_target; struct pipe_query_data_so_statistics so_stats; - unsigned num_samplers; - unsigned num_sampler_views; + unsigned num_fragment_samplers; + unsigned num_fragment_sampler_views; unsigned num_vertex_samplers; unsigned num_vertex_sampler_views; unsigned num_geometry_samplers; diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index d422cb17a4b..6f7addd441a 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -51,7 +51,7 @@ softpipe_flush( struct pipe_context *pipe, draw_flush(softpipe->draw); if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - for (i = 0; i < softpipe->num_sampler_views; i++) { + for (i = 0; i < softpipe->num_fragment_sampler_views; i++) { sp_flush_tex_tile_cache(softpipe->fragment_tex_cache[i]); } for (i = 0; i < softpipe->num_vertex_sampler_views; i++) { diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index c433405cb66..a06817c5735 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -126,6 +126,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_INSTANCED_DRAWING: return 1; + case PIPE_CAP_ARRAY_TEXTURES: + return 1; default: return 0; } @@ -186,7 +188,9 @@ softpipe_is_format_supported( struct pipe_screen *screen, assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 5d727dc00df..0ce28f4c6ee 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -575,7 +575,7 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot) setup->coef[slot].dady[0] = 0.0; /*Y*/ setup->coef[slot].a0[1] = - (spfs->origin_lower_left ? setup->softpipe->framebuffer.height : 0) + (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0) + (spfs->pixel_center_integer ? 
0.0 : 0.5); setup->coef[slot].dadx[1] = 0.0; setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0; diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 38943563800..60331bc4976 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -76,18 +76,18 @@ softpipe_bind_fragment_sampler_states(struct pipe_context *pipe, assert(num <= PIPE_MAX_SAMPLERS); /* Check for no-op */ - if (num == softpipe->num_samplers && - !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) + if (num == softpipe->num_fragment_samplers && + !memcmp(softpipe->fragment_samplers, sampler, num * sizeof(void *))) return; draw_flush(softpipe->draw); for (i = 0; i < num; ++i) - softpipe->sampler[i] = sampler[i]; + softpipe->fragment_samplers[i] = sampler[i]; for (i = num; i < PIPE_MAX_SAMPLERS; ++i) - softpipe->sampler[i] = NULL; + softpipe->fragment_samplers[i] = NULL; - softpipe->num_samplers = num; + softpipe->num_fragment_samplers = num; softpipe->dirty |= SP_NEW_SAMPLER; } @@ -191,7 +191,7 @@ softpipe_set_fragment_sampler_views(struct pipe_context *pipe, assert(num <= PIPE_MAX_SAMPLERS); /* Check for no-op */ - if (num == softpipe->num_sampler_views && + if (num == softpipe->num_fragment_sampler_views && !memcmp(softpipe->fragment_sampler_views, views, num * sizeof(struct pipe_sampler_view *))) return; @@ -205,7 +205,7 @@ softpipe_set_fragment_sampler_views(struct pipe_context *pipe, sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[i], view); } - softpipe->num_sampler_views = num; + softpipe->num_fragment_sampler_views = num; softpipe->dirty |= SP_NEW_TEXTURE; } @@ -374,10 +374,10 @@ softpipe_reset_sampler_variants(struct softpipe_context *softpipe) } for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { - if (softpipe->sampler[i]) { + if (softpipe->fragment_samplers[i]) { softpipe->tgsi.frag_samplers_list[i] = get_sampler_variant( i, - sp_sampler(softpipe->sampler[i]), + sp_sampler(softpipe->fragment_samplers[i]), softpipe->fragment_sampler_views[i], TGSI_PROCESSOR_FRAGMENT ); diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 5f4d661abde..aa0b333c7a9 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_transfer.h" #include "draw/draw_context.h" @@ -119,4 +120,5 @@ softpipe_init_vertex_funcs(struct pipe_context *pipe) pipe->set_vertex_buffers = softpipe_set_vertex_buffers; pipe->set_index_buffer = softpipe_set_index_buffer; + pipe->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 242c27c7ebd..8a4ef934348 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -539,6 +539,19 @@ wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size, } +/** + * Do coordinate to array index conversion. For array textures. 
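wrap_array_layer(), added below for the new array-texture support, turns the relevant texture coordinate into an integer layer index by rounding to nearest and clamping against the array size. A minimal standalone sketch of that conversion for a single coordinate; the names are illustrative, not softpipe's.

#include <math.h>
#include <stdio.h>

static int coord_to_layer(float coord, int array_size)
{
   int c = (int)floorf(coord + 0.5f);          /* round to nearest layer */
   if (c < 0)
      c = 0;                                   /* clamp to [0, size-1] */
   if (c > array_size - 1)
      c = array_size - 1;
   return c;
}

int main(void)
{
   printf("%d %d %d\n",
          coord_to_layer(-0.7f, 4),            /* 0 */
          coord_to_layer(1.2f, 4),             /* 1 */
          coord_to_layer(9.0f, 4));            /* 3 */
   return 0;
}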
+ */ +static INLINE void +wrap_array_layer(const float coord[4], unsigned size, int layer[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + int c = util_ifloor(coord[ch] + 0.5F); + layer[ch] = CLAMP(c, 0, size - 1); + } +} + /** * Examine the quad's texture coordinates to compute the partial @@ -761,6 +774,43 @@ get_texel_3d(const struct sp_sampler_variant *samp, } +/* Get texel pointer for 1D array texture */ +static INLINE const float * +get_texel_1d_array(const struct sp_sampler_variant *samp, + union tex_tile_address addr, int x, int y) +{ + const struct pipe_resource *texture = samp->view->texture; + unsigned level = addr.bits.level; + + if (x < 0 || x >= (int) u_minify(texture->width0, level)) { + return samp->sampler->border_color; + } + else { + return get_texel_2d_no_border(samp, addr, x, y); + } +} + + +/* Get texel pointer for 2D array texture */ +static INLINE const float * +get_texel_2d_array(const struct sp_sampler_variant *samp, + union tex_tile_address addr, int x, int y, int layer) +{ + const struct pipe_resource *texture = samp->view->texture; + unsigned level = addr.bits.level; + + assert(layer < texture->array_size); + + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level)) { + return samp->sampler->border_color; + } + else { + return get_texel_3d_no_border(samp, addr, x, y, layer); + } +} + + /** * Given the logbase2 of a mipmap's base level size and a mipmap level, * return the size (in texels) of that mipmap level. @@ -990,6 +1040,47 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, static void +img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width; + int x[4], layer[4]; + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + wrap_array_layer(t, texture->array_size, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_1d_array(samp, addr, x[j], layer[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } +} + + +static void img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], @@ -1033,6 +1124,50 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, } +static void +img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width, height; + int x[4], y[4], layer[4]; + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + 
samp->nearest_texcoord_t(t, height, y); + wrap_array_layer(p, texture->array_size, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_2d_array(samp, addr, x[j], y[j], layer[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } +} + + static INLINE union tex_tile_address face(union tex_tile_address addr, unsigned face ) { @@ -1168,6 +1303,47 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, static void +img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width; + int x0[4], x1[4], layer[4]; + float xw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + wrap_array_layer(t, texture->array_size, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_1d_array(samp, addr, x0[j], layer[j]); + const float *tx1 = get_texel_1d_array(samp, addr, x1[j], layer[j]); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]); + } + } +} + + +static void img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], @@ -1215,6 +1391,54 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, static void +img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + unsigned level0, j; + int width, height; + int x0[4], y0[4], x1[4], y1[4], layer[4]; + float xw[4], yw[4]; /* weights */ + union tex_tile_address addr; + + level0 = samp->level; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + + assert(width > 0); + assert(height > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_t(t, height, y0, y1, yw); + wrap_array_layer(p, texture->array_size, layer); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_2d_array(samp, addr, x0[j], y0[j], layer[j]); + const float *tx1 = get_texel_2d_array(samp, addr, x1[j], y0[j], layer[j]); + const float *tx2 = get_texel_2d_array(samp, addr, x0[j], y1[j], layer[j]); + const float *tx3 = get_texel_2d_array(samp, addr, x1[j], y1[j], layer[j]); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx0[c], tx1[c], + tx2[c], tx3[c]); + } + } +} + + +static void img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], @@ -1906,8 +2130,10 @@ get_lambda_func(const union sp_sampler_key key) switch (key.bits.target) { case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: return compute_lambda_1d; case PIPE_TEXTURE_2D: + case 
PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: return compute_lambda_2d; @@ -1932,6 +2158,12 @@ get_img_filter(const union sp_sampler_key key, else return img_filter_1d_linear; break; + case PIPE_TEXTURE_1D_ARRAY: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_1d_array_nearest; + else + return img_filter_1d_array_linear; + break; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: /* Try for fast path: @@ -1967,6 +2199,12 @@ get_img_filter(const union sp_sampler_key key, else return img_filter_2d_linear; break; + case PIPE_TEXTURE_2D_ARRAY: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_2d_array_nearest; + else + return img_filter_2d_array_linear; + break; case PIPE_TEXTURE_CUBE: if (filter == PIPE_TEX_FILTER_NEAREST) return img_filter_cube_nearest; diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index e42015ad498..e589ee7c841 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -251,6 +251,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_level != addr.bits.level || tc->tex_z != addr.bits.z) { /* get new transfer (view into texture) */ + unsigned width, height, layer; if (tc->tex_trans) { if (tc->tex_trans_map) { @@ -262,14 +263,22 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_trans = NULL; } + width = u_minify(tc->texture->width0, addr.bits.level); + if (tc->texture->target == PIPE_TEXTURE_1D_ARRAY) { + height = tc->texture->array_size; + layer = 0; + } + else { + height = u_minify(tc->texture->height0, addr.bits.level); + layer = addr.bits.face + addr.bits.z; + } + tc->tex_trans = pipe_get_transfer(tc->pipe, tc->texture, addr.bits.level, - addr.bits.face + addr.bits.z, + layer, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED, - 0, 0, - u_minify(tc->texture->width0, addr.bits.level), - u_minify(tc->texture->height0, addr.bits.level)); + 0, 0, width, height); tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans); @@ -278,22 +287,17 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, tc->tex_z = addr.bits.z; } - /* get tile from the transfer (view into texture) - * Note we're using the swizzle version of this fuction only because - * we need to pass the texture cache's format explicitly. + /* Get tile from the transfer (view into texture), explicitly passing + * the image format. 
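The img_filter_*_array_linear routines above blend four neighbouring texels with lerp_2d, using the fractional x/y weights produced by linear_texcoord_s/t. A minimal standalone sketch of that two-dimensional linear blend; the helper names mirror the driver's but the code is illustrative only.

#include <stdio.h>

static float lerp(float a, float v0, float v1)
{
   return v0 + a * (v1 - v0);
}

static float lerp_2d(float a, float b,
                     float v00, float v10, float v01, float v11)
{
   float bottom = lerp(a, v00, v10);            /* blend along x */
   float top    = lerp(a, v01, v11);
   return lerp(b, bottom, top);                 /* then along y */
}

int main(void)
{
   /* Sample halfway between four texel values: expect 1.50 */
   printf("%.2f\n", lerp_2d(0.5f, 0.5f, 0.0f, 1.0f, 2.0f, 3.0f));
   return 0;
}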
*/ - pipe_get_tile_swizzle(tc->pipe, - tc->tex_trans, - addr.bits.x * TILE_SIZE, - addr.bits.y * TILE_SIZE, - TILE_SIZE, - TILE_SIZE, - PIPE_SWIZZLE_RED, - PIPE_SWIZZLE_GREEN, - PIPE_SWIZZLE_BLUE, - PIPE_SWIZZLE_ALPHA, - tc->format, - (float *) tile->data.color); + pipe_get_tile_rgba_format(tc->pipe, + tc->tex_trans, + addr.bits.x * TILE_SIZE, + addr.bits.y * TILE_SIZE, + TILE_SIZE, + TILE_SIZE, + tc->format, + (float *) tile->data.color); tile->addr = addr; } diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index 2220955b715..9bced37990a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -92,7 +92,7 @@ struct softpipe_tex_tile_cache unsigned swizzle_g; unsigned swizzle_b; unsigned swizzle_a; - unsigned format; + enum pipe_format format; struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */ }; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 509d9982b17..95374c34ec3 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -62,13 +62,21 @@ softpipe_resource_layout(struct pipe_screen *screen, unsigned buffer_size = 0; for (level = 0; level <= pt->last_level; level++) { + unsigned slices; + + if (pt->target == PIPE_TEXTURE_CUBE) + slices = 6; + else if (pt->target == PIPE_TEXTURE_3D) + slices = depth; + else + slices = pt->array_size; + spr->stride[level] = util_format_get_stride(pt->format, width); spr->level_offset[level] = buffer_size; buffer_size += (util_format_get_nblocksy(pt->format, height) * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - spr->stride[level]); + slices * spr->stride[level]); width = u_minify(width, 1); height = u_minify(height, 1); @@ -227,9 +235,13 @@ sp_get_tex_image_offset(const struct softpipe_resource *spr, unsigned offset = spr->level_offset[level]; if (spr->base.target == PIPE_TEXTURE_CUBE || - spr->base.target == PIPE_TEXTURE_3D) { + spr->base.target == PIPE_TEXTURE_3D || + spr->base.target == PIPE_TEXTURE_2D_ARRAY) { offset += layer * nblocksy * spr->stride[level]; } + else if (spr->base.target == PIPE_TEXTURE_1D_ARRAY) { + offset += layer * spr->stride[level]; + } else { assert(layer == 0); } @@ -292,7 +304,7 @@ softpipe_surface_destroy(struct pipe_context *pipe, * a resource object. 
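softpipe_resource_layout() above now sizes each mipmap level by a slice count that depends on the target: 6 for cube maps, the per-level depth for 3D textures, and array_size for array textures. A minimal standalone sketch of that layout computation, assuming a fixed 4 bytes per texel and no row alignment; everything here is illustrative, not the driver's structures.

#include <stdio.h>

enum target { TEX_2D, TEX_CUBE, TEX_3D, TEX_2D_ARRAY };

static unsigned layout(enum target t, unsigned w, unsigned h, unsigned d,
                       unsigned array_size, unsigned levels,
                       unsigned offsets[16])
{
   unsigned size = 0;
   unsigned l;

   for (l = 0; l < levels; l++) {
      unsigned slices = (t == TEX_CUBE) ? 6 :
                        (t == TEX_3D)   ? d : array_size;
      offsets[l] = size;
      size += w * h * 4 * slices;               /* 4 bytes per texel */
      if (w > 1) w /= 2;                        /* minify for next level */
      if (h > 1) h /= 2;
      if (d > 1) d /= 2;
   }
   return size;
}

int main(void)
{
   unsigned offs[16];
   unsigned total = layout(TEX_2D_ARRAY, 64, 64, 1, 8, 3, offs);
   printf("total %u bytes, level 1 starts at %u\n", total, offs[1]);
   return 0;
}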
* \param pipe rendering context * \param resource the resource to transfer in/out of - * \param sr indicates cube face or 3D texture slice + * \param level which mipmap level * \param usage bitmask of PIPE_TRANSFER_x flags * \param box the 1D/2D/3D region of interest */ @@ -311,8 +323,21 @@ softpipe_get_transfer(struct pipe_context *pipe, /* make sure the requested region is in the image bounds */ assert(box->x + box->width <= u_minify(resource->width0, level)); - assert(box->y + box->height <= u_minify(resource->height0, level)); - assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); + if (resource->target == PIPE_TEXTURE_1D_ARRAY) { + assert(box->y + box->height <= resource->array_size); + } + else { + assert(box->y + box->height <= u_minify(resource->height0, level)); + if (resource->target == PIPE_TEXTURE_2D_ARRAY) { + assert(box->z + box->depth <= resource->array_size); + } + else if (resource->target == PIPE_TEXTURE_CUBE) { + assert(box->z < 6); + } + else { + assert(box->z + box->depth <= (u_minify(resource->depth0, level))); + } + } /* * Transfers, like other pipe operations, must happen in order, so flush the diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 480860af63b..60870b8bee5 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -357,11 +357,12 @@ sp_flush_tile(struct softpipe_tile_cache* tc, unsigned pos) tc->entries[pos]->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(tc->pipe, tc->transfer, - tc->tile_addrs[pos].bits.x * TILE_SIZE, - tc->tile_addrs[pos].bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tc->entries[pos]->data.color); + pipe_put_tile_rgba_format(tc->pipe, tc->transfer, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + tc->surface->format, + (float *) tc->entries[pos]->data.color); } tc->tile_addrs[pos].bits.invalid = 1; /* mark as empty */ } @@ -468,11 +469,12 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(tc->pipe, pt, - tc->tile_addrs[pos].bits.x * TILE_SIZE, - tc->tile_addrs[pos].bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); + pipe_put_tile_rgba_format(tc->pipe, pt, + tc->tile_addrs[pos].bits.x * TILE_SIZE, + tc->tile_addrs[pos].bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + tc->surface->format, + (float *) tile->data.color); } } diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 1e513f1039f..f0f875b2b23 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -34,6 +34,7 @@ #include "svga_context.h" #include "svga_screen.h" +#include "svga_surface.h" #include "svga_resource_texture.h" #include "svga_resource_buffer.h" #include "svga_resource.h" @@ -43,6 +44,12 @@ #include "svga_debug.h" #include "svga_state.h" +DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(use_min_mipmap, "SVGA_USE_MIN_MIPMAP", FALSE); +DEBUG_GET_ONCE_NUM_OPTION(disable_shader, "SVGA_DISABLE_SHADER", ~0); +DEBUG_GET_ONCE_BOOL_OPTION(no_line_width, "SVGA_NO_LINE_WIDTH", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE", FALSE); static void svga_destroy( struct pipe_context *pipe ) { @@ -113,13 +120,12 @@ struct 
pipe_context *svga_context_create( struct pipe_screen *screen, /* debug */ - svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE); - svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE); - svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE); - svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); - - if (!svga_init_swtnl(svga)) - goto no_swtnl; + svga->debug.no_swtnl = debug_get_option_no_swtnl(); + svga->debug.force_swtnl = debug_get_option_force_swtnl(); + svga->debug.use_min_mipmap = debug_get_option_use_min_mipmap(); + svga->debug.disable_shader = debug_get_option_disable_shader(); + svga->debug.no_line_width = debug_get_option_no_line_width(); + svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple(); svga->fs_bm = util_bitmask_create(); if (svga->fs_bm == NULL) @@ -149,6 +155,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, if (svga->hwtnl == NULL) goto no_hwtnl; + if (!svga_init_swtnl(svga)) + goto no_swtnl; ret = svga_emit_initial_state( svga ); if (ret) @@ -171,6 +179,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, return &svga->pipe; no_state: + svga_destroy_swtnl(svga); +no_swtnl: svga_hwtnl_destroy( svga->hwtnl ); no_hwtnl: u_upload_destroy( svga->upload_vb ); @@ -181,8 +191,6 @@ no_upload_ib: no_vs_bm: util_bitmask_destroy( svga->fs_bm ); no_fs_bm: - svga_destroy_swtnl(svga); -no_swtnl: svga->swc->destroy(svga->swc); no_swc: FREE(svga); @@ -196,14 +204,10 @@ void svga_context_flush( struct svga_context *svga, { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); struct pipe_fence_handle *fence = NULL; + enum pipe_error ret; svga->curr.nr_fbs = 0; - /* Unmap upload manager buffers: - */ - u_upload_flush(svga->upload_vb); - u_upload_flush(svga->upload_ib); - /* Ensure that texture dma uploads are processed * before submitting commands. */ @@ -220,6 +224,21 @@ void svga_context_flush( struct svga_context *svga, */ svga->dirty |= SVGA_NEW_COMMAND_BUFFER; + /* + * We must reemit the surface bindings here, because svga_update_state + * will always flush the primitives before processing the + * SVGA_NEW_COMMAND_BUFFER state change. + * + * TODO: Refactor this. + */ + ret = svga_reemit_framebuffer_bindings(svga); + assert(ret == PIPE_OK); + + ret = svga_reemit_tss_bindings(svga); + assert(ret == PIPE_OK); + + svga->dirty &= ~SVGA_NEW_COMMAND_BUFFER; + if (SVGA_DEBUG & DEBUG_SYNC) { if (fence) svga->pipe.screen->fence_finish( svga->pipe.screen, fence, 0); @@ -245,6 +264,30 @@ void svga_hwtnl_flush_retry( struct svga_context *svga ) assert(ret == 0); } + +/* Emit all operations pending on host surfaces. + */ +void svga_surfaces_flush(struct svga_context *svga) +{ + unsigned i; + + /* Emit buffered drawing commands. + */ + svga_hwtnl_flush_retry( svga ); + + /* Emit back-copy from render target view to texture. 
+ */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (svga->curr.framebuffer.cbufs[i]) + svga_propagate_surface(svga, svga->curr.framebuffer.cbufs[i]); + } + + if (svga->curr.framebuffer.zsbuf) + svga_propagate_surface(svga, svga->curr.framebuffer.zsbuf); + +} + + struct svga_winsys_context * svga_winsys_context( struct pipe_context *pipe ) { diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index d4970908b1e..7b36a3606e0 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -35,6 +35,8 @@ #include "tgsi/tgsi_scan.h" +#include "svga_state.h" + #define SVGA_TEX_UNITS 8 #define SVGA_MAX_POINTSIZE 80.0 @@ -147,7 +149,14 @@ struct svga_rasterizer_state { float pointsize; unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */ - unsigned need_pipeline:16; /* which prims do we need help for? */ + + /** Which prims do we need help for? Bitmask of (1 << PIPE_PRIM_x) flags */ + unsigned need_pipeline:16; + + /** For debugging: */ + const char* need_pipeline_tris_str; + const char* need_pipeline_lines_str; + const char* need_pipeline_points_str; }; struct svga_sampler_state { @@ -237,7 +246,7 @@ struct svga_prescale { }; -/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT ) +/* Updated by calling svga_update_state( SVGA_STATE_HW_CLEAR ) */ struct svga_hw_clear_state { @@ -317,6 +326,9 @@ struct svga_context unsigned shader_id; unsigned disable_shader; + + boolean no_line_width; + boolean force_hw_line_stipple; } debug; struct { @@ -332,7 +344,7 @@ struct svga_context struct util_bitmask *vs_bm; struct { - unsigned dirty[4]; + unsigned dirty[SVGA_STATE_MAX]; unsigned texture_timestamp; @@ -355,6 +367,9 @@ struct svga_context /** List of buffers with queued transfers */ struct list_head dirty_buffers; + + /** Was the previous draw done with the SW path? 
*/ + boolean prev_draw_swtnl; }; /* A flag for each state_tracker state object: @@ -438,6 +453,8 @@ void svga_context_flush( struct svga_context *svga, void svga_hwtnl_flush_retry( struct svga_context *svga ); +void svga_surfaces_flush(struct svga_context *svga); + struct pipe_context * svga_context_create(struct pipe_screen *screen, void *priv); diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 97cbac447d6..2c873a0f7ac 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -28,6 +28,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_upload_mgr.h" #include "svga_context.h" #include "svga_draw.h" @@ -143,6 +144,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) SVGA3dPrimitiveRange *prim; unsigned i; + /* Unmap upload manager vertex buffers */ + u_upload_flush(svga->upload_vb); + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); if (handle == NULL) @@ -151,6 +155,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) vb_handle[i] = handle; } + /* Unmap upload manager index buffers */ + u_upload_flush(svga->upload_ib); + for (i = 0; i < hwtnl->cmd.prim_count; i++) { if (hwtnl->cmd.prim_ib[i]) { handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index be0e7abe21b..a6518042eb9 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -53,6 +53,7 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl, dst = pipe_buffer_create( pipe->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, size ); if (dst == NULL) goto fail; diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c index 83527c6ef49..7d420c6b295 100644 --- a/src/gallium/drivers/svga/svga_draw_elements.c +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -56,6 +56,7 @@ translate_indices( struct svga_hwtnl *hwtnl, dst = pipe_buffer_create( pipe->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, size ); if (dst == NULL) goto fail; diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c index 426698806c8..c87afb6946c 100644 --- a/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -50,7 +50,9 @@ static void svga_surface_copy(struct pipe_context *pipe, struct pipe_surface *srcsurf, *dstsurf;*/ unsigned dst_face, dst_z, src_face, src_z; - svga_hwtnl_flush_retry( svga ); + /* Emit buffered drawing commands, and any back copies. + */ + svga_surfaces_flush( svga ); #if 0 srcsurf = screen->get_tex_surface(screen, src_tex, diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 001ec3616c4..d98b9b0e000 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -157,6 +157,14 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (!u_trim_pipe_prim( info->mode, &count )) return; + if (svga->state.sw.need_swtnl != svga->prev_draw_swtnl) { + /* We're switching between SW and HW drawing. Do a flush to avoid + * mixing HW and SW rendering with the same vertex buffer. 
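The new prev_draw_swtnl flag above lets svga_draw_vbo flush once whenever a draw switches between the software (swtnl) and hardware paths, so the two never share a partially written vertex buffer. A minimal standalone sketch of that switch-and-flush pattern; the context and function names are illustrative, not the driver's.

#include <stdbool.h>
#include <stdio.h>

struct ctx {
   bool prev_draw_swtnl;
};

static void flush(struct ctx *c) { (void)c; printf("flush\n"); }

static void draw(struct ctx *c, bool need_swtnl)
{
   if (need_swtnl != c->prev_draw_swtnl) {
      flush(c);                      /* avoid mixing SW and HW rendering */
      c->prev_draw_swtnl = need_swtnl;
   }
   printf("draw (%s path)\n", need_swtnl ? "SW" : "HW");
}

int main(void)
{
   struct ctx c = { false };
   draw(&c, false);
   draw(&c, true);                   /* path change: triggers a flush */
   draw(&c, true);
   return 0;
}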
+ */ + pipe->flush(pipe, ~0, NULL); + svga->prev_draw_swtnl = svga->state.sw.need_swtnl; + } + /* * Mark currently bound target surfaces as dirty * doesn't really matter if it is done before drawing. diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index ab243aa6ec5..9357d827f28 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -24,6 +24,7 @@ **********************************************************/ #include "pipe/p_defines.h" +#include "util/u_string.h" #include "svga_screen.h" #include "svga_surface.h" #include "svga_context.h" @@ -35,20 +36,10 @@ static void svga_flush( struct pipe_context *pipe, struct pipe_fence_handle **fence ) { struct svga_context *svga = svga_context(pipe); - int i; - /* Emit buffered drawing commands. + /* Emit buffered drawing commands, and any back copies. */ - svga_hwtnl_flush_retry( svga ); - - /* Emit back-copy from render target view to texture. - */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - if (svga->curr.framebuffer.cbufs[i]) - svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]); - } - if (svga->curr.framebuffer.zsbuf) - svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf); + svga_surfaces_flush( svga ); /* Flush command queue. */ @@ -56,6 +47,28 @@ static void svga_flush( struct pipe_context *pipe, SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n", __FUNCTION__, flags, fence ? *fence : 0x0); + + /* Enable to dump BMPs of the color/depth buffers each frame */ + if (0) { + if (flags & PIPE_FLUSH_FRAME) { + struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; + static unsigned frame_no = 1; + char filename[256]; + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%04u", i, frame_no); + debug_dump_surface_bmp(&svga->pipe, filename, fb->cbufs[i]); + } + + if (0 && fb->zsbuf) { + util_snprintf(filename, sizeof(filename), "zsbuf_%04u", frame_no); + debug_dump_surface_bmp(&svga->pipe, filename, fb->zsbuf); + } + + ++frame_no; + } + } } diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c index 8c24fb302f7..440919c6262 100644 --- a/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -94,7 +94,7 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i]) - svga_propagate_surface(pipe, dst->cbufs[i]); + svga_propagate_surface(svga, dst->cbufs[i]); } /* XXX: Actually the virtual hardware may support rendertargets with diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index e97b4e57415..4a1a37f1765 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -64,7 +64,9 @@ static void * svga_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *templ) { + struct svga_context *svga = svga_context(pipe); struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state ); + /* need this for draw module. */ rast->templ = *templ; @@ -75,7 +77,6 @@ svga_create_rasterizer_state(struct pipe_context *pipe, /* point_quad_rasterization - ? */ /* point_size_per_vertex - ? */ /* sprite_coord_mode - ??? 
*/ - /* bypass_vs_viewport_and_clip - handled by viewport setup */ /* flatshade_first - handled by index translation */ /* gl_rasterization_rules - XXX - viewport code */ /* line_width - draw module */ @@ -93,17 +94,22 @@ svga_create_rasterizer_state(struct pipe_context *pipe, /* Use swtnl + decomposition implement these: */ - if (templ->poly_stipple_enable) + if (templ->poly_stipple_enable) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "poly stipple"; + } - if (templ->line_width != 1.0 && - templ->line_width != 0.0) + if (templ->line_width >= 1.5f && + !svga->debug.no_line_width) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "line width"; + } if (templ->line_stipple_enable) { - /* LinePattern not implemented on all backends. + /* XXX: LinePattern not implemented on all backends, and there is no + * mechanism to query it. */ - if (0) { + if (!svga->debug.force_hw_line_stipple) { SVGA3dLinePattern lp; lp.repeat = templ->line_stipple_factor + 1; lp.pattern = templ->line_stipple_pattern; @@ -111,11 +117,19 @@ svga_create_rasterizer_state(struct pipe_context *pipe, } else { rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "line stipple"; } } - if (templ->point_smooth) + if (templ->point_smooth) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; + rast->need_pipeline_points_str = "smooth points"; + } + + if (templ->line_smooth) { + rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + rast->need_pipeline_lines_str = "smooth lines"; + } { int fill_front = templ->fill_front; @@ -148,6 +162,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, * front/back fill modes: */ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "different front/back fillmodes"; } else { offset = offset_front; @@ -172,6 +187,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "unfilled primitives with no index manipulation"; } /* If we are decomposing to lines, and lines need the pipeline, @@ -182,6 +198,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "decomposing lines"; } /* Similarly for points: @@ -191,6 +208,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + rast->need_pipeline_tris_str = "decomposing points"; } if (offset) { @@ -201,9 +219,6 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->hw_unfilled = fill; } - - - if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) { /* Turn off stuff which will get done in the draw module: */ diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index f44a0e1325a..446fcc44078 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -144,8 +144,9 @@ svga_create_sampler_state(struct pipe_context *pipe, return cso; } -static void svga_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) +static void +svga_bind_fragment_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct svga_context *svga = svga_context(pipe); unsigned i; @@ -203,9 +204,10 @@ svga_sampler_view_destroy(struct pipe_context *pipe, FREE(view); } -static void 
svga_set_sampler_views(struct pipe_context *pipe, - unsigned num, - struct pipe_sampler_view **views) +static void +svga_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct svga_context *svga = svga_context(pipe); unsigned flag_1d = 0; @@ -256,9 +258,9 @@ static void svga_set_sampler_views(struct pipe_context *pipe, void svga_init_sampler_functions( struct svga_context *svga ) { svga->pipe.create_sampler_state = svga_create_sampler_state; - svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states; + svga->pipe.bind_fragment_sampler_states = svga_bind_fragment_sampler_states; svga->pipe.delete_sampler_state = svga_delete_sampler_state; - svga->pipe.set_fragment_sampler_views = svga_set_sampler_views; + svga->pipe.set_fragment_sampler_views = svga_set_fragment_sampler_views; svga->pipe.create_sampler_view = svga_create_sampler_view; svga->pipe.sampler_view_destroy = svga_sampler_view_destroy; } diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 86c79459f3e..58469910732 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" #include "svga_screen.h" diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c index ef2a0c40f03..bed15ec02e5 100644 --- a/src/gallium/drivers/svga/svga_resource.c +++ b/src/gallium/drivers/svga/svga_resource.c @@ -40,6 +40,7 @@ svga_init_resource_functions(struct svga_context *svga) svga->pipe.transfer_unmap = u_transfer_unmap_vtbl; svga->pipe.transfer_destroy = u_transfer_destroy_vtbl; svga->pipe.transfer_inline_write = u_transfer_inline_write_vtbl; + svga->pipe.redefine_user_buffer = svga_redefine_user_buffer; } void diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index f12e2b68627..e1f07d655b9 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -105,9 +105,12 @@ svga_buffer_map_range( struct pipe_screen *screen, * We can't create a hardware buffer big enough, so create a malloc * buffer instead. 
*/ - debug_printf("%s: failed to allocate %u KB of DMA, splitting DMA transfers\n", - __FUNCTION__, - (sbuf->b.b.width0 + 1023)/1024); + if (0) { + debug_printf("%s: failed to allocate %u KB of DMA, " + "splitting DMA transfers\n", + __FUNCTION__, + (sbuf->b.b.width0 + 1023)/1024); + } sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16); } @@ -308,6 +311,9 @@ svga_buffer_create(struct pipe_screen *screen, goto error2; } + debug_reference(&sbuf->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); + return &sbuf->b.b; error2: @@ -341,6 +347,9 @@ svga_user_buffer_create(struct pipe_screen *screen, sbuf->swbuf = ptr; sbuf->user = TRUE; + + debug_reference(&sbuf->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); return &sbuf->b.b; diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index d3ec11bfd52..c559f70ec12 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -243,4 +243,10 @@ svga_winsys_buffer_create(struct svga_context *svga, unsigned usage, unsigned size); +void +svga_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size); + #endif /* SVGA_BUFFER_H */ diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 3de5216a949..76a3803224a 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -40,6 +40,9 @@ #include "svga_debug.h" +#define MAX_DMA_SIZE (8 * 1024 * 1024) + + /** * Allocate a winsys_buffer (ie. DMA, aka GMR memory). * @@ -57,6 +60,13 @@ svga_winsys_buffer_create( struct svga_context *svga, struct svga_winsys_screen *sws = svgascreen->sws; struct svga_winsys_buffer *buf; + /* XXX this shouldn't be a hard-coded number; it should be queried + * somehow. + */ + if (size > MAX_DMA_SIZE) { + return NULL; + } + /* Just try */ buf = sws->buffer_create(sws, alignment, usage, size); if(!buf) { @@ -248,6 +258,7 @@ svga_buffer_upload_flush(struct svga_context *svga, { SVGA3dCopyBox *boxes; unsigned i; + struct pipe_resource *dummy; assert(sbuf->handle); assert(sbuf->hwbuf); @@ -289,9 +300,9 @@ svga_buffer_upload_flush(struct svga_context *svga, sbuf->dma.svga = NULL; sbuf->dma.boxes = NULL; - /* Decrement reference count */ - pipe_reference(&(sbuf->b.b.reference), NULL); - sbuf = NULL; + /* Decrement reference count (and potentially destroy) */ + dummy = &sbuf->b.b; + pipe_resource_reference(&dummy, NULL); } @@ -638,3 +649,54 @@ svga_context_flush_buffers(struct svga_context *svga) next = curr->next; } } + + +void +svga_redefine_user_buffer(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned offset, + unsigned size) +{ + struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_buffer *sbuf = svga_buffer(resource); + + assert(sbuf->user); + + /* + * Release any uploaded user buffer. + * + * TODO: As an optimization, we could try to update the uploaded buffer + * instead. + */ + + pipe_resource_reference(&sbuf->uploaded.buffer, NULL); + + pipe_mutex_lock(ss->swc_mutex); + + if (offset + size > resource->width0) { + /* + * User buffers shouldn't have DMA directly, unless + * SVGA_COMBINE_USERBUFFERS is not set. 
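svga_redefine_user_buffer() here grows the buffer's advertised size when newly supplied data extends past the current width, dropping any uploaded copy and stale hardware storage first. A minimal standalone sketch of that grow-on-redefine idea, with storage management reduced to a single width field; all names are illustrative.

#include <stdio.h>

struct user_buffer {
   unsigned width;                    /* currently advertised size, bytes */
};

static void redefine_user_buffer(struct user_buffer *buf,
                                 unsigned offset, unsigned size)
{
   if (offset + size > buf->width) {
      /* real driver: flush pending DMA, drop host surface and HW storage */
      buf->width = offset + size;
   }
}

int main(void)
{
   struct user_buffer b = { 64 };
   redefine_user_buffer(&b, 48, 32);
   printf("new width %u\n", b.width);  /* 80 */
   return 0;
}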
+ */ + + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); + } + + if (sbuf->handle) { + svga_buffer_destroy_host_surface(ss, sbuf); + } + + if (sbuf->hwbuf) { + svga_buffer_destroy_hw_storage(ss, sbuf); + } + + sbuf->key.size.width = sbuf->b.b.width0 = offset + size; + } + + pipe_mutex_unlock(ss->swc_mutex); + + svga->curr.any_user_vertex_buffers = TRUE; + svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT; +} diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 7c9e600b9f4..994f30719ae 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -146,16 +146,6 @@ svga_translate_format_render(enum pipe_format format) case PIPE_FORMAT_L8_UNORM: return svga_translate_format(format); -#if 1 - /* For on host conversion */ - case PIPE_FORMAT_DXT1_RGB: - return SVGA3D_X8R8G8B8; - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return SVGA3D_A8R8G8B8; -#endif - default: return SVGA3D_FORMAT_INVALID; } @@ -204,7 +194,7 @@ svga_transfer_dma_band(struct svga_context *svga, ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); if(ret != PIPE_OK) { - svga->swc->flush(svga->swc, NULL); + svga_context_flush(svga, NULL); ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); assert(ret == PIPE_OK); } @@ -225,6 +215,10 @@ svga_transfer_dma(struct svga_context *svga, SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__); } + /* Ensure any pending operations on host surfaces are queued on the command + * buffer first. + */ + svga_surfaces_flush( svga ); if(!st->swbuf) { /* Do the DMA transfer in a single go */ @@ -390,11 +384,15 @@ svga_texture_get_transfer(struct pipe_context *pipe, if(st->hw_nblocksy < nblocksy) { /* We couldn't allocate a hardware buffer big enough for the transfer, * so allocate regular malloc memory instead */ - debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n", - __FUNCTION__, - (nblocksy*st->base.stride + 1023)/1024, - (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, - (st->hw_nblocksy*st->base.stride + 1023)/1024); + if (0) { + debug_printf("%s: failed to allocate %u KB of DMA, " + "splitting into %u x %u KB DMA transfers\n", + __FUNCTION__, + (nblocksy*st->base.stride + 1023)/1024, + (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, + (st->hw_nblocksy*st->base.stride + 1023)/1024); + } + st->swbuf = MALLOC(nblocksy*st->base.stride); if(!st->swbuf) goto no_swbuf; @@ -527,7 +525,8 @@ svga_texture_create(struct pipe_screen *screen, tex->key.numFaces = 1; } - tex->key.cachable = 1; + /* XXX: Disabled for now */ + tex->key.cachable = 0; if (template->bind & PIPE_BIND_SAMPLER_VIEW) tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; @@ -571,6 +570,9 @@ svga_texture_create(struct pipe_screen *screen, if (tex->handle) SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle); + debug_reference(&tex->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); + return &tex->b.b; error2: diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c index 6911f13f778..4f1f4b597e8 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.c +++ b/src/gallium/drivers/svga/svga_sampler_view.c @@ -32,6 +32,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_string.h" #include "svga_screen.h" #include "svga_context.h" @@ -41,14 +42,24 @@ #include 
"svga_surface.h" +void +svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv) +{ + char res[128]; + debug_describe_resource(res, sv->texture); + util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod); +} + struct svga_sampler_view * svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_resource *pt, unsigned min_lod, unsigned max_lod) { - struct svga_screen *ss = svga_screen(pt->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_texture *tex = svga_texture(pt); struct svga_sampler_view *sv = NULL; + SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE; SVGA3dSurfaceFormat format = svga_translate_format(pt->format); boolean view = TRUE; @@ -68,10 +79,6 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, if (min_lod == 0 && max_lod >= pt->last_level) view = FALSE; - if (util_format_is_s3tc(pt->format) && view) { - format = svga_translate_format_render(pt->format); - } - if (ss->debug.no_sampler_view) view = FALSE; @@ -113,6 +120,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->key.cachable = 0; sv->handle = tex->handle; + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); return sv; } @@ -126,7 +135,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, pt->last_level); sv->age = tex->age; - sv->handle = svga_texture_view_surface(pipe, tex, format, + sv->handle = svga_texture_view_surface(svga, tex, flags, format, min_lod, max_lod - min_lod + 1, -1, -1, @@ -136,6 +145,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, assert(0); sv->key.cachable = 0; sv->handle = tex->handle; + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); return sv; } @@ -143,6 +154,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, svga_sampler_view_reference(&tex->cached_view, sv); pipe_mutex_unlock(ss->tex_mutex); + debug_reference(&sv->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view, 0); + return sv; } diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h index e64665f2e58..2087c1be85e 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.h +++ b/src/gallium/drivers/svga/svga_sampler_view.h @@ -83,12 +83,16 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * void svga_destroy_sampler_view_priv(struct svga_sampler_view *v); +void +svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv); + static INLINE void svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v) { struct svga_sampler_view *old = *ptr; - if (pipe_reference(&(*ptr)->reference, &v->reference)) + if (pipe_reference_described(&(*ptr)->reference, &v->reference, + (debug_reference_descriptor)svga_debug_describe_sampler_view)) svga_destroy_sampler_view_priv(old); *ptr = v; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index d0f42c614c9..ef1d3098d51 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -225,13 +225,18 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return svgascreen->use_ps30 ? 32 : 12; return result.u; case PIPE_SHADER_CAP_MAX_ADDRS: - return svgascreen->use_ps30 ? 
1 : 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + /* + * Although PS 3.0 has some addressing abilities it can only represent + * loops that can be statically determined and unrolled. Given we can + * only handle a subset of the cases that the state tracker already + * does it is better to defer loop unrolling to the state tracker. + */ + return 0; case PIPE_SHADER_CAP_MAX_PREDS: return svgascreen->use_ps30 ? 1 : 0; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - return svgascreen->use_ps30 ? 1 : 0; case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: @@ -360,13 +365,6 @@ svga_is_format_supported( struct pipe_screen *screen, case PIPE_FORMAT_B5G5R5A1_UNORM: return FALSE; - /* Simulate ability to render into compressed textures */ - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return TRUE; - default: break; } diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h index 22d5a6d552a..7f239e7a322 100644 --- a/src/gallium/drivers/svga/svga_state.h +++ b/src/gallium/drivers/svga/svga_state.h @@ -92,4 +92,8 @@ void svga_update_state_retry( struct svga_context *svga, enum pipe_error svga_emit_initial_state( struct svga_context *svga ); +enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga ); + +enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga ); + #endif diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index fcbb35e7972..cdadb20c178 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -93,6 +93,55 @@ static int emit_framebuffer( struct svga_context *svga, } +/* + * Rebind rendertargets. + * + * Similar to emit_framebuffer, but without any state checking/update. + * + * Called at the beginning of every new command buffer to ensure that + * non-dirty rendertargets are properly paged-in. + */ +enum pipe_error +svga_reemit_framebuffer_bindings(struct svga_context *svga) +{ + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + unsigned i; + enum pipe_error ret; + + for (i = 0; i < MIN2(PIPE_MAX_COLOR_BUFS, 8); ++i) { + if (hw->cbufs[i]) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, hw->cbufs[i]); + if (ret != PIPE_OK) { + return ret; + } + } + } + + if (hw->zsbuf) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, hw->zsbuf); + if (ret != PIPE_OK) { + return ret; + } + + if (hw->zsbuf && + hw->zsbuf->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf); + if (ret != PIPE_OK) { + return ret; + } + } + else { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL); + if (ret != PIPE_OK) { + return ret; + } + } + } + + return PIPE_OK; +} + + struct svga_tracked_state svga_hw_framebuffer = { "hw framebuffer state", diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index 8ba5ac8cdb4..68c02578789 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -35,6 +35,11 @@ /*********************************************************************** */ + +/** + * Given a gallium vertex element format, return the corresponding SVGA3D + * format. 
Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats. + */ static INLINE SVGA3dDeclType svga_translate_vertex_format(enum pipe_format format) { @@ -80,6 +85,7 @@ static int update_need_swvfetch( struct svga_context *svga, for (i = 0; i < svga->curr.velems->count; i++) { svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format); if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) { + /* Unsupported format - use software fetch */ need_swvfetch = TRUE; break; } @@ -118,6 +124,11 @@ static int update_need_pipeline( struct svga_context *svga, __FUNCTION__, svga->curr.rast->need_pipeline, (1 << svga->curr.reduced_prim) ); + SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline tris (%s), lines (%s), points (%s)\n", + __FUNCTION__, + svga->curr.rast->need_pipeline_tris_str, + svga->curr.rast->need_pipeline_lines_str, + svga->curr.rast->need_pipeline_points_str); need_pipeline = TRUE; } @@ -140,6 +151,10 @@ static int update_need_pipeline( struct svga_context *svga, svga->dirty |= SVGA_NEW_NEED_PIPELINE; } + /* DEBUG */ + if (0 && svga->state.sw.need_pipeline) + debug_printf("sw.need_pipeline = %d\n", svga->state.sw.need_pipeline); + return 0; } @@ -164,15 +179,15 @@ static int update_need_swtnl( struct svga_context *svga, boolean need_swtnl; if (svga->debug.no_swtnl) { - svga->state.sw.need_swvfetch = 0; - svga->state.sw.need_pipeline = 0; + svga->state.sw.need_swvfetch = FALSE; + svga->state.sw.need_pipeline = FALSE; } need_swtnl = (svga->state.sw.need_swvfetch || svga->state.sw.need_pipeline); if (svga->debug.force_swtnl) { - need_swtnl = 1; + need_swtnl = TRUE; } /* @@ -181,7 +196,7 @@ static int update_need_swtnl( struct svga_context *svga, * the wrong buffers and vertex formats. Try trivial/line-wide. */ if (svga->state.sw.in_swtnl_draw) - need_swtnl = 1; + need_swtnl = TRUE; if (need_swtnl != svga->state.sw.need_swtnl) { SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF, diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c index f8b269a101e..c502506b93b 100644 --- a/src/gallium/drivers/svga/svga_state_tss.c +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -52,6 +52,16 @@ void svga_cleanup_tss_binding(struct svga_context *svga) } +struct bind_queue { + struct { + unsigned unit; + struct svga_hw_view_state *view; + } bind[PIPE_MAX_SAMPLERS]; + + unsigned bind_count; +}; + + static int update_tss_binding(struct svga_context *svga, unsigned dirty ) @@ -63,15 +73,7 @@ update_tss_binding(struct svga_context *svga, unsigned min_lod; unsigned max_lod; - - struct { - struct { - unsigned unit; - struct svga_hw_view_state *view; - } bind[PIPE_MAX_SAMPLERS]; - - unsigned bind_count; - } queue; + struct bind_queue queue; queue.bind_count = 0; @@ -164,6 +166,64 @@ fail: } +/* + * Rebind textures. + * + * Similar to update_tss_binding, but without any state checking/update. + * + * Called at the beginning of every new command buffer to ensure that + * non-dirty textures are properly paged-in. 
+ */ +enum pipe_error +svga_reemit_tss_bindings(struct svga_context *svga) +{ + unsigned i; + enum pipe_error ret; + struct bind_queue queue; + + queue.bind_count = 0; + + for (i = 0; i < svga->state.hw_draw.num_views; i++) { + struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; + + if (view->v) { + queue.bind[queue.bind_count].unit = i; + queue.bind[queue.bind_count].view = view; + queue.bind_count++; + } + } + + if (queue.bind_count) { + SVGA3dTextureState *ts; + + ret = SVGA3D_BeginSetTextureState(svga->swc, + &ts, + queue.bind_count); + if (ret != PIPE_OK) { + return ret; + } + + for (i = 0; i < queue.bind_count; i++) { + struct svga_winsys_surface *handle; + + ts[i].stage = queue.bind[i].unit; + ts[i].name = SVGA3D_TS_BIND_TEXTURE; + + assert(queue.bind[i].view->v); + handle = queue.bind[i].view->v->handle; + svga->swc->surface_relocation(svga->swc, + &ts[i].value, + handle, + SVGA_RELOC_READ); + } + + SVGA_FIFOCommitAll(svga->swc); + } + + return PIPE_OK; +} + + struct svga_tracked_state svga_hw_tss_binding = { "texture binding emit", SVGA_NEW_TEXTURE_BINDING | diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 6682a1efe66..ae9a20ebb81 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -229,7 +229,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, - vbuffer->stride, vbuffer->max_index); + vbuffer->stride, ~0); translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index 3e4bed76c05..3e8fb5f0271 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -100,8 +100,9 @@ svga_texture_copy_handle(struct svga_context *svga, struct svga_winsys_surface * -svga_texture_view_surface(struct pipe_context *pipe, +svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, + SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, @@ -109,7 +110,7 @@ svga_texture_view_surface(struct pipe_context *pipe, int zslice_pick, struct svga_host_surface_cache_key *key) /* OUT */ { - struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_screen *ss = svga_screen(svga->pipe.screen); struct svga_winsys_surface *handle; uint32_t i, j; unsigned z_offset = 0; @@ -118,7 +119,7 @@ svga_texture_view_surface(struct pipe_context *pipe, "svga: Create surface view: face %d zslice %d mips %d..%d\n", face_pick, zslice_pick, start_mip, start_mip+num_mip-1); - key->flags = 0; + key->flags = flags; key->format = format; key->numMipLevels = num_mip; key->size.width = u_minify(tex->b.b.width0, start_mip); @@ -161,7 +162,7 @@ svga_texture_view_surface(struct pipe_context *pipe, u_minify(tex->b.b.depth0, i + start_mip) : 1); - svga_texture_copy_handle(svga_context(pipe), + svga_texture_copy_handle(svga, tex->handle, 0, 0, z_offset, i + start_mip, @@ -183,6 +184,7 @@ svga_create_surface(struct pipe_context *pipe, struct pipe_resource *pt, const struct pipe_surface *surf_tmpl) { + struct svga_context *svga = svga_context(pipe); struct svga_texture *tex = svga_texture(pt); struct pipe_screen *screen = pipe->screen; struct svga_surface *s; @@ -191,6 +193,7 @@ svga_create_surface(struct pipe_context *pipe, boolean render = (surf_tmpl->usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) ? 
TRUE : FALSE; boolean view = FALSE; + SVGA3dSurfaceFlags flags; SVGA3dSurfaceFormat format; assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); @@ -219,10 +222,18 @@ svga_create_surface(struct pipe_context *pipe, s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer; s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer; - if (!render) + if (!render) { + flags = SVGA3D_SURFACE_HINT_TEXTURE; format = svga_translate_format(surf_tmpl->format); - else + } else { + if (surf_tmpl->usage & PIPE_BIND_RENDER_TARGET) { + flags = SVGA3D_SURFACE_HINT_RENDERTARGET; + } + if (surf_tmpl->usage & PIPE_BIND_DEPTH_STENCIL) { + flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + } format = svga_translate_format_render(surf_tmpl->format); + } assert(format != SVGA3D_FORMAT_INVALID); @@ -249,7 +260,8 @@ svga_create_surface(struct pipe_context *pipe, SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", pt, surf_tmpl->u.tex.level, face, zslice, s); - s->handle = svga_texture_view_surface(NULL, tex, format, surf_tmpl->u.tex.level, + s->handle = svga_texture_view_surface(svga, tex, flags, format, + surf_tmpl->u.tex.level, 1, face, zslice, &s->key); s->real_face = 0; s->real_level = 0; @@ -329,7 +341,7 @@ void svga_mark_surfaces_dirty(struct svga_context *svga) * pipe is optional context to inline the blit command in. */ void -svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) +svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf) { struct svga_surface *s = svga_surface(surf); struct svga_texture *tex = svga_texture(surf->texture); @@ -354,7 +366,7 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) if (s->handle != tex->handle) { SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf); - svga_texture_copy_handle(svga_context(pipe), + svga_texture_copy_handle(svga, s->handle, 0, 0, 0, s->real_level, s->real_face, tex->handle, 0, 0, zslice, surf->u.tex.level, face, u_minify(tex->b.b.width0, surf->u.tex.level), diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h index afb8326e1f3..bffc8c22c60 100644 --- a/src/gallium/drivers/svga/svga_surface.h +++ b/src/gallium/drivers/svga/svga_surface.h @@ -56,14 +56,15 @@ struct svga_surface extern void -svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf); +svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf); extern boolean svga_surface_needs_propagation(struct pipe_surface *surf); struct svga_winsys_surface * -svga_texture_view_surface(struct pipe_context *pipe, +svga_texture_view_surface(struct svga_context *svga, struct svga_texture *tex, + SVGA3dSurfaceFlags flags, SVGA3dSurfaceFormat format, unsigned start_mip, unsigned num_mip, diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index d5db6bf641a..ac9d637f8cb 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -87,11 +87,14 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render, svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size); svga_render->vbuf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, svga_render->vbuf_size); if(!svga_render->vbuf) { svga_context_flush(svga, NULL); + assert(!svga_render->vbuf); svga_render->vbuf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, 
svga_render->vbuf_size); assert(svga_render->vbuf); } @@ -261,6 +264,7 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render, svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size); svga_render->ibuf = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STREAM, svga_render->ibuf_size); svga_render->ibuf_offset = 0; } diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 05d86e1fb16..ad29c1b6425 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -124,6 +124,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga, /* Now safe to remove the need_swtnl flag in any update_state call */ svga->state.sw.in_swtnl_draw = FALSE; + svga->dirty |= SVGA_NEW_NEED_PIPELINE | SVGA_NEW_NEED_SWVFETCH; return ret; } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index a7592382936..efda2f605b9 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -61,7 +61,7 @@ static void set_draw_viewport( struct svga_context *svga ) * going to be drawn with triangles, but we're not catching all * cases where that will happen. */ - if (svga->curr.rast->templ.line_width > 1.0) + if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES) { adjx = SVGA_LINE_ADJ_X + 0.175; adjy = SVGA_LINE_ADJ_Y - 0.175; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index f2591c5721a..99600cf5c00 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -57,7 +57,6 @@ translate_opcode( case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM; - case TGSI_OPCODE_SSG: return SVGA3DOP_SGN; default: debug_printf("Unkown opcode %u\n", opcode); assert( 0 ); @@ -285,6 +284,41 @@ static void reset_temp_regs( struct svga_shader_emitter *emit ) } +/* Replace the src with the temporary specified in the dst, but copying + * only the necessary channels, and preserving the original swizzle (which is + * important given that several opcodes have constraints in the allowed + * swizzles). 
+ */ +static boolean emit_repl( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register *src0) +{ + unsigned src0_swizzle; + unsigned chan; + + assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP); + + src0_swizzle = src0->base.swizzle; + + dst.mask = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3; + dst.mask |= 1 << swizzle; + } + assert(dst.mask); + + src0->base.swizzle = SVGA3DSWIZZLE_NONE; + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 )) + return FALSE; + + *src0 = src( dst ); + src0->base.swizzle = src0_swizzle; + + return TRUE; +} + + static boolean submit_op0( struct svga_shader_emitter *emit, SVGA3dShaderInstToken inst, SVGA3dShaderDestToken dest ) @@ -333,14 +367,11 @@ static boolean submit_op2( struct svga_shader_emitter *emit, src0.base.num != src1.base.num) need_temp = TRUE; - if (need_temp) - { + if (need_temp) { temp = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 )) + if (!emit_repl( emit, temp, &src0 )) return FALSE; - - src0 = src( temp ); } if (!emit_op2( emit, inst, dest, src0, src1 )) @@ -396,24 +427,18 @@ static boolean submit_op3( struct svga_shader_emitter *emit, (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) need_temp1 = TRUE; - if (need_temp0) - { + if (need_temp0) { temp0 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + if (!emit_repl( emit, temp0, &src0 )) return FALSE; - - src0 = src( temp0 ); } - if (need_temp1) - { + if (need_temp1) { temp1 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 )) + if (!emit_repl( emit, temp1, &src1 )) return FALSE; - - src1 = src( temp1 ); } if (!emit_op3( emit, inst, dest, src0, src1, src2 )) @@ -478,24 +503,18 @@ static boolean submit_op4( struct svga_shader_emitter *emit, (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) need_temp3 = TRUE; - if (need_temp0) - { + if (need_temp0) { temp0 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + if (!emit_repl( emit, temp0, &src0 )) return FALSE; - - src0 = src( temp0 ); } - if (need_temp3) - { + if (need_temp3) { temp3 = get_temp( emit ); - if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 )) + if (!emit_repl( emit, temp3, &src3 )) return FALSE; - - src3 = src( temp3 ); } if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 )) @@ -509,6 +528,55 @@ static boolean submit_op4( struct svga_shader_emitter *emit, } +static boolean alias_src_dst( struct src_register src, + SVGA3dShaderDestToken dst ) +{ + if (src.base.num != dst.num) + return FALSE; + + if (SVGA3dShaderGetRegType(dst.value) != + SVGA3dShaderGetRegType(src.base.value)) + return FALSE; + + return TRUE; +} + + +static boolean submit_lrp(struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register src0, + struct src_register src1, + struct src_register src2) +{ + SVGA3dShaderDestToken tmp; + boolean need_dst_tmp = FALSE; + + /* The dst reg must be a temporary, and not be the same as src0 or src2 */ + if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || + alias_src_dst(src0, dst) || + alias_src_dst(src2, dst)) + need_dst_tmp = TRUE; + + if (need_dst_tmp) { + tmp = get_temp( emit ); + tmp.mask = dst.mask; + } + else { + tmp = dst; + } + + if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) + return FALSE; + + if (need_dst_tmp) { + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) + 
return FALSE; + } + + return TRUE; +} + + static boolean emit_def_const( struct svga_shader_emitter *emit, SVGA3dShaderConstType type, unsigned idx, @@ -747,7 +815,7 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit, static boolean emit_if(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { - const struct src_register src = translate_src_register( + struct src_register src0 = translate_src_register( emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); @@ -755,10 +823,23 @@ static boolean emit_if(struct svga_shader_emitter *emit, if_token.control = SVGA3DOPCOMPC_NE; zero = scalar(zero, TGSI_SWIZZLE_X); + if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) { + /* + * Max different constant registers readable per IFC instruction is 1. + */ + + SVGA3dShaderDestToken tmp = get_temp( emit ); + + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) + return FALSE; + + src0 = scalar(src( tmp ), TGSI_SWIZZLE_X); + } + emit->dynamic_branching_level++; return (emit_instruction( emit, if_token ) && - emit_src( emit, src ) && + emit_src( emit, src0 ) && emit_src( emit, zero ) ); } @@ -832,7 +913,7 @@ static boolean emit_cmp(struct svga_shader_emitter *emit, */ if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero)) return FALSE; - return submit_op3(emit, inst_token(SVGA3DOP_LRP), dst, src(temp), src1, src2); + return submit_lrp(emit, dst, src(temp), src1, src2); } /* CMP DST, SRC0, SRC2, SRC1 */ @@ -1066,6 +1147,41 @@ static boolean emit_cos(struct svga_shader_emitter *emit, return TRUE; } +static boolean emit_ssg(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + SVGA3dShaderDestToken temp0 = get_temp( emit ); + SVGA3dShaderDestToken temp1 = get_temp( emit ); + struct src_register zero, one; + + if (emit->unit == PIPE_SHADER_VERTEX) { + /* SGN DST, SRC0, TMP0, TMP1 */ + return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0, + src( temp0 ), src( temp1 ) ); + } + + zero = get_zero_immediate( emit ); + one = scalar( zero, TGSI_SWIZZLE_W ); + zero = scalar( zero, TGSI_SWIZZLE_X ); + + /* CMP TMP0, SRC0, one, zero */ + if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), + writemask( temp0, dst.mask ), src0, one, zero )) + return FALSE; + + /* CMP TMP1, negate(SRC0), negate(one), zero */ + if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), + writemask( temp1, dst.mask ), negate( src0 ), negate( one ), + zero )) + return FALSE; + + /* ADD DST, TMP0, TMP1 */ + return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ), + src( temp1 ) ); +} /* * ADD DST SRC0, negate(SRC0) @@ -1588,6 +1704,10 @@ static boolean emit_deriv(struct svga_shader_emitter *emit, } else { unsigned opcode; + const struct tgsi_full_src_register *reg = &insn->Src[0]; + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + struct src_register src0; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_DDX: @@ -1600,7 +1720,21 @@ static boolean emit_deriv(struct svga_shader_emitter *emit, return FALSE; } - return emit_simple_instruction( emit, opcode, insn ); + inst = inst_token( opcode ); + dst = translate_dst_register( emit, insn, 0 ); + src0 = translate_src_register( emit, reg ); + + /* We cannot use negate or abs on source to dsx/dsy instruction. 
+ */ + if (reg->Register.Absolute || + reg->Register.Negate) { + SVGA3dShaderDestToken temp = get_temp( emit ); + + if (!emit_repl( emit, temp, &src0 )) + return FALSE; + } + + return submit_op1( emit, inst, dst, src0 ); } } @@ -1624,19 +1758,6 @@ static boolean emit_arl(struct svga_shader_emitter *emit, } } -static boolean alias_src_dst( struct src_register src, - SVGA3dShaderDestToken dst ) -{ - if (src.base.num != dst.num) - return FALSE; - - if (SVGA3dShaderGetRegType(dst.value) != - SVGA3dShaderGetRegType(src.base.value)) - return FALSE; - - return TRUE; -} - static boolean emit_pow(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { @@ -1729,37 +1850,14 @@ static boolean emit_lrp(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - SVGA3dShaderDestToken tmp; const struct src_register src0 = translate_src_register( emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( emit, &insn->Src[1] ); const struct src_register src2 = translate_src_register( emit, &insn->Src[2] ); - boolean need_dst_tmp = FALSE; - - /* The dst reg must not be the same as src0 or src2 */ - if (alias_src_dst(src0, dst) || - alias_src_dst(src2, dst)) - need_dst_tmp = TRUE; - if (need_dst_tmp) { - tmp = get_temp( emit ); - tmp.mask = dst.mask; - } - else { - tmp = dst; - } - - if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) - return FALSE; - - if (need_dst_tmp) { - if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) - return FALSE; - } - - return TRUE; + return submit_lrp(emit, dst, src0, src1, src2); } @@ -2366,6 +2464,9 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_LRP: return emit_lrp( emit, insn ); + case TGSI_OPCODE_SSG: + return emit_ssg( emit, insn ); + default: { unsigned opcode = translate_opcode(insn->Instruction.Opcode); @@ -2715,6 +2816,7 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) return TRUE; if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 || emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) return TRUE; } diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c index 95612a80063..ad1549d9f81 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -136,7 +136,7 @@ static struct sh_opcode_info opcode_info[] = { "dsy", 1, 1, 0, 0, SVGA3DOP_INVALID, }, { "texldd", 1, 4, 0, 0, SVGA3DOP_INVALID, }, { "setp", 1, 2, 0, 0, SVGA3DOP_SETP, }, - { "texldl", 1, 2, 0, 0, SVGA3DOP_INVALID, }, + { "texldl", 1, 2, 0, 0, SVGA3DOP_TEXLDL, }, { "breakp", 0, 1, 0, 0, SVGA3DOP_INVALID, }, }; @@ -156,6 +156,8 @@ const struct sh_opcode_info *svga_opcode_info( uint op ) if (info->svga_opcode == SVGA3DOP_INVALID) { /* No valid information. Please provide number of dst/src registers. 
*/ + _debug_printf("Missing information for opcode %u, '%s'\n", op, + opcode_info[op].mnemonic); assert( 0 ); return NULL; } diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index eaabae8ce42..d24cc623c2e 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1419,6 +1419,28 @@ trace_context_transfer_inline_write(struct pipe_context *_context, } +static void trace_redefine_user_buffer(struct pipe_context *_context, + struct pipe_resource *_resource, + unsigned offset, unsigned size) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_resource *tr_tex = trace_resource(_resource); + struct pipe_context *context = tr_context->pipe; + struct pipe_resource *resource = tr_tex->resource; + + assert(resource->screen == context->screen); + + trace_dump_call_begin("pipe_context", "redefine_user_buffer"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, resource); + trace_dump_arg(uint, offset); + trace_dump_arg(uint, size); + + trace_dump_call_end(); + + context->redefine_user_buffer(context, resource, offset, size); +} static const struct debug_named_value rbug_blocker_flags[] = { @@ -1506,6 +1528,7 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.transfer_unmap = trace_context_transfer_unmap; tr_ctx->base.transfer_flush_region = trace_context_transfer_flush_region; tr_ctx->base.transfer_inline_write = trace_context_transfer_inline_write; + tr_ctx->base.redefine_user_buffer = trace_redefine_user_buffer; tr_ctx->pipe = pipe; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 155c869fbd9..18805655bd7 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -517,7 +517,6 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) trace_dump_struct_begin("pipe_vertex_buffer"); trace_dump_member(uint, state, stride); - trace_dump_member(uint, state, max_index); trace_dump_member(uint, state, buffer_offset); trace_dump_member(resource_ptr, state, buffer); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 589cac2ddd3..24ee3fe1175 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -399,6 +399,14 @@ struct pipe_context { unsigned stride, unsigned layer_stride); + + /* Notify a driver that a content of a user buffer has been changed. + * The changed range is [offset, offset+size-1]. + * The new width0 of the buffer is offset+size. */ + void (*redefine_user_buffer)(struct pipe_context *, + struct pipe_resource *, + unsigned offset, + unsigned size); }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index a7f8503251b..8cf738fa2c0 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -225,13 +225,13 @@ enum pipe_transfer_usage { /** * Discards the memory within the mapped region. * - * It should not be used with PIPE_TRANSFER_CPU_READ. + * It should not be used with PIPE_TRANSFER_READ. * * See also: * - OpenGL's ARB_map_buffer_range extension, MAP_INVALIDATE_RANGE_BIT flag. - * - Direct3D's D3DLOCK_DISCARD flag. */ - PIPE_TRANSFER_DISCARD = (1 << 8), + PIPE_TRANSFER_DISCARD = (1 << 8), /* DEPRECATED */ + PIPE_TRANSFER_DISCARD_RANGE = (1 << 8), /** * Fail if the resource cannot be mapped immediately. 
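As an illustration of the renamed flag, here is a minimal sketch of how a caller might choose its discard bits when mapping a buffer for writing; the buffer pointer and the offset/size names are hypothetical, and PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is the flag added a few lines further down:

   /* Hypothetical write-only mapping of [offset, offset + size) into buf
    * (a struct pipe_resource *). */
   unsigned usage = PIPE_TRANSFER_WRITE;

   if (offset == 0 && size == buf->width0)
      usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; /* orphan the whole buffer */
   else
      usage |= PIPE_TRANSFER_DISCARD_RANGE;          /* only the mapped range becomes undefined */

   /* 'usage' is then passed to the context's transfer/map entry points. */
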
@@ -246,7 +246,7 @@ enum pipe_transfer_usage { /** * Do not attempt to synchronize pending operations on the resource when mapping. * - * It should not be used with PIPE_TRANSFER_CPU_READ. + * It should not be used with PIPE_TRANSFER_READ. * * See also: * - OpenGL's ARB_map_buffer_range extension, MAP_UNSYNCHRONIZED_BIT flag. @@ -260,13 +260,28 @@ enum pipe_transfer_usage { * Written ranges will be notified later with * pipe_context::transfer_flush_region. * - * It should not be used with PIPE_TRANSFER_CPU_READ. + * It should not be used with PIPE_TRANSFER_READ. * * See also: * - pipe_context::transfer_flush_region * - OpenGL's ARB_map_buffer_range extension, MAP_FLUSH_EXPLICIT_BIT flag. */ - PIPE_TRANSFER_FLUSH_EXPLICIT = (1 << 11) + PIPE_TRANSFER_FLUSH_EXPLICIT = (1 << 11), + + /** + * Discards all memory backing the resource. + * + * It should not be used with PIPE_TRANSFER_READ. + * + * This is equivalent to: + * - OpenGL's ARB_map_buffer_range extension, MAP_INVALIDATE_BUFFER_BIT + * - BufferData(NULL) on a GL buffer + * - Direct3D's D3DLOCK_DISCARD flag. + * - WDDM's D3DDDICB_LOCKFLAGS.Discard flag. + * - D3D10 DDI's D3D10_DDI_MAP_WRITE_DISCARD flag + * - D3D10's D3D10_MAP_WRITE_DISCARD flag. + */ + PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE = (1 << 12) }; @@ -380,7 +395,8 @@ enum pipe_transfer_usage { #define PIPE_QUERY_SO_STATISTICS 5 #define PIPE_QUERY_GPU_FINISHED 6 #define PIPE_QUERY_TIMESTAMP_DISJOINT 7 -#define PIPE_QUERY_TYPES 8 +#define PIPE_QUERY_OCCLUSION_PREDICATE 8 +#define PIPE_QUERY_TYPES 9 /** diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 3aa11be4b5b..e2cc32222de 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -33,6 +33,16 @@ extern "C" { #endif + +enum pipe_type { + PIPE_TYPE_UNORM = 0, + PIPE_TYPE_SNORM, + PIPE_TYPE_SINT, + PIPE_TYPE_UINT, + PIPE_TYPE_FLOAT, + PIPE_TYPE_COUNT +}; + /** * Texture/surface image formats (preliminary) */ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 0a9e14154d7..d3a3632654c 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -75,6 +75,7 @@ enum tgsi_file_type { TGSI_FILE_SYSTEM_VALUE =9, TGSI_FILE_IMMEDIATE_ARRAY =10, TGSI_FILE_TEMPORARY_ARRAY =11, + TGSI_FILE_RESOURCE =12, TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ }; @@ -153,6 +154,14 @@ struct tgsi_declaration_semantic unsigned Padding : 8; }; +struct tgsi_declaration_resource { + unsigned Resource : 8; /**< one of TGSI_TEXTURE_ */ + unsigned ReturnTypeX : 6; /**< one of enum pipe_type */ + unsigned ReturnTypeY : 6; /**< one of enum pipe_type */ + unsigned ReturnTypeZ : 6; /**< one of enum pipe_type */ + unsigned ReturnTypeW : 6; /**< one of enum pipe_type */ +}; + #define TGSI_IMM_FLOAT32 0 #define TGSI_IMM_UINT32 1 #define TGSI_IMM_INT32 2 @@ -339,7 +348,22 @@ struct tgsi_property_data { #define TGSI_OPCODE_CASE 142 #define TGSI_OPCODE_DEFAULT 143 #define TGSI_OPCODE_ENDSWITCH 144 -#define TGSI_OPCODE_LAST 145 + +/* resource related opcodes */ +#define TGSI_OPCODE_LOAD 145 +#define TGSI_OPCODE_LOAD_MS 146 +#define TGSI_OPCODE_SAMPLE 147 +#define TGSI_OPCODE_SAMPLE_B 148 +#define TGSI_OPCODE_SAMPLE_C 149 +#define TGSI_OPCODE_SAMPLE_C_LZ 150 +#define TGSI_OPCODE_SAMPLE_D 151 +#define TGSI_OPCODE_SAMPLE_L 152 +#define TGSI_OPCODE_GATHER4 153 +#define TGSI_OPCODE_RESINFO 154 +#define TGSI_OPCODE_SAMPLE_POS 155 +#define TGSI_OPCODE_SAMPLE_INFO 156 + +#define TGSI_OPCODE_LAST 157 
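To make the new resource declaration concrete, a hedged sketch of how the bit-fields above might be filled in for a 2D texture resource returning four floats, using the TGSI_TEXTURE_* values and the enum pipe_type introduced by this change (the variable name is illustrative only):

   struct tgsi_declaration_resource res_decl;

   res_decl.Resource    = TGSI_TEXTURE_2D;   /* one of TGSI_TEXTURE_* */
   res_decl.ReturnTypeX = PIPE_TYPE_FLOAT;   /* per-component return type, enum pipe_type */
   res_decl.ReturnTypeY = PIPE_TYPE_FLOAT;
   res_decl.ReturnTypeZ = PIPE_TYPE_FLOAT;
   res_decl.ReturnTypeW = PIPE_TYPE_FLOAT;
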
#define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ @@ -406,7 +430,9 @@ struct tgsi_instruction_label #define TGSI_TEXTURE_SHADOW1D 6 #define TGSI_TEXTURE_SHADOW2D 7 #define TGSI_TEXTURE_SHADOWRECT 8 -#define TGSI_TEXTURE_COUNT 9 +#define TGSI_TEXTURE_1D_ARRAY 9 +#define TGSI_TEXTURE_2D_ARRAY 10 +#define TGSI_TEXTURE_COUNT 11 struct tgsi_instruction_texture { diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 226ae8667b7..cf6c5b50268 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -62,6 +62,7 @@ extern "C" { #define PIPE_MAX_GEOMETRY_SAMPLERS 16 #define PIPE_MAX_SHADER_INPUTS 32 #define PIPE_MAX_SHADER_OUTPUTS 32 +#define PIPE_MAX_SHADER_RESOURCES 32 #define PIPE_MAX_TEXTURE_LEVELS 16 #define PIPE_MAX_SO_BUFFERS 4 @@ -389,11 +390,11 @@ struct pipe_stream_output_state struct pipe_transfer { struct pipe_resource *resource; /**< resource to transfer to/from */ - unsigned level; + unsigned level; /**< texture mipmap level */ enum pipe_transfer_usage usage; - struct pipe_box box; - unsigned stride; - unsigned layer_stride; + struct pipe_box box; /**< region of the resource to access */ + unsigned stride; /**< row stride in bytes */ + unsigned layer_stride; /**< image/layer stride in bytes */ void *data; }; @@ -407,7 +408,6 @@ struct pipe_transfer struct pipe_vertex_buffer { unsigned stride; /**< stride to same attrib in next vertex, in bytes */ - unsigned max_index; /**< number of vertices in this buffer */ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ struct pipe_resource *buffer; /**< the actual buffer */ }; diff --git a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp index 2ff24e17d41..2e45f3f43e9 100644 --- a/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp +++ b/src/gallium/state_trackers/d3d1x/dxgi/src/dxgi_native.cpp @@ -794,9 +794,9 @@ struct dxgi_blitter const unsigned semantic_indices[] = { 0, 0 }; vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, semantic_indices); - vbuf.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(quad_data)); + vbuf.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, sizeof(quad_data)); vbuf.buffer_offset = 0; - vbuf.max_index = ~0; vbuf.stride = 4 * sizeof(float); pipe_buffer_write(pipe, vbuf.buffer, 0, sizeof(quad_data), quad_data); diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h index e1ba6c184fd..542d6591293 100644 --- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h +++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h @@ -623,7 +623,6 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe> vertex_buffers[start + i].buffer = buffer ? 
((GalliumD3D11Buffer*)buffer)->resource : 0; vertex_buffers[start + i].buffer_offset = new_offsets[i]; vertex_buffers[start + i].stride = new_strides[i]; - vertex_buffers[start + i].max_index = ~0; last_different = i; } } diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index 3d5d24e692c..fc68ee13eaa 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -141,12 +141,18 @@ GLboolean dri_unbind_context(__DRIcontext * cPriv) { /* dri_util.c ensures cPriv is not null */ + struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); struct dri_context *ctx = dri_context(cPriv); + struct dri_drawable *draw = dri_drawable(ctx->dPriv); + struct dri_drawable *read = dri_drawable(ctx->rPriv); + struct st_api *stapi = screen->st_api; if (--ctx->bind_count == 0) { if (ctx->st == ctx->stapi->get_current(ctx->stapi)) { ctx->st->flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); - ctx->stapi->make_current(ctx->stapi, NULL, NULL, NULL); + stapi->make_current(stapi, NULL, NULL, NULL); + draw->context = NULL; + read->context = NULL; } } @@ -169,17 +175,23 @@ dri_make_current(__DRIcontext * cPriv, ++ctx->bind_count; + if (!driDrawPriv && !driReadPriv) + return ctx->stapi->make_current(ctx->stapi, ctx->st, NULL, NULL); + else if (!driDrawPriv || !driReadPriv) + return GL_FALSE; + + draw->context = ctx; if (ctx->dPriv != driDrawPriv) { ctx->dPriv = driDrawPriv; draw->texture_stamp = driDrawPriv->lastStamp - 1; } + read->context = ctx; if (ctx->rPriv != driReadPriv) { ctx->rPriv = driReadPriv; read->texture_stamp = driReadPriv->lastStamp - 1; } - ctx->stapi->make_current(ctx->stapi, ctx->st, - (draw) ? &draw->base : NULL, (read) ? &read->base : NULL); + ctx->stapi->make_current(ctx->stapi, ctx->st, &draw->base, &read->base); return GL_TRUE; } diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c index 060748622c9..28a33ac7d07 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.c +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c @@ -132,6 +132,7 @@ dri_create_buffer(__DRIscreen * sPriv, drawable->base.validate = dri_st_framebuffer_validate; drawable->base.st_manager_private = (void *) drawable; + drawable->screen = screen; drawable->sPriv = sPriv; drawable->dPriv = dPriv; dPriv->driverPrivate = (void *)drawable; diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h index 2ff6b713293..7f1aa512ca1 100644 --- a/src/gallium/state_trackers/dri/common/dri_drawable.h +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h @@ -41,6 +41,9 @@ struct dri_drawable struct st_framebuffer_iface base; struct st_visual stvis; + struct dri_screen *screen; + struct dri_context *context; + /* dri */ __DRIdrawable *dPriv; __DRIscreen *sPriv; diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c index f6e22c74b4e..69c2e7e9ec8 100644 --- a/src/gallium/state_trackers/dri/common/dri_screen.c +++ b/src/gallium/state_trackers/dri/common/dri_screen.c @@ -235,7 +235,6 @@ dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen, return; stvis->samples = mode->samples; - stvis->render_buffer = ST_ATTACHMENT_INVALID; if (mode->redBits == 8) { if (mode->alphaBits == 8) @@ -274,8 +273,11 @@ dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen, 
PIPE_FORMAT_R16G16B16A16_SNORM : PIPE_FORMAT_NONE; stvis->buffer_mask |= ST_ATTACHMENT_FRONT_LEFT_MASK; - if (mode->doubleBufferMode) + stvis->render_buffer = ST_ATTACHMENT_FRONT_LEFT; + if (mode->doubleBufferMode) { stvis->buffer_mask |= ST_ATTACHMENT_BACK_LEFT_MASK; + stvis->render_buffer = ST_ATTACHMENT_BACK_LEFT; + } if (mode->stereoMode) { stvis->buffer_mask |= ST_ATTACHMENT_FRONT_RIGHT_MASK; if (mode->doubleBufferMode) diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c index a9d05a80fbd..0181588096d 100644 --- a/src/gallium/state_trackers/dri/drm/dri2.c +++ b/src/gallium/state_trackers/dri/drm/dri2.c @@ -38,6 +38,7 @@ #include "dri_screen.h" #include "dri_context.h" #include "dri_drawable.h" +#include "dri2_buffer.h" /** * DRI2 flush extension. @@ -51,7 +52,7 @@ static void dri2_invalidate_drawable(__DRIdrawable *dPriv) { struct dri_drawable *drawable = dri_drawable(dPriv); - struct dri_context *ctx = dri_context(dPriv->driContextPriv); + struct dri_context *ctx = drawable->context; dri2InvalidateDrawable(dPriv); drawable->dPriv->lastStamp = *drawable->dPriv->pStamp; @@ -259,6 +260,91 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, memcpy(drawable->old, buffers, sizeof(__DRIbuffer) * count); } +static __DRIbuffer * +dri2_allocate_buffer(__DRIscreen *sPriv, + unsigned attachment, unsigned format, + int width, int height) +{ + struct dri_screen *screen = dri_screen(sPriv); + struct dri2_buffer *buffer; + struct pipe_resource templ; + enum st_attachment_type statt; + enum pipe_format pf; + unsigned bind; + struct winsys_handle whandle; + + switch (attachment) { + case __DRI_BUFFER_FRONT_LEFT: + case __DRI_BUFFER_FAKE_FRONT_LEFT: + statt = ST_ATTACHMENT_FRONT_LEFT; + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + break; + case __DRI_BUFFER_BACK_LEFT: + statt = ST_ATTACHMENT_BACK_LEFT; + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + break; + case __DRI_BUFFER_DEPTH: + case __DRI_BUFFER_DEPTH_STENCIL: + case __DRI_BUFFER_STENCIL: + statt = ST_ATTACHMENT_DEPTH_STENCIL; + bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */ + break; + default: + statt = ST_ATTACHMENT_INVALID; + break; + } + + switch (format) { + case 32: + pf = PIPE_FORMAT_B8G8R8X8_UNORM; + break; + case 16: + pf = PIPE_FORMAT_Z16_UNORM; + break; + default: + return NULL; + } + + buffer = CALLOC_STRUCT(dri2_buffer); + if (!buffer) + return NULL; + + memset(&templ, 0, sizeof(templ)); + templ.bind = bind; + templ.format = pf; + templ.target = PIPE_TEXTURE_2D; + templ.last_level = 0; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; + + buffer->resource = + screen->base.screen->resource_create(screen->base.screen, &templ); + if (!buffer->resource) + return NULL; + + memset(&whandle, 0, sizeof(whandle)); + whandle.type = DRM_API_HANDLE_TYPE_SHARED; + screen->base.screen->resource_get_handle(screen->base.screen, + buffer->resource, &whandle); + + buffer->base.attachment = attachment; + buffer->base.name = whandle.handle; + buffer->base.cpp = util_format_get_blocksize(pf); + buffer->base.pitch = whandle.stride; + + return &buffer->base; +} + +static void +dri2_release_buffer(__DRIscreen *sPriv, __DRIbuffer *bPriv) +{ + struct dri2_buffer *buffer = dri2_buffer(bPriv); + + pipe_resource_reference(&buffer->resource, NULL); + FREE(buffer); +} + /* * Backend functions for st_framebuffer interface. 
*/ @@ -373,7 +459,7 @@ static __DRIimage * dri2_create_image_from_renderbuffer(__DRIcontext *context, int renderbuffer, void *loaderPrivate) { - struct dri_context *ctx = dri_context(context->driverPrivate); + struct dri_context *ctx = dri_context(context); if (!ctx->st->get_resource_for_egl_image) return NULL; @@ -601,6 +687,9 @@ const struct __DriverAPIRec driDriverAPI = { .SwapBuffers = NULL, .CopySubBuffer = NULL, + + .AllocateBuffer = dri2_allocate_buffer, + .ReleaseBuffer = dri2_release_buffer, }; /* This is the table of extensions that the loader will dlsym() for. */ diff --git a/src/gallium/state_trackers/dri/drm/dri2_buffer.h b/src/gallium/state_trackers/dri/drm/dri2_buffer.h new file mode 100644 index 00000000000..1cd8dbbda22 --- /dev/null +++ b/src/gallium/state_trackers/dri/drm/dri2_buffer.h @@ -0,0 +1,22 @@ +#ifndef DRI2_BUFFER_H +#define DRI2_BUFFER_H + +#include "dri_wrapper.h" + +struct pipe_surface; + +struct dri2_buffer +{ + __DRIbuffer base; + struct pipe_resource *resource; +}; + +static INLINE struct dri2_buffer * +dri2_buffer(__DRIbuffer * driBufferPriv) +{ + return (struct dri2_buffer *) driBufferPriv; +} + +#endif + +/* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile index 8cfcef968ed..98167cceeb3 100644 --- a/src/gallium/state_trackers/egl/Makefile +++ b/src/gallium/state_trackers/egl/Makefile @@ -23,6 +23,13 @@ x11_SOURCES = $(wildcard x11/*.c) \ $(TOP)/src/glx/dri2.c x11_OBJECTS = $(x11_SOURCES:.c=.o) +wayland_INCLUDES = \ + -I$(TOP)/src/gallium/winsys \ + -I$(TOP)/src/egl/wayland \ + $(shell pkg-config --cflags-only-I libdrm wayland-client) + +wayland_SOURCES = $(wildcard wayland/*.c) +wayland_OBJECTS = $(wayland_SOURCES:.c=.o) drm_INCLUDES = -I$(TOP)/src/gallium/winsys $(shell pkg-config --cflags-only-I libdrm) drm_SOURCES = $(wildcard drm/*.c) @@ -45,6 +52,10 @@ ifneq ($(findstring x11, $(EGL_PLATFORMS)),) EGL_OBJECTS += $(x11_OBJECTS) EGL_CPPFLAGS += -DHAVE_X11_BACKEND endif +ifneq ($(findstring wayland, $(EGL_PLATFORMS)),) +EGL_OBJECTS += $(wayland_OBJECTS) +EGL_CPPFLAGS += -DHAVE_WAYLAND_BACKEND +endif ifneq ($(findstring drm, $(EGL_PLATFORMS)),) EGL_OBJECTS += $(drm_OBJECTS) EGL_CPPFLAGS += -DHAVE_DRM_BACKEND @@ -87,6 +98,9 @@ $(common_OBJECTS): %.o: %.c $(x11_OBJECTS): %.o: %.c $(call egl-cc,x11) +$(wayland_OBJECTS): %.o: %.c + $(call egl-cc,wayland) + $(drm_OBJECTS): %.o: %.c $(call egl-cc,drm) diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 9024f945b8c..6107df48822 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -102,6 +102,12 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat) #ifdef HAVE_X11_BACKEND nplat = native_get_x11_platform(); #endif + break; + case _EGL_PLATFORM_WAYLAND: + plat_name = "wayland"; +#ifdef HAVE_WAYLAND_BACKEND + nplat = native_get_wayland_platform(); +#endif break; case _EGL_PLATFORM_DRM: plat_name = "DRM"; @@ -546,6 +552,9 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy) dpy->Extensions.MESA_drm_image = EGL_TRUE; } + if (dpy->Platform == _EGL_PLATFORM_WAYLAND && gdpy->native->buffer) + dpy->Extensions.MESA_drm_image = EGL_TRUE; + if (egl_g3d_add_configs(drv, dpy, 1) == 1) { _eglError(EGL_NOT_INITIALIZED, "eglInitialize(unable to add configs)"); goto fail; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c index 
78c035a2af0..81ce7ab45c7 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c @@ -135,7 +135,7 @@ egl_g3d_reference_drm_buffer(_EGLDisplay *dpy, EGLint name, _EGLImageAttribs attrs; EGLint format; - if (dpy->Platform != _EGL_PLATFORM_DRM) + if (!dpy->Extensions.MESA_drm_image) return NULL; if (_eglParseImageAttribList(&attrs, dpy, attribs) != EGL_SUCCESS) @@ -295,7 +295,7 @@ egl_g3d_export_drm_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *img, struct egl_g3d_image *gimg = egl_g3d_image(img); struct winsys_handle wsh; - if (dpy->Platform != _EGL_PLATFORM_DRM) + if (!dpy->Extensions.MESA_drm_image) return EGL_FALSE; /* get shared handle */ diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index 6461b5edbdf..42889075a39 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -239,6 +239,9 @@ const struct native_platform * native_get_x11_platform(void); const struct native_platform * +native_get_wayland_platform(void); + +const struct native_platform * native_get_drm_platform(void); const struct native_platform * diff --git a/src/gallium/state_trackers/egl/common/native_helper.c b/src/gallium/state_trackers/egl/common/native_helper.c index 0f2d02032b5..a9d8f32e007 100644 --- a/src/gallium/state_trackers/egl/common/native_helper.c +++ b/src/gallium/state_trackers/egl/common/native_helper.c @@ -143,6 +143,14 @@ resource_surface_add_resources(struct resource_surface *rsurf, return ((rsurf->resource_mask & resource_mask) == resource_mask); } +void +resource_surface_import_resource(struct resource_surface *rsurf, + enum native_attachment which, + struct pipe_resource *pres) +{ + pipe_resource_reference(&rsurf->resources[which], pres); + rsurf->resource_mask |= 1 << which; +} void resource_surface_get_resources(struct resource_surface *rsurf, diff --git a/src/gallium/state_trackers/egl/common/native_helper.h b/src/gallium/state_trackers/egl/common/native_helper.h index d1569ac3ea6..03995de19ae 100644 --- a/src/gallium/state_trackers/egl/common/native_helper.h +++ b/src/gallium/state_trackers/egl/common/native_helper.h @@ -51,6 +51,11 @@ resource_surface_add_resources(struct resource_surface *rsurf, uint resource_mask); void +resource_surface_import_resource(struct resource_surface *rsurf, + enum native_attachment which, + struct pipe_resource *pres); + +void resource_surface_get_resources(struct resource_surface *rsurf, struct pipe_resource **resources, uint resource_mask); diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c index 14c134ea1ad..6932f30a6a4 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.c +++ b/src/gallium/state_trackers/egl/drm/native_drm.c @@ -178,7 +178,7 @@ drm_display_init_screen(struct native_display *ndpy) drmFreeVersion(version); if (!drmdpy->base.screen) { - _eglLog(_EGL_WARNING, "failed to create DRM screen"); + _eglLog(_EGL_DEBUG, "failed to create DRM screen"); return FALSE; } diff --git a/src/gallium/state_trackers/egl/wayland/native_wayland.c b/src/gallium/state_trackers/egl/wayland/native_wayland.c new file mode 100644 index 00000000000..d4d5f9c2ebb --- /dev/null +++ b/src/gallium/state_trackers/egl/wayland/native_wayland.c @@ -0,0 +1,626 @@ +/* + * Mesa 3-D graphics library + * Version: 7.11 + * + * Copyright (C) 2011 Benjamin Franzke <[email protected]> + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_memory.h" +#include "util/u_inlines.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "state_tracker/drm_driver.h" + +#include "egllog.h" + +#include "native_wayland.h" + +/* see get_drm_screen_name */ +#include <radeon_drm.h> +#include "radeon/drm/radeon_drm_public.h" + +#include <wayland-client.h> +#include "wayland-egl-priv.h" + +#include <xf86drm.h> + +static struct native_event_handler *wayland_event_handler; + +static void +sync_callback(void *data) +{ + int *done = data; + + *done = 1; +} + +static void +force_roundtrip(struct wl_display *display) +{ + int done = 0; + + wl_display_sync_callback(display, sync_callback, &done); + wl_display_iterate(display, WL_DISPLAY_WRITABLE); + while (!done) + wl_display_iterate(display, WL_DISPLAY_READABLE); +} + +static const struct native_config ** +wayland_display_get_configs (struct native_display *ndpy, int *num_configs) +{ + struct wayland_display *display = wayland_display(ndpy); + const struct native_config **configs; + + if (!display->config) { + struct native_config *nconf; + enum pipe_format format; + display->config = CALLOC(1, sizeof(*display->config)); + if (!display->config) + return NULL; + nconf = &display->config->base; + + nconf->buffer_mask = + (1 << NATIVE_ATTACHMENT_FRONT_LEFT) | + (1 << NATIVE_ATTACHMENT_BACK_LEFT); + + format = PIPE_FORMAT_B8G8R8A8_UNORM; + + nconf->color_format = format; + nconf->window_bit = TRUE; + nconf->pixmap_bit = TRUE; + } + + configs = MALLOC(sizeof(*configs)); + if (configs) { + configs[0] = &display->config->base; + if (num_configs) + *num_configs = 1; + } + + return configs; +} + +static int +wayland_display_get_param(struct native_display *ndpy, + enum native_param_type param) +{ + int val; + + switch (param) { + case NATIVE_PARAM_USE_NATIVE_BUFFER: + case NATIVE_PARAM_PRESERVE_BUFFER: + case NATIVE_PARAM_MAX_SWAP_INTERVAL: + default: + val = 0; + break; + } + + return val; +} + +static boolean +wayland_display_is_pixmap_supported(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + /* all wl_egl_pixmaps are supported */ + + return TRUE; +} + +static void +wayland_display_destroy(struct native_display *ndpy) +{ + struct wayland_display *display = wayland_display(ndpy); + + if (display->config) + FREE(display->config); + + if (display->base.screen) + display->base.screen->destroy(display->base.screen); + + FREE(display); 
+} + + +static struct wl_buffer * +wayland_create_buffer(struct wayland_surface *surface, + enum native_attachment attachment) +{ + struct wayland_display *display = surface->display; + struct pipe_resource *resource; + struct winsys_handle wsh; + uint width, height; + + resource = resource_surface_get_single_resource(surface->rsurf, attachment); + resource_surface_get_size(surface->rsurf, &width, &height); + + wsh.type = DRM_API_HANDLE_TYPE_SHARED; + display->base.screen->resource_get_handle(display->base.screen, resource, &wsh); + + pipe_resource_reference(&resource, NULL); + + return wl_drm_create_buffer(display->dpy->drm, wsh.handle, + width, height, + wsh.stride, surface->win->visual); +} + +static void +wayland_pixmap_destroy(struct wl_egl_pixmap *egl_pixmap) +{ + struct pipe_resource *resource = egl_pixmap->driver_private; + + assert(resource); + + pipe_resource_reference(&resource, NULL); + + egl_pixmap->driver_private = NULL; + egl_pixmap->destroy = NULL; + egl_pixmap->name = 0; +} + +static void +wayland_pixmap_surface_intialize(struct wayland_surface *surface) +{ + struct native_display *ndpy = &surface->display->base; + struct pipe_resource *resource; + struct winsys_handle wsh; + const enum native_attachment front_natt = NATIVE_ATTACHMENT_FRONT_LEFT; + + if (surface->pix->name > 0) + return; + + resource = resource_surface_get_single_resource(surface->rsurf, front_natt); + + wsh.type = DRM_API_HANDLE_TYPE_SHARED; + ndpy->screen->resource_get_handle(ndpy->screen, resource, &wsh); + + surface->pix->name = wsh.handle; + surface->pix->stride = wsh.stride; + surface->pix->destroy = wayland_pixmap_destroy; + surface->pix->driver_private = resource; +} + +static void +wayland_release_pending_resource(void *data) +{ + struct wayland_surface *surface = data; + + /* FIXME: print internal error */ + if (!surface->pending_resource) + return; + + pipe_resource_reference(&surface->pending_resource, NULL); +} + +static void +wayland_window_surface_handle_resize(struct wayland_surface *surface) +{ + struct wayland_display *display = surface->display; + struct pipe_resource *front_resource; + const enum native_attachment front_natt = NATIVE_ATTACHMENT_FRONT_LEFT; + int i; + + front_resource = resource_surface_get_single_resource(surface->rsurf, + front_natt); + if (resource_surface_set_size(surface->rsurf, + surface->win->width, surface->win->height)) { + + if (surface->pending_resource) + force_roundtrip(display->dpy->display); + + if (front_resource) { + surface->pending_resource = front_resource; + front_resource = NULL; + wl_display_sync_callback(display->dpy->display, + wayland_release_pending_resource, surface); + } + + for (i = 0; i < WL_BUFFER_COUNT; ++i) { + if (surface->buffer[i]) + wl_buffer_destroy(surface->buffer[i]); + surface->buffer[i] = NULL; + } + } + pipe_resource_reference(&front_resource, NULL); + + surface->dx = surface->win->dx; + surface->dy = surface->win->dy; + surface->win->dx = 0; + surface->win->dy = 0; +} + +static boolean +wayland_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_resource **textures, + int *width, int *height) +{ + struct wayland_surface *surface = wayland_surface(nsurf); + + if (surface->type == WL_WINDOW_SURFACE) + wayland_window_surface_handle_resize(surface); + + if (!resource_surface_add_resources(surface->rsurf, attachment_mask | + surface->attachment_mask)) + return FALSE; + + if (textures) + resource_surface_get_resources(surface->rsurf, textures, attachment_mask); + + if (seq_num) + 
*seq_num = surface->sequence_number; + + resource_surface_get_size(surface->rsurf, (uint *) width, (uint *) height); + + if (surface->type == WL_PIXMAP_SURFACE) + wayland_pixmap_surface_intialize(surface); + + return TRUE; +} + +static void +wayland_frame_callback(void *data, uint32_t time) +{ + struct wayland_surface *surface = data; + + surface->block_swap_buffers = FALSE; +} + +static INLINE void +wayland_buffers_swap(struct wl_buffer **buffer, + enum wayland_buffer_type buf1, + enum wayland_buffer_type buf2) +{ + struct wl_buffer *tmp = buffer[buf1]; + buffer[buf1] = buffer[buf2]; + buffer[buf2] = tmp; +} + +static boolean +wayland_surface_swap_buffers(struct native_surface *nsurf) +{ + struct wayland_surface *surface = wayland_surface(nsurf); + struct wayland_display *display = surface->display; + + while (surface->block_swap_buffers) + wl_display_iterate(display->dpy->display, WL_DISPLAY_READABLE); + + surface->block_swap_buffers = TRUE; + wl_display_frame_callback(display->dpy->display, wayland_frame_callback, + surface); + + if (surface->type == WL_WINDOW_SURFACE) { + resource_surface_swap_buffers(surface->rsurf, + NATIVE_ATTACHMENT_FRONT_LEFT, NATIVE_ATTACHMENT_BACK_LEFT, FALSE); + + wayland_buffers_swap(surface->buffer, WL_BUFFER_FRONT, WL_BUFFER_BACK); + + if (surface->buffer[WL_BUFFER_FRONT] == NULL) + surface->buffer[WL_BUFFER_FRONT] = + wayland_create_buffer(surface, NATIVE_ATTACHMENT_FRONT_LEFT); + + wl_surface_attach(surface->win->surface, surface->buffer[WL_BUFFER_FRONT], + surface->dx, surface->dy); + + resource_surface_get_size(surface->rsurf, + (uint *) &surface->win->attached_width, + (uint *) &surface->win->attached_height); + surface->dx = 0; + surface->dy = 0; + } + + surface->sequence_number++; + wayland_event_handler->invalid_surface(&display->base, + &surface->base, surface->sequence_number); + + return TRUE; +} + +static boolean +wayland_surface_present(struct native_surface *nsurf, + enum native_attachment natt, + boolean preserve, + uint swap_interval) +{ + struct wayland_surface *surface = wayland_surface(nsurf); + uint width, height; + boolean ret; + + if (preserve || swap_interval) + return FALSE; + + switch (natt) { + case NATIVE_ATTACHMENT_FRONT_LEFT: + ret = TRUE; + break; + case NATIVE_ATTACHMENT_BACK_LEFT: + ret = wayland_surface_swap_buffers(nsurf); + break; + default: + ret = FALSE; + break; + } + + if (surface->type == WL_WINDOW_SURFACE) { + resource_surface_get_size(surface->rsurf, &width, &height); + wl_surface_damage(surface->win->surface, 0, 0, width, height); + } + + return ret; +} + +static void +wayland_surface_wait(struct native_surface *nsurf) +{ + /* no-op */ +} + +static void +wayland_surface_destroy(struct native_surface *nsurf) +{ + struct wayland_surface *surface = wayland_surface(nsurf); + enum wayland_buffer_type buffer; + + for (buffer = 0; buffer < WL_BUFFER_COUNT; ++buffer) { + if (surface->buffer[buffer]) + wl_buffer_destroy(surface->buffer[buffer]); + } + + resource_surface_destroy(surface->rsurf); + FREE(surface); +} + +static struct native_surface * +wayland_create_pixmap_surface(struct native_display *ndpy, + EGLNativePixmapType pix, + const struct native_config *nconf) +{ + struct wayland_display *display = wayland_display(ndpy); + struct wayland_config *config = wayland_config(nconf); + struct wayland_surface *surface; + struct wl_egl_pixmap *egl_pixmap = (struct wl_egl_pixmap *) pix; + enum native_attachment natt = NATIVE_ATTACHMENT_FRONT_LEFT; + + surface = CALLOC_STRUCT(wayland_surface); + if (!surface) + return NULL; + 
+ surface->display = display; + + surface->pending_resource = NULL; + surface->type = WL_PIXMAP_SURFACE; + surface->pix = egl_pixmap; + + if (surface->pix->visual == wl_display_get_rgb_visual(display->dpy->display)) + surface->color_format = PIPE_FORMAT_B8G8R8X8_UNORM; + else + surface->color_format = PIPE_FORMAT_B8G8R8A8_UNORM; + + surface->attachment_mask = (1 << NATIVE_ATTACHMENT_FRONT_LEFT); + + surface->rsurf = resource_surface_create(display->base.screen, + surface->color_format, + PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT); + + if (!surface->rsurf) { + FREE(surface); + return NULL; + } + + resource_surface_set_size(surface->rsurf, + egl_pixmap->width, egl_pixmap->height); + + /* the pixmap is already allocated, so import it */ + if (surface->pix->name > 0) + resource_surface_import_resource(surface->rsurf, natt, + surface->pix->driver_private); + + surface->base.destroy = wayland_surface_destroy; + surface->base.present = wayland_surface_present; + surface->base.validate = wayland_surface_validate; + surface->base.wait = wayland_surface_wait; + + return &surface->base; +} + +static struct native_surface * +wayland_create_window_surface(struct native_display *ndpy, + EGLNativeWindowType win, + const struct native_config *nconf) +{ + struct wayland_display *display = wayland_display(ndpy); + struct wayland_config *config = wayland_config(nconf); + struct wayland_surface *surface; + + surface = CALLOC_STRUCT(wayland_surface); + if (!surface) + return NULL; + + surface->display = display; + surface->color_format = config->base.color_format; + + surface->win = (struct wl_egl_window *) win; + + surface->pending_resource = NULL; + surface->block_swap_buffers = FALSE; + surface->type = WL_WINDOW_SURFACE; + + surface->buffer[WL_BUFFER_FRONT] = NULL; + surface->buffer[WL_BUFFER_BACK] = NULL; + surface->attachment_mask = (1 << NATIVE_ATTACHMENT_FRONT_LEFT) | + (1 << NATIVE_ATTACHMENT_BACK_LEFT); + + surface->rsurf = resource_surface_create(display->base.screen, + surface->color_format, + PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT); + + if (!surface->rsurf) { + FREE(surface); + return NULL; + } + + surface->base.destroy = wayland_surface_destroy; + surface->base.present = wayland_surface_present; + surface->base.validate = wayland_surface_validate; + surface->base.wait = wayland_surface_wait; + + return &surface->base; +} + +static const char * +get_drm_screen_name(int fd, drmVersionPtr version) +{ + const char *name = version->name; + + if (name && !strcmp(name, "radeon")) { + int chip_id; + struct drm_radeon_info info; + + memset(&info, 0, sizeof(info)); + info.request = RADEON_INFO_DEVICE_ID; + info.value = pointer_to_intptr(&chip_id); + if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) + return NULL; + + name = is_r3xx(chip_id) ? 
"r300" : "r600"; + } + + return name; +} + +static boolean +wayland_display_init_screen(struct native_display *ndpy) +{ + struct wayland_display *display = wayland_display(ndpy); + drmVersionPtr version; + const char *driver_name; + + if (display->dpy->fd == -1) + force_roundtrip(display->dpy->display); + if (display->dpy->fd == -1) + return FALSE; + + if (!display->dpy->authenticated) + force_roundtrip(display->dpy->display); + if (!display->dpy->authenticated) + return FALSE; + + version = drmGetVersion(display->dpy->fd); + if (!version) { + _eglLog(_EGL_WARNING, "invalid fd %d", display->dpy->fd); + return FALSE; + } + + /* FIXME: share this with native_drm or egl_dri2 */ + driver_name = get_drm_screen_name(display->dpy->fd, version); + + display->base.screen = + wayland_event_handler->new_drm_screen(&display->base, + driver_name, display->dpy->fd); + drmFreeVersion(version); + + if (!display->base.screen) { + _eglLog(_EGL_WARNING, "failed to create DRM screen"); + return FALSE; + } + + return TRUE; +} + + +static void +wayland_set_event_handler(struct native_event_handler *event_handler) +{ + wayland_event_handler = event_handler; +} + +static struct pipe_resource * +wayland_display_import_buffer(struct native_display *ndpy, + const struct pipe_resource *templ, + void *buf) +{ + return ndpy->screen->resource_from_handle(ndpy->screen, + templ, (struct winsys_handle *) buf); +} + +static boolean +wayland_display_export_buffer(struct native_display *ndpy, + struct pipe_resource *res, + void *buf) +{ + return ndpy->screen->resource_get_handle(ndpy->screen, + res, (struct winsys_handle *) buf); +} + +static struct native_display_buffer wayland_display_buffer = { + wayland_display_import_buffer, + wayland_display_export_buffer +}; + +static struct native_display * +wayland_display_create(void *dpy, boolean use_sw, void *user_data) +{ + struct wayland_display *display; + + display = CALLOC_STRUCT(wayland_display); + if (!display) + return NULL; + + display->base.user_data = user_data; + + display->dpy = dpy; + if (!display->dpy->display) { + wayland_display_destroy(&display->base); + return NULL; + } + + if (!wayland_display_init_screen(&display->base)) { + wayland_display_destroy(&display->base); + return NULL; + } + + display->base.destroy = wayland_display_destroy; + display->base.get_param = wayland_display_get_param; + display->base.get_configs = wayland_display_get_configs; + display->base.is_pixmap_supported = wayland_display_is_pixmap_supported; + display->base.create_window_surface = wayland_create_window_surface; + display->base.create_pixmap_surface = wayland_create_pixmap_surface; + display->base.buffer = &wayland_display_buffer; + + return &display->base; +} + +static const struct native_platform wayland_platform = { + "wayland", /* name */ + wayland_set_event_handler, + wayland_display_create +}; + +const struct native_platform * +native_get_wayland_platform(void) +{ + return &wayland_platform; +} diff --git a/src/gallium/state_trackers/egl/wayland/native_wayland.h b/src/gallium/state_trackers/egl/wayland/native_wayland.h new file mode 100644 index 00000000000..271c10dc113 --- /dev/null +++ b/src/gallium/state_trackers/egl/wayland/native_wayland.h @@ -0,0 +1,97 @@ +/* + * Mesa 3-D graphics library + * Version: 7.11 + * + * Copyright (C) 2011 Benjamin Franzke <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NATIVE_WAYLAND_H_ +#define _NATIVE_WAYLAND_H_ + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" + +#include "common/native.h" +#include "common/native_helper.h" + +#include "wayland-egl-priv.h" + +struct wayland_display { + struct native_display base; + + struct wayland_config *config; + struct wl_egl_display *dpy; +}; + +enum wayland_buffer_type { + WL_BUFFER_FRONT, + WL_BUFFER_BACK, + WL_BUFFER_COUNT +}; + +enum wayland_surface_type { + WL_WINDOW_SURFACE, + WL_PIXMAP_SURFACE, + WL_PBUFFER_SURFACE +}; + +struct wayland_surface { + struct native_surface base; + struct wayland_display *display; + + struct wl_egl_window *win; + struct wl_egl_pixmap *pix; + enum wayland_surface_type type; + int dx, dy; + struct resource_surface *rsurf; + struct pipe_resource *pending_resource; + enum pipe_format color_format; + + unsigned int sequence_number; + struct wl_buffer *buffer[WL_BUFFER_COUNT]; + unsigned int attachment_mask; + + boolean block_swap_buffers; +}; + +struct wayland_config { + struct native_config base; +}; + +static INLINE struct wayland_display * +wayland_display(const struct native_display *ndpy) +{ + return (struct wayland_display *) ndpy; +} + +static INLINE struct wayland_surface * +wayland_surface(const struct native_surface *nsurf) +{ + return (struct wayland_surface *) nsurf; +} + +static INLINE struct wayland_config * +wayland_config(const struct native_config *nconf) +{ + return (struct wayland_config *) nconf; +} + +#endif /* _NATIVE_WAYLAND_H_ */ diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c index c82e2da863b..b18c3132c57 100644 --- a/src/gallium/state_trackers/egl/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl/x11/native_dri2.c @@ -782,7 +782,7 @@ dri2_display_init_screen(struct native_display *ndpy) dri2dpy->event_handler->new_drm_screen(&dri2dpy->base, dri2dpy->dri_driver, fd); if (!dri2dpy->base.screen) { - _eglLog(_EGL_WARNING, "failed to create DRM screen"); + _eglLog(_EGL_DEBUG, "failed to create DRM screen"); return FALSE; } diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index d694651eef7..b30050bf6d4 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -268,7 +268,6 @@ struct st_context { void set_vertex_buffer(unsigned index, unsigned stride, - unsigned max_index, unsigned buffer_offset, struct pipe_resource *buffer) { @@ -277,7 +276,6 @@ struct st_context { memset(&state, 0, sizeof(state)); state.stride = stride; - state.max_index = max_index; state.buffer_offset = 
buffer_offset; state.buffer = buffer; @@ -352,6 +350,7 @@ struct st_context { vbuf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, size); if(!vbuf) goto error1; @@ -378,7 +377,6 @@ struct st_context { vbuffer.buffer = vbuf; vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 0; - vbuffer.max_index = num_verts - 1; pipe->set_vertex_buffers(pipe, 1, &vbuffer); /* draw */ diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i index d55086fefd5..cc67da937cd 100644 --- a/src/gallium/state_trackers/python/p_device.i +++ b/src/gallium/state_trackers/python/p_device.i @@ -134,7 +134,7 @@ struct st_device { } struct pipe_resource * - buffer_create(unsigned size, unsigned bind = 0) { - return pipe_buffer_create($self->screen, bind, size); + buffer_create(unsigned size, unsigned usage, unsigned bind = 0) { + return pipe_buffer_create($self->screen, bind, usage, size); } }; diff --git a/src/gallium/state_trackers/vega/Makefile b/src/gallium/state_trackers/vega/Makefile index 7342c124c28..3e8ad49b767 100644 --- a/src/gallium/state_trackers/vega/Makefile +++ b/src/gallium/state_trackers/vega/Makefile @@ -24,23 +24,25 @@ C_SOURCES = \ api_path.c \ api_text.c \ api_transform.c \ - vgu.c \ - vg_context.c \ - vg_manager.c \ - vg_state.c \ - vg_translate.c \ - polygon.c \ - bezier.c \ - path.c \ - paint.c \ arc.c \ + bezier.c \ + handle.c \ image.c \ + mask.c \ + paint.c \ + path.c \ + polygon.c \ renderer.c \ + shader.c \ + shaders_cache.c \ stroker.c \ - mask.c \ text.c \ - shader.c \ - shaders_cache.c + vg_context.c \ + vg_manager.c \ + vg_state.c \ + vg_translate.c \ + vgu.c + GENERATED_SOURCES := api_tmp.h diff --git a/src/gallium/state_trackers/vega/SConscript b/src/gallium/state_trackers/vega/SConscript index 4900135a1c5..f3732aa1577 100644 --- a/src/gallium/state_trackers/vega/SConscript +++ b/src/gallium/state_trackers/vega/SConscript @@ -26,23 +26,24 @@ vega_sources = [ 'api_path.c', 'api_text.c', 'api_transform.c', - 'vgu.c', - 'vg_context.c', - 'vg_manager.c', - 'vg_state.c', - 'vg_translate.c', - 'polygon.c', - 'bezier.c', - 'path.c', - 'paint.c', 'arc.c', + 'bezier.c', + 'handle.c', 'image.c', - 'renderer.c', - 'stroker.c', 'mask.c', + 'paint.c', + 'path.c', + 'polygon.c', + 'renderer.c', 'shader.c', 'shaders_cache.c', + 'stroker.c', 'text.c', + 'vg_context.c', + 'vg_manager.c', + 'vg_state.c', + 'vg_translate.c', + 'vgu.c' ] api_tmp = env.CodeGenerate( diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index 724e38241b5..6be460c495c 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -29,6 +29,7 @@ #include "vg_context.h" #include "image.h" #include "api.h" +#include "handle.h" #include "renderer.h" #include "shaders_cache.h" @@ -251,8 +252,8 @@ void vegaColorMatrix(VGImage dst, VGImage src, return; } - d = (struct vg_image*)dst; - s = (struct vg_image*)src; + d = handle_to_image(dst); + s = handle_to_image(src); if (vg_image_overlaps(d, s)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -293,7 +294,7 @@ void vegaConvolve(VGImage dst, VGImage src, struct vg_image *d, *s; VGint kernel_size = kernelWidth * kernelHeight; struct filter_info info; - const VGint max_kernel_size = vgGeti(VG_MAX_KERNEL_SIZE); + const VGint max_kernel_size = vegaGeti(VG_MAX_KERNEL_SIZE); if (dst == VG_INVALID_HANDLE || src == VG_INVALID_HANDLE) { vg_set_error(ctx, 
VG_BAD_HANDLE_ERROR); @@ -317,8 +318,8 @@ void vegaConvolve(VGImage dst, VGImage src, return; } - d = (struct vg_image*)dst; - s = (struct vg_image*)src; + d = handle_to_image(dst); + s = handle_to_image(src); if (vg_image_overlaps(d, s)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -395,7 +396,7 @@ void vegaSeparableConvolve(VGImage dst, VGImage src, struct vg_context *ctx = vg_current_context(); VGshort *kernel; VGint i, j, idx = 0; - const VGint max_kernel_size = vgGeti(VG_MAX_SEPARABLE_KERNEL_SIZE); + const VGint max_kernel_size = vegaGeti(VG_MAX_SEPARABLE_KERNEL_SIZE); if (dst == VG_INVALID_HANDLE || src == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); @@ -425,8 +426,8 @@ void vegaSeparableConvolve(VGImage dst, VGImage src, ++idx; } } - vgConvolve(dst, src, kernelWidth, kernelHeight, shiftX, shiftY, - kernel, scale, bias, tilingMode); + vegaConvolve(dst, src, kernelWidth, kernelHeight, shiftX, shiftY, + kernel, scale, bias, tilingMode); free(kernel); } @@ -502,8 +503,8 @@ void vegaGaussianBlur(VGImage dst, VGImage src, return; } - d = (struct vg_image*)dst; - s = (struct vg_image*)src; + d = handle_to_image(dst); + s = handle_to_image(src); if (vg_image_overlaps(d, s)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -599,8 +600,8 @@ void vegaLookup(VGImage dst, VGImage src, return; } - d = (struct vg_image*)dst; - s = (struct vg_image*)src; + d = handle_to_image(dst); + s = handle_to_image(src); if (vg_image_overlaps(d, s)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -662,8 +663,8 @@ void vegaLookupSingle(VGImage dst, VGImage src, return; } - d = (struct vg_image*)dst; - s = (struct vg_image*)src; + d = handle_to_image(dst); + s = handle_to_image(src); if (vg_image_overlaps(d, s)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); diff --git a/src/gallium/state_trackers/vega/api_images.c b/src/gallium/state_trackers/vega/api_images.c index ad95409cd00..b993ba62c26 100644 --- a/src/gallium/state_trackers/vega/api_images.c +++ b/src/gallium/state_trackers/vega/api_images.c @@ -32,6 +32,7 @@ #include "vg_translate.h" #include "api_consts.h" #include "api.h" +#include "handle.h" #include "pipe/p_context.h" #include "pipe/p_screen.h" @@ -105,12 +106,12 @@ VGImage vegaCreateImage(VGImageFormat format, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return VG_INVALID_HANDLE; } - if (width > vgGeti(VG_MAX_IMAGE_WIDTH) || - height > vgGeti(VG_MAX_IMAGE_HEIGHT)) { + if (width > vegaGeti(VG_MAX_IMAGE_WIDTH) || + height > vegaGeti(VG_MAX_IMAGE_HEIGHT)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return VG_INVALID_HANDLE; } - if (width * height > vgGeti(VG_MAX_IMAGE_PIXELS)) { + if (width * height > vegaGeti(VG_MAX_IMAGE_PIXELS)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return VG_INVALID_HANDLE; } @@ -122,19 +123,19 @@ VGImage vegaCreateImage(VGImageFormat format, return VG_INVALID_HANDLE; } - return (VGImage)image_create(format, width, height); + return image_to_handle(image_create(format, width, height)); } void vegaDestroyImage(VGImage image) { struct vg_context *ctx = vg_current_context(); - struct vg_image *img = (struct vg_image *)image; + struct vg_image *img = handle_to_image(image); if (image == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - if (!vg_object_is_valid((void*)image, VG_OBJECT_IMAGE)) { + if (!vg_object_is_valid(image, VG_OBJECT_IMAGE)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -157,7 +158,7 @@ void vegaClearImage(VGImage image, return; } - img = (struct vg_image*)image; + img = handle_to_image(image); 
if (x + width < 0 || y + height < 0) return; @@ -189,7 +190,7 @@ void vegaImageSubData(VGImage image, return; } - img = (struct vg_image*)(image); + img = handle_to_image(image); image_sub_data(img, data, dataStride, dataFormat, x, y, width, height); } @@ -216,7 +217,7 @@ void vegaGetImageSubData(VGImage image, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } - img = (struct vg_image*)image; + img = handle_to_image(image); image_get_sub_data(img, data, dataStride, dataFormat, x, y, width, height); } @@ -229,8 +230,8 @@ VGImage vegaChildImage(VGImage parent, struct vg_image *p; if (parent == VG_INVALID_HANDLE || - !vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, (void*)parent) || - !vg_object_is_valid((void*)parent, VG_OBJECT_IMAGE)) { + !vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, parent) || + !vg_object_is_valid(parent, VG_OBJECT_IMAGE)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return VG_INVALID_HANDLE; } @@ -238,7 +239,7 @@ VGImage vegaChildImage(VGImage parent, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return VG_INVALID_HANDLE; } - p = (struct vg_image *)parent; + p = handle_to_image(parent); if (x > p->width || y > p->height) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return VG_INVALID_HANDLE; @@ -248,7 +249,7 @@ VGImage vegaChildImage(VGImage parent, return VG_INVALID_HANDLE; } - return (VGImage)image_child_image(p, x, y, width, height); + return image_to_handle(image_child_image(p, x, y, width, height)); } VGImage vegaGetParent(VGImage image) @@ -261,9 +262,9 @@ VGImage vegaGetParent(VGImage image) return VG_INVALID_HANDLE; } - img = (struct vg_image*)image; + img = handle_to_image(image); if (img->parent) - return (VGImage)img->parent; + return image_to_handle(img->parent); else return image; } @@ -285,8 +286,8 @@ void vegaCopyImage(VGImage dst, VGint dx, VGint dy, return; } vg_validate_state(ctx); - image_copy((struct vg_image*)dst, dx, dy, - (struct vg_image*)src, sx, sy, + image_copy(handle_to_image(dst), dx, dy, + handle_to_image(src), sx, sy, width, height, dither); } @@ -303,7 +304,7 @@ void vegaDrawImage(VGImage image) } vg_validate_state(ctx); - image_draw((struct vg_image*)image, + image_draw(handle_to_image(image), &ctx->state.vg.image_user_to_surface_matrix); } @@ -323,7 +324,7 @@ void vegaSetPixels(VGint dx, VGint dy, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } - image_set_pixels(dx, dy, (struct vg_image*)src, sx, sy, width, + image_set_pixels(dx, dy, handle_to_image(src), sx, sy, width, height); } @@ -343,7 +344,7 @@ void vegaGetPixels(VGImage dst, VGint dx, VGint dy, return; } - img = (struct vg_image*)dst; + img = handle_to_image(dst); image_get_pixels(img, dx, dy, sx, sy, width, height); @@ -487,7 +488,7 @@ void vegaCopyPixels(VGint dx, VGint dy, vg_validate_state(ctx); /* make sure rendering has completed */ - vgFinish(); + vegaFinish(); vg_copy_surface(ctx, strb->surface, dx, dy, strb->surface, sx, sy, width, height); diff --git a/src/gallium/state_trackers/vega/api_masks.c b/src/gallium/state_trackers/vega/api_masks.c index d96afe66387..cdbf0026e89 100644 --- a/src/gallium/state_trackers/vega/api_masks.c +++ b/src/gallium/state_trackers/vega/api_masks.c @@ -28,6 +28,7 @@ #include "mask.h" #include "api.h" +#include "handle.h" #include "renderer.h" #include "vg_context.h" @@ -56,11 +57,11 @@ void vegaMask(VGHandle mask, VGMaskOperation operation, mask_fill(x, y, width, height, 0.f); } else if (operation == VG_FILL_MASK) { mask_fill(x, y, width, height, 1.f); - } else if (vg_object_is_valid((void*)mask, VG_OBJECT_IMAGE)) { - struct 
vg_image *image = (struct vg_image *)mask; + } else if (vg_object_is_valid(mask, VG_OBJECT_IMAGE)) { + struct vg_image *image = handle_to_image(mask); mask_using_image(image, operation, x, y, width, height); - } else if (vg_object_is_valid((void*)mask, VG_OBJECT_MASK)) { - struct vg_mask_layer *layer = (struct vg_mask_layer *)mask; + } else if (vg_object_is_valid(mask, VG_OBJECT_MASK)) { + struct vg_mask_layer *layer = handle_to_masklayer(mask); mask_using_layer(layer, operation, x, y, width, height); } else { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); @@ -123,14 +124,14 @@ void vegaRenderToMask(VGPath path, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } - if (!vg_object_is_valid((void*)path, VG_OBJECT_PATH)) { + if (!vg_object_is_valid(path, VG_OBJECT_PATH)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } vg_validate_state(ctx); - mask_render_to((struct path *)path, paintModes, operation); + mask_render_to(handle_to_path(path), paintModes, operation); } VGMaskLayer vegaCreateMaskLayer(VGint width, VGint height) @@ -138,13 +139,13 @@ VGMaskLayer vegaCreateMaskLayer(VGint width, VGint height) struct vg_context *ctx = vg_current_context(); if (width <= 0 || height <= 0 || - width > vgGeti(VG_MAX_IMAGE_WIDTH) || - height > vgGeti(VG_MAX_IMAGE_HEIGHT)) { + width > vegaGeti(VG_MAX_IMAGE_WIDTH) || + height > vegaGeti(VG_MAX_IMAGE_HEIGHT)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return VG_INVALID_HANDLE; } - return (VGMaskLayer)mask_layer_create(width, height); + return masklayer_to_handle(mask_layer_create(width, height)); } void vegaDestroyMaskLayer(VGMaskLayer maskLayer) @@ -156,12 +157,12 @@ void vegaDestroyMaskLayer(VGMaskLayer maskLayer) vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - if (!vg_object_is_valid((void*)maskLayer, VG_OBJECT_MASK)) { + if (!vg_object_is_valid(maskLayer, VG_OBJECT_MASK)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - mask = (struct vg_mask_layer *)maskLayer; + mask = handle_to_masklayer(maskLayer); mask_layer_destroy(mask); } @@ -192,12 +193,12 @@ void vegaFillMaskLayer(VGMaskLayer maskLayer, return; } - if (!vg_object_is_valid((void*)maskLayer, VG_OBJECT_MASK)) { + if (!vg_object_is_valid(maskLayer, VG_OBJECT_MASK)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - mask = (struct vg_mask_layer*)maskLayer; + mask = handle_to_masklayer(maskLayer); if (x + width > mask_layer_width(mask) || y + height > mask_layer_height(mask)) { @@ -226,14 +227,14 @@ void vegaCopyMask(VGMaskLayer maskLayer, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } - if (!vg_object_is_valid((void*)maskLayer, VG_OBJECT_MASK)) { + if (!vg_object_is_valid(maskLayer, VG_OBJECT_MASK)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } vg_validate_state(ctx); - mask = (struct vg_mask_layer*)maskLayer; + mask = handle_to_masklayer(maskLayer); mask_copy(mask, sx, sy, dx, dy, width, height); } diff --git a/src/gallium/state_trackers/vega/api_paint.c b/src/gallium/state_trackers/vega/api_paint.c index 1411806455c..2610ebe0576 100644 --- a/src/gallium/state_trackers/vega/api_paint.c +++ b/src/gallium/state_trackers/vega/api_paint.c @@ -29,24 +29,24 @@ #include "vg_context.h" #include "paint.h" #include "api.h" +#include "handle.h" + VGPaint vegaCreatePaint(void) { - return (VGPaint) paint_create(vg_current_context()); + return paint_to_handle(paint_create(vg_current_context())); } void vegaDestroyPaint(VGPaint p) { struct vg_context *ctx = vg_current_context(); - struct vg_paint *paint; if (p == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; 
} - paint = (struct vg_paint *)p; - paint_destroy(paint); + paint_destroy(handle_to_paint(p)); } void vegaSetPaint(VGPaint paint, VGbitfield paintModes) @@ -55,8 +55,8 @@ void vegaSetPaint(VGPaint paint, VGbitfield paintModes) if (paint == VG_INVALID_HANDLE) { /* restore the default */ - paint = (VGPaint)ctx->default_paint; - } else if (!vg_object_is_valid((void*)paint, VG_OBJECT_PAINT)) { + paint = paint_to_handle(ctx->default_paint); + } else if (!vg_object_is_valid(paint, VG_OBJECT_PAINT)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -67,11 +67,13 @@ void vegaSetPaint(VGPaint paint, VGbitfield paintModes) } if (paintModes & VG_FILL_PATH) { - ctx->state.vg.fill_paint = (struct vg_paint *)paint; + ctx->state.vg.fill_paint = handle_to_paint(paint); } if (paintModes & VG_STROKE_PATH) { - ctx->state.vg.stroke_paint = (struct vg_paint *)paint; + ctx->state.vg.stroke_paint = handle_to_paint(paint); } + + ctx->state.dirty |= PAINT_DIRTY; } VGPaint vegaGetPaint(VGPaintMode paintMode) @@ -85,11 +87,11 @@ VGPaint vegaGetPaint(VGPaintMode paintMode) } if (paintMode == VG_FILL_PATH) - paint = (VGPaint)ctx->state.vg.fill_paint; + paint = paint_to_handle(ctx->state.vg.fill_paint); else if (paintMode == VG_STROKE_PATH) - paint = (VGPaint)ctx->state.vg.stroke_paint; + paint = paint_to_handle(ctx->state.vg.stroke_paint); - if (paint == (VGPaint)ctx->default_paint) + if (paint == paint_to_handle(ctx->default_paint)) paint = VG_INVALID_HANDLE; return paint; @@ -98,20 +100,24 @@ VGPaint vegaGetPaint(VGPaintMode paintMode) void vegaSetColor(VGPaint paint, VGuint rgba) { struct vg_context *ctx = vg_current_context(); + struct vg_paint *p; if (paint == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - if (!vg_object_is_valid((void*)paint, VG_OBJECT_PAINT)) { + if (!vg_object_is_valid(paint, VG_OBJECT_PAINT)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - { - struct vg_paint *p = (struct vg_paint *)paint; - paint_set_colori(p, rgba); - } + + p = handle_to_paint(paint); + paint_set_colori(p, rgba); + + if (ctx->state.vg.fill_paint == p || + ctx->state.vg.stroke_paint == p) + ctx->state.dirty |= PAINT_DIRTY; } VGuint vegaGetColor(VGPaint paint) @@ -125,11 +131,11 @@ VGuint vegaGetColor(VGPaint paint) return rgba; } - if (!vg_object_is_valid((void*)paint, VG_OBJECT_PAINT)) { + if (!vg_object_is_valid(paint, VG_OBJECT_PAINT)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return rgba; } - p = (struct vg_paint *)paint; + p = handle_to_paint(paint); return paint_colori(p); } @@ -139,28 +145,28 @@ void vegaPaintPattern(VGPaint paint, VGImage pattern) struct vg_context *ctx = vg_current_context(); if (paint == VG_INVALID_HANDLE || - !vg_context_is_object_valid(ctx, VG_OBJECT_PAINT, (void *)paint)) { + !vg_context_is_object_valid(ctx, VG_OBJECT_PAINT, paint)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } if (pattern == VG_INVALID_HANDLE) { - paint_set_type((struct vg_paint*)paint, VG_PAINT_TYPE_COLOR); + paint_set_type(handle_to_paint(paint), VG_PAINT_TYPE_COLOR); return; } - if (!vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, (void *)pattern)) { + if (!vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, pattern)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - if (!vg_object_is_valid((void*)paint, VG_OBJECT_PAINT) || - !vg_object_is_valid((void*)pattern, VG_OBJECT_IMAGE)) { + if (!vg_object_is_valid(paint, VG_OBJECT_PAINT) || + !vg_object_is_valid(pattern, VG_OBJECT_IMAGE)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - paint_set_pattern((struct vg_paint*)paint, - 
(struct vg_image*)pattern); + paint_set_pattern(handle_to_paint(paint), + handle_to_image(pattern)); } diff --git a/src/gallium/state_trackers/vega/api_params.c b/src/gallium/state_trackers/vega/api_params.c index a73b6c3effe..aa1e5dd280a 100644 --- a/src/gallium/state_trackers/vega/api_params.c +++ b/src/gallium/state_trackers/vega/api_params.c @@ -29,6 +29,7 @@ #include "vg_context.h" #include "paint.h" #include "path.h" +#include "handle.h" #include "image.h" #include "text.h" #include "matrix.h" @@ -60,7 +61,7 @@ static INLINE VGboolean count_in_bounds(VGParamType type, VGint count) else if (type == VG_STROKE_DASH_PATTERN) { return count <= VEGA_MAX_DASH_COUNT; } else { - VGint real_count = vgGetVectorSize(type); + VGint real_count = vegaGetVectorSize(type); return count == real_count; } } @@ -103,7 +104,7 @@ void vegaSetf (VGParamType type, VGfloat value) case VG_MAX_IMAGE_BYTES: case VG_MAX_GAUSSIAN_STD_DEVIATION: case VG_MAX_FLOAT: - vgSeti(type, floor(value)); + vegaSeti(type, floor(value)); return; break; case VG_STROKE_LINE_WIDTH: @@ -289,7 +290,7 @@ void vegaSetfv(VGParamType type, VGint count, case VG_FILTER_FORMAT_LINEAR: case VG_FILTER_FORMAT_PREMULTIPLIED: case VG_FILTER_CHANNEL_MASK: - vgSeti(type, floor(values[0])); + vegaSeti(type, floor(values[0])); return; break; case VG_SCISSOR_RECTS: { @@ -416,7 +417,7 @@ void vegaSetiv(VGParamType type, VGint count, case VG_FILTER_FORMAT_LINEAR: case VG_FILTER_FORMAT_PREMULTIPLIED: case VG_FILTER_CHANNEL_MASK: - vgSeti(type, values[0]); + vegaSeti(type, values[0]); return; break; case VG_SCISSOR_RECTS: { @@ -536,7 +537,7 @@ VGfloat vegaGetf(VGParamType type) case VG_FILTER_FORMAT_LINEAR: case VG_FILTER_FORMAT_PREMULTIPLIED: case VG_FILTER_CHANNEL_MASK: - return vgGeti(type); + return vegaGeti(type); break; case VG_STROKE_LINE_WIDTH: value = state->stroke.line_width.f; @@ -558,7 +559,7 @@ VGfloat vegaGetf(VGParamType type) case VG_MAX_IMAGE_PIXELS: case VG_MAX_IMAGE_BYTES: case VG_MAX_GAUSSIAN_STD_DEVIATION: - return vgGeti(type); + return vegaGeti(type); break; case VG_MAX_FLOAT: value = 1e+10;/*must be at least 1e+10*/ @@ -674,7 +675,7 @@ VGint vegaGeti(VGParamType type) break; case VG_MAX_FLOAT: { - VGfloat val = vgGetf(type); + VGfloat val = vegaGetf(type); value = float_to_int_floor(*((VGuint*)&val)); } break; @@ -765,7 +766,7 @@ void vegaGetfv(VGParamType type, VGint count, { const struct vg_state *state = current_state(); struct vg_context *ctx = vg_current_context(); - VGint real_count = vgGetVectorSize(type); + VGint real_count = vegaGetVectorSize(type); if (!values || count <= 0 || count > real_count || !is_aligned(values)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -802,10 +803,10 @@ void vegaGetfv(VGParamType type, VGint count, case VG_MAX_IMAGE_PIXELS: case VG_MAX_IMAGE_BYTES: case VG_MAX_GAUSSIAN_STD_DEVIATION: - values[0] = vgGeti(type); + values[0] = vegaGeti(type); break; case VG_MAX_FLOAT: - values[0] = vgGetf(type); + values[0] = vegaGetf(type); break; case VG_SCISSOR_RECTS: { VGint i; @@ -866,7 +867,7 @@ void vegaGetiv(VGParamType type, VGint count, { const struct vg_state *state = current_state(); struct vg_context *ctx = vg_current_context(); - VGint real_count = vgGetVectorSize(type); + VGint real_count = vegaGetVectorSize(type); if (!values || count <= 0 || count > real_count || !is_aligned(values)) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -903,10 +904,10 @@ void vegaGetiv(VGParamType type, VGint count, case VG_MAX_IMAGE_PIXELS: case VG_MAX_IMAGE_BYTES: case VG_MAX_GAUSSIAN_STD_DEVIATION: - 
values[0] = vgGeti(type); + values[0] = vegaGeti(type); break; case VG_MAX_FLOAT: { - VGfloat val = vgGetf(type); + VGfloat val = vegaGetf(type); values[0] = float_to_int_floor(*((VGuint*)&val)); } break; @@ -972,9 +973,9 @@ void vegaSetParameterf(VGHandle object, VGfloat value) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; + void *ptr = handle_to_pointer(object); - if (!object || object == VG_INVALID_HANDLE || !is_aligned(ptr)) { + if (object == VG_INVALID_HANDLE || !is_aligned(ptr)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -983,7 +984,7 @@ void vegaSetParameterf(VGHandle object, case VG_PAINT_TYPE: case VG_PAINT_COLOR_RAMP_SPREAD_MODE: case VG_PAINT_PATTERN_TILING_MODE: - vgSetParameteri(object, paramType, floor(value)); + vegaSetParameteri(object, paramType, floor(value)); return; break; case VG_PAINT_COLOR: @@ -994,7 +995,7 @@ void vegaSetParameterf(VGHandle object, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); break; case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); paint_set_color_ramp_premultiplied(p, value); } break; @@ -1026,9 +1027,9 @@ void vegaSetParameteri(VGHandle object, VGint value) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; + void *ptr = handle_to_pointer(object); - if (!object || object == VG_INVALID_HANDLE || !is_aligned(ptr)) { + if (object == VG_INVALID_HANDLE || !is_aligned(ptr)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -1039,7 +1040,7 @@ void vegaSetParameteri(VGHandle object, value > VG_PAINT_TYPE_PATTERN) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { - struct vg_paint *paint = (struct vg_paint *)ptr; + struct vg_paint *paint = handle_to_paint(object); paint_set_type(paint, value); } break; @@ -1055,12 +1056,12 @@ void vegaSetParameteri(VGHandle object, value > VG_COLOR_RAMP_SPREAD_REFLECT) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { - struct vg_paint *paint = (struct vg_paint *)ptr; + struct vg_paint *paint = handle_to_paint(object); paint_set_spread_mode(paint, value); } break; case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); paint_set_color_ramp_premultiplied(p, value); } break; @@ -1069,7 +1070,7 @@ void vegaSetParameteri(VGHandle object, value > VG_TILE_REFLECT) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { - struct vg_paint *paint = (struct vg_paint *)ptr; + struct vg_paint *paint = handle_to_paint(object); paint_set_pattern_tiling(paint, value); } break; @@ -1102,10 +1103,10 @@ void vegaSetParameterfv(VGHandle object, const VGfloat * values) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; - VGint real_count = vgGetParameterVectorSize(object, paramType); + void *ptr = handle_to_pointer(object); + VGint real_count = vegaGetParameterVectorSize(object, paramType); - if (!object || object == VG_INVALID_HANDLE || !is_aligned(ptr)) { + if (object == VG_INVALID_HANDLE || !is_aligned(ptr)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -1125,15 +1126,18 @@ void vegaSetParameterfv(VGHandle object, if (count != 1) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else - vgSetParameterf(object, paramType, values[0]); + vegaSetParameterf(object, paramType, values[0]); return; break; case VG_PAINT_COLOR: { if (count != 4) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint 
*paint = handle_to_paint(object); paint_set_color(paint, values); + if (ctx->state.vg.fill_paint == paint || + ctx->state.vg.stroke_paint == paint) + ctx->state.dirty |= PAINT_DIRTY; } } break; @@ -1141,7 +1145,7 @@ void vegaSetParameterfv(VGHandle object, if (count && count < 4) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); count = MIN2(count, VEGA_MAX_COLOR_RAMP_STOPS); paint_set_ramp_stops(paint, values, count); { @@ -1159,7 +1163,7 @@ void vegaSetParameterfv(VGHandle object, if (count != 4) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_set_linear_gradient(paint, values); { VGint vals[4]; @@ -1176,7 +1180,7 @@ void vegaSetParameterfv(VGHandle object, if (count != 5) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_set_radial_gradient(paint, values); { VGint vals[5]; @@ -1215,10 +1219,10 @@ void vegaSetParameteriv(VGHandle object, const VGint * values) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; - VGint real_count = vgGetParameterVectorSize(object, paramType); + void *ptr = handle_to_pointer(object); + VGint real_count = vegaGetParameterVectorSize(object, paramType); - if (!object || object == VG_INVALID_HANDLE || !is_aligned(ptr)) { + if (object == VG_INVALID_HANDLE || !is_aligned(ptr)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -1238,15 +1242,18 @@ void vegaSetParameteriv(VGHandle object, if (count != 1) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else - vgSetParameteri(object, paramType, values[0]); + vegaSetParameteri(object, paramType, values[0]); return; break; case VG_PAINT_COLOR: { if (count != 4) vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_set_coloriv(paint, values); + if (ctx->state.vg.fill_paint == paint || + ctx->state.vg.stroke_paint == paint) + ctx->state.dirty |= PAINT_DIRTY; } } break; @@ -1256,7 +1263,7 @@ void vegaSetParameteriv(VGHandle object, else { VGfloat *vals = 0; int i; - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); if (count) { vals = malloc(sizeof(VGfloat)*count); for (i = 0; i < count; ++i) @@ -1274,7 +1281,7 @@ void vegaSetParameteriv(VGHandle object, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { VGfloat vals[4]; - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); vals[0] = values[0]; vals[1] = values[1]; vals[2] = values[2]; @@ -1289,7 +1296,7 @@ void vegaSetParameteriv(VGHandle object, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); else { VGfloat vals[5]; - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); vals[0] = values[0]; vals[1] = values[1]; vals[2] = values[2]; @@ -1318,9 +1325,8 @@ VGint vegaGetParameterVectorSize(VGHandle object, VGint paramType) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; - if (!ptr || object == VG_INVALID_HANDLE) { + if (object == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return 0; } @@ -1334,7 +1340,7 @@ VGint vegaGetParameterVectorSize(VGHandle object, case VG_PAINT_COLOR: return 4; case 
VG_PAINT_COLOR_RAMP_STOPS: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); return paint_num_ramp_stops(p); } break; @@ -1374,9 +1380,8 @@ VGfloat vegaGetParameterf(VGHandle object, VGint paramType) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; - if (!ptr || object == VG_INVALID_HANDLE) { + if (object == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return 0; } @@ -1386,7 +1391,7 @@ VGfloat vegaGetParameterf(VGHandle object, case VG_PAINT_COLOR_RAMP_SPREAD_MODE: case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: case VG_PAINT_PATTERN_TILING_MODE: - return vgGetParameteri(object, paramType); + return vegaGetParameteri(object, paramType); break; case VG_PAINT_COLOR: case VG_PAINT_COLOR_RAMP_STOPS: @@ -1398,17 +1403,17 @@ VGfloat vegaGetParameterf(VGHandle object, case VG_PATH_FORMAT: return VG_PATH_FORMAT_STANDARD; case VG_PATH_SCALE: { - struct path *p = (struct path*)object; + struct path *p = handle_to_path(object); return path_scale(p); } case VG_PATH_BIAS: { - struct path *p = (struct path*)object; + struct path *p = handle_to_path(object); return path_bias(p); } case VG_PATH_DATATYPE: case VG_PATH_NUM_SEGMENTS: case VG_PATH_NUM_COORDS: - return vgGetParameteri(object, paramType); + return vegaGetParameteri(object, paramType); break; case VG_IMAGE_FORMAT: @@ -1416,7 +1421,7 @@ VGfloat vegaGetParameterf(VGHandle object, case VG_IMAGE_HEIGHT: #ifdef OPENVG_VERSION_1_1 case VG_FONT_NUM_GLYPHS: - return vgGetParameteri(object, paramType); + return vegaGetParameteri(object, paramType); break; #endif @@ -1431,30 +1436,29 @@ VGint vegaGetParameteri(VGHandle object, VGint paramType) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; - if (!ptr || object == VG_INVALID_HANDLE) { + if (object == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return 0; } switch(paramType) { case VG_PAINT_TYPE: { - struct vg_paint *paint = (struct vg_paint *)ptr; + struct vg_paint *paint = handle_to_paint(object); return paint_type(paint); } break; case VG_PAINT_COLOR_RAMP_SPREAD_MODE: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); return paint_spread_mode(p); } case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); return paint_color_ramp_premultiplied(p); } break; case VG_PAINT_PATTERN_TILING_MODE: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); return paint_pattern_tiling(p); } break; @@ -1469,40 +1473,40 @@ VGint vegaGetParameteri(VGHandle object, return VG_PATH_FORMAT_STANDARD; case VG_PATH_SCALE: case VG_PATH_BIAS: - return vgGetParameterf(object, paramType); + return vegaGetParameterf(object, paramType); case VG_PATH_DATATYPE: { - struct path *p = (struct path*)object; + struct path *p = handle_to_path(object); return path_datatype(p); } case VG_PATH_NUM_SEGMENTS: { - struct path *p = (struct path*)object; + struct path *p = handle_to_path(object); return path_num_segments(p); } case VG_PATH_NUM_COORDS: { - struct path *p = (struct path*)object; + struct path *p = handle_to_path(object); return path_num_coords(p); } break; case VG_IMAGE_FORMAT: { - struct vg_image *img = (struct vg_image*)object; + struct vg_image *img = handle_to_image(object); return img->format; } break; case VG_IMAGE_WIDTH: { - struct vg_image *img = (struct vg_image*)object; + struct vg_image *img = handle_to_image(object); 
return img->width; } break; case VG_IMAGE_HEIGHT: { - struct vg_image *img = (struct vg_image*)object; + struct vg_image *img = handle_to_image(object); return img->height; } break; #ifdef OPENVG_VERSION_1_1 case VG_FONT_NUM_GLYPHS: { - struct vg_font *font = (struct vg_font*)object; + struct vg_font *font = handle_to_font(object); return font_num_glyphs(font); } break; @@ -1521,10 +1525,9 @@ void vegaGetParameterfv(VGHandle object, VGfloat * values) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; - VGint real_count = vgGetParameterVectorSize(object, paramType); + VGint real_count = vegaGetParameterVectorSize(object, paramType); - if (!ptr || object == VG_INVALID_HANDLE) { + if (object == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -1537,41 +1540,41 @@ switch(paramType) { case VG_PAINT_TYPE: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); values[0] = paint_type(p); } break; case VG_PAINT_COLOR_RAMP_SPREAD_MODE: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); values[0] = paint_spread_mode(p); } break; case VG_PAINT_COLOR_RAMP_PREMULTIPLIED: { - struct vg_paint *p = (struct vg_paint *)object; + struct vg_paint *p = handle_to_paint(object); values[0] = paint_color_ramp_premultiplied(p); } break; case VG_PAINT_PATTERN_TILING_MODE: { - values[0] = vgGetParameterf(object, paramType); + values[0] = vegaGetParameterf(object, paramType); } break; case VG_PAINT_COLOR: { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_get_color(paint, values); } break; case VG_PAINT_COLOR_RAMP_STOPS: { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_ramp_stops(paint, values, count); } break; case VG_PAINT_LINEAR_GRADIENT: { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_linear_gradient(paint, values); } break; case VG_PAINT_RADIAL_GRADIENT: { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_radial_gradient(paint, values); } break; @@ -1580,11 +1583,11 @@ case VG_PATH_DATATYPE: case VG_PATH_NUM_SEGMENTS: case VG_PATH_NUM_COORDS: - values[0] = vgGetParameteri(object, paramType); + values[0] = vegaGetParameteri(object, paramType); break; case VG_PATH_SCALE: case VG_PATH_BIAS: - values[0] = vgGetParameterf(object, paramType); + values[0] = vegaGetParameterf(object, paramType); break; case VG_IMAGE_FORMAT: @@ -1592,7 +1595,7 @@ case VG_IMAGE_HEIGHT: #ifdef OPENVG_VERSION_1_1 case VG_FONT_NUM_GLYPHS: - values[0] = vgGetParameteri(object, paramType); + values[0] = vegaGetParameteri(object, paramType); break; #endif @@ -1608,10 +1611,9 @@ void vegaGetParameteriv(VGHandle object, VGint * values) { struct vg_context *ctx = vg_current_context(); - void *ptr = (void*)object; - VGint real_count = vgGetParameterVectorSize(object, paramType); + VGint real_count = vegaGetParameterVectorSize(object, paramType); - if (!ptr || object == VG_INVALID_HANDLE) { + if (object == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -1629,45 +1631,45 @@ case VG_PAINT_PATTERN_TILING_MODE: #ifdef OPENVG_VERSION_1_1 case VG_FONT_NUM_GLYPHS: - 
values[0] = vgGetParameteri(object, paramType); + values[0] = vegaGetParameteri(object, paramType); break; #endif case VG_PAINT_COLOR: { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_get_coloriv(paint, values); } break; case VG_PAINT_COLOR_RAMP_STOPS: { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_ramp_stopsi(paint, values, count); } break; case VG_PAINT_LINEAR_GRADIENT: { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_linear_gradienti(paint, values); } break; case VG_PAINT_RADIAL_GRADIENT: { - struct vg_paint *paint = (struct vg_paint *)object; + struct vg_paint *paint = handle_to_paint(object); paint_radial_gradienti(paint, values); } break; case VG_PATH_SCALE: case VG_PATH_BIAS: - values[0] = vgGetParameterf(object, paramType); + values[0] = vegaGetParameterf(object, paramType); break; case VG_PATH_FORMAT: case VG_PATH_DATATYPE: case VG_PATH_NUM_SEGMENTS: case VG_PATH_NUM_COORDS: - values[0] = vgGetParameteri(object, paramType); + values[0] = vegaGetParameteri(object, paramType); break; case VG_IMAGE_FORMAT: case VG_IMAGE_WIDTH: case VG_IMAGE_HEIGHT: - values[0] = vgGetParameteri(object, paramType); + values[0] = vegaGetParameteri(object, paramType); break; default: diff --git a/src/gallium/state_trackers/vega/api_path.c b/src/gallium/state_trackers/vega/api_path.c index fe57b7671d9..ab6ce958945 100644 --- a/src/gallium/state_trackers/vega/api_path.c +++ b/src/gallium/state_trackers/vega/api_path.c @@ -27,6 +27,7 @@ #include "VG/openvg.h" #include "vg_context.h" +#include "handle.h" #include "path.h" #include "api.h" @@ -55,9 +56,9 @@ VGPath vegaCreatePath(VGint pathFormat, return VG_INVALID_HANDLE; } - return (VGPath)path_create(datatype, scale, bias, - segmentCapacityHint, coordCapacityHint, - capabilities); + return path_to_handle(path_create(datatype, scale, bias, + segmentCapacityHint, coordCapacityHint, + capabilities)); } void vegaClearPath(VGPath path, VGbitfield capabilities) @@ -70,7 +71,7 @@ void vegaClearPath(VGPath path, VGbitfield capabilities) return; } - p = (struct path *)path; + p = handle_to_path(path); path_clear(p, capabilities); } @@ -84,7 +85,7 @@ void vegaDestroyPath(VGPath p) return; } - path = (struct path *)p; + path = handle_to_path(p); path_destroy(path); } @@ -100,7 +101,7 @@ void vegaRemovePathCapabilities(VGPath path, return; } - p = (struct path*)path; + p = handle_to_path(path); current = path_capabilities(p); path_set_capabilities(p, (current & (~(capabilities & VG_PATH_CAPABILITY_ALL)))); @@ -115,7 +116,7 @@ VGbitfield vegaGetPathCapabilities(VGPath path) vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return 0; } - p = (struct path*)path; + p = handle_to_path(path); return path_capabilities(p); } @@ -128,8 +129,8 @@ void vegaAppendPath(VGPath dstPath, VGPath srcPath) vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - src = (struct path *)srcPath; - dst = (struct path *)dstPath; + src = handle_to_path(srcPath); + dst = handle_to_path(dstPath); if (!(path_capabilities(src) & VG_PATH_CAPABILITY_APPEND_FROM) || !(path_capabilities(dst) & VG_PATH_CAPABILITY_APPEND_TO)) { @@ -167,9 +168,9 @@ void vegaAppendPathData(VGPath dstPath, } } - p = (struct path*)dstPath; + p = handle_to_path(dstPath); - if (!pathData || !is_aligned_to(pathData, path_datatype_size(p))) { + if (!pathData || !is_aligned_to(pathData, path_datatype_size(p))) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); 
return; } @@ -199,7 +200,7 @@ void vegaModifyPathCoords(VGPath dstPath, return; } - p = (struct path *)dstPath; + p = handle_to_path(dstPath); if (!pathData || !is_aligned_to(pathData, path_datatype_size(p))) { vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); @@ -226,8 +227,8 @@ void vegaTransformPath(VGPath dstPath, VGPath srcPath) vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - src = (struct path *)srcPath; - dst = (struct path *)dstPath; + src = handle_to_path(srcPath); + dst = handle_to_path(dstPath); if (!(path_capabilities(src) & VG_PATH_CAPABILITY_TRANSFORM_FROM) || !(path_capabilities(dst) & VG_PATH_CAPABILITY_TRANSFORM_TO)) { @@ -251,9 +252,9 @@ VGboolean vegaInterpolatePath(VGPath dstPath, vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return VG_FALSE; } - dst = (struct path *)dstPath; - start = (struct path *)startPath; - end = (struct path *)endPath; + dst = handle_to_path(dstPath); + start = handle_to_path(startPath); + end = handle_to_path(endPath); if (!(path_capabilities(dst) & VG_PATH_CAPABILITY_INTERPOLATE_TO) || !(path_capabilities(start) & VG_PATH_CAPABILITY_INTERPOLATE_FROM) || @@ -285,7 +286,7 @@ VGfloat vegaPathLength(VGPath path, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return -1; } - p = (struct path*)path; + p = handle_to_path(path); if (!(path_capabilities(p) & VG_PATH_CAPABILITY_PATH_LENGTH)) { vg_set_error(ctx, VG_PATH_CAPABILITY_ERROR); @@ -330,7 +331,7 @@ void vegaPointAlongPath(VGPath path, return; } - p = (struct path*)path; + p = handle_to_path(path); caps = path_capabilities(p); if (!(caps & VG_PATH_CAPABILITY_POINT_ALONG_PATH) || @@ -385,7 +386,7 @@ void vegaPathBounds(VGPath path, return; } - p = (struct path*)path; + p = handle_to_path(path); caps = path_capabilities(p); if (!(caps & VG_PATH_CAPABILITY_PATH_BOUNDS)) { @@ -422,7 +423,7 @@ void vegaPathTransformedBounds(VGPath path, return; } - p = (struct path*)path; + p = handle_to_path(path); caps = path_capabilities(p); if (!(caps & VG_PATH_CAPABILITY_PATH_TRANSFORMED_BOUNDS)) { @@ -466,6 +467,7 @@ void vegaPathTransformedBounds(VGPath path, void vegaDrawPath(VGPath path, VGbitfield paintModes) { struct vg_context *ctx = vg_current_context(); + struct path *p = handle_to_path(path); if (path == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); @@ -477,9 +479,9 @@ void vegaDrawPath(VGPath path, VGbitfield paintModes) return; } - if (path_is_empty((struct path*)path)) + if (path_is_empty(p)) return; - path_render((struct path*)path, paintModes, + path_render(p, paintModes, &ctx->state.vg.path_user_to_surface_matrix); } diff --git a/src/gallium/state_trackers/vega/api_text.c b/src/gallium/state_trackers/vega/api_text.c index 7c6b4794099..824c7630403 100644 --- a/src/gallium/state_trackers/vega/api_text.c +++ b/src/gallium/state_trackers/vega/api_text.c @@ -29,6 +29,7 @@ #include "vg_context.h" #include "text.h" #include "api.h" +#include "handle.h" #include "util/u_memory.h" @@ -43,19 +44,19 @@ VGFont vegaCreateFont(VGint glyphCapacityHint) return VG_INVALID_HANDLE; } - return (VGFont) font_create(glyphCapacityHint); + return font_to_handle(font_create(glyphCapacityHint)); } void vegaDestroyFont(VGFont f) { - struct vg_font *font = (struct vg_font *)f; + struct vg_font *font = handle_to_font(f); struct vg_context *ctx = vg_current_context(); if (f == VG_INVALID_HANDLE) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - if (!vg_object_is_valid((void *) font, VG_OBJECT_FONT)) { + if (!vg_object_is_valid(f, VG_OBJECT_FONT)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -75,7 +76,7 @@ 
void vegaSetGlyphToPath(VGFont font, struct vg_font *f; if (font == VG_INVALID_HANDLE || - !vg_context_is_object_valid(ctx, VG_OBJECT_FONT, (void *)font)) { + !vg_context_is_object_valid(ctx, VG_OBJECT_FONT, font)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -85,13 +86,13 @@ void vegaSetGlyphToPath(VGFont font, return; } if (path != VG_INVALID_HANDLE && - !vg_context_is_object_valid(ctx, VG_OBJECT_PATH, (void *)path)) { + !vg_context_is_object_valid(ctx, VG_OBJECT_PATH, path)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - pathObj = (struct path*) path; - f = (struct vg_font*) font; + pathObj = handle_to_path(path); + f = handle_to_font(font); font_set_glyph_to_path(f, glyphIndex, pathObj, isHinted, glyphOrigin, escapement); @@ -108,7 +109,7 @@ void vegaSetGlyphToImage(VGFont font, struct vg_font *f; if (font == VG_INVALID_HANDLE || - !vg_context_is_object_valid(ctx, VG_OBJECT_FONT, (void *)font)) { + !vg_context_is_object_valid(ctx, VG_OBJECT_FONT, font)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } @@ -118,13 +119,13 @@ void vegaSetGlyphToImage(VGFont font, return; } if (image != VG_INVALID_HANDLE && - !vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, (void *)image)) { + !vg_context_is_object_valid(ctx, VG_OBJECT_IMAGE, image)) { vg_set_error(ctx, VG_BAD_HANDLE_ERROR); return; } - img_obj = (struct vg_image*)image; - f = (struct vg_font*)font; + img_obj = handle_to_image(image); + f = handle_to_font(font); font_set_glyph_to_image(f, glyphIndex, img_obj, glyphOrigin, escapement); } @@ -140,7 +141,7 @@ void vegaClearGlyph(VGFont font, return; } - f = (struct vg_font*) font; + f = handle_to_font(font); font_clear_glyph(f, glyphIndex); } @@ -161,7 +162,7 @@ void vegaDrawGlyph(VGFont font, vg_set_error(ctx, VG_ILLEGAL_ARGUMENT_ERROR); return; } - f = (struct vg_font*)font; + f = handle_to_font(font); font_draw_glyph(f, glyphIndex, paintModes, allowAutoHinting); } @@ -199,7 +200,7 @@ void vegaDrawGlyphs(VGFont font, return; } - f = (struct vg_font*)font; + f = handle_to_font(font); font_draw_glyphs(f, glyphCount, glyphIndices, adjustments_x, adjustments_y, paintModes, allowAutoHinting); diff --git a/src/gallium/state_trackers/vega/handle.c b/src/gallium/state_trackers/vega/handle.c new file mode 100644 index 00000000000..11eedd923ed --- /dev/null +++ b/src/gallium/state_trackers/vega/handle.c @@ -0,0 +1,93 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "handle.h" +#include "util/u_hash.h" +#include "util/u_hash_table.h" + + +/** + * Hash keys are 32-bit VGHandles + */ + +struct util_hash_table *handle_hash = NULL; + + +static unsigned next_handle = 1; + + +static unsigned +hash_func(void *key) +{ + /* XXX this kind of ugly */ + intptr_t ip = pointer_to_intptr(key); + return (unsigned) (ip & 0xffffffff); +} + + +static int +compare(void *key1, void *key2) +{ + if (key1 < key2) + return -1; + else if (key1 > key2) + return +1; + else + return 0; +} + + +void +init_handles(void) +{ + if (!handle_hash) + handle_hash = util_hash_table_create(hash_func, compare); +} + + +void +free_handles(void) +{ + /* XXX destroy */ +} + + +VGHandle +create_handle(void *object) +{ + VGHandle h = next_handle++; + util_hash_table_set(handle_hash, intptr_to_pointer(h), object); + return h; +} + + +void +destroy_handle(VGHandle h) +{ + util_hash_table_remove(handle_hash, intptr_to_pointer(h)); +} + diff --git a/src/gallium/state_trackers/vega/handle.h b/src/gallium/state_trackers/vega/handle.h new file mode 100644 index 00000000000..9ed326d5098 --- /dev/null +++ b/src/gallium/state_trackers/vega/handle.h @@ -0,0 +1,171 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Convert opaque VG object handles into pointers and vice versa. + * XXX This is not yet 64-bit safe! All VG handles are 32 bits in size. 
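For readers following the handle rework: handle.c above keeps one global util_hash_table that maps small integer VGHandles, allocated from a monotonically increasing counter, to object pointers, so the OpenVG entry points can hand out integer handles instead of raw casted pointers. A minimal self-contained sketch of the same idea, using a plain array instead of Mesa's hash table (names here are illustrative only, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_OBJECTS 1024

static void *table[MAX_OBJECTS];   /* slot 0 stays unused: 0 doubles as "invalid handle" */
static uint32_t next_slot = 1;

static uint32_t create_handle_sketch(void *object)
{
   assert(next_slot < MAX_OBJECTS);
   table[next_slot] = object;
   return next_slot++;
}

static void *lookup_handle_sketch(uint32_t h)
{
   return (h > 0 && h < MAX_OBJECTS) ? table[h] : NULL;
}

static void destroy_handle_sketch(uint32_t h)
{
   if (h > 0 && h < MAX_OBJECTS)
      table[h] = NULL;
}

int main(void)
{
   int object = 42;
   uint32_t h = create_handle_sketch(&object);
   assert(lookup_handle_sketch(h) == &object);
   destroy_handle_sketch(h);
   assert(lookup_handle_sketch(h) == NULL);
   printf("handle %u round-tripped\n", (unsigned) h);
   return 0;
}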
+ */ + + +#ifndef HANDLE_H +#define HANDLE_H + +#include "pipe/p_compiler.h" +#include "util/u_hash_table.h" +#include "util/u_pointer.h" + +#include "VG/openvg.h" +#include "vg_context.h" + + +extern struct util_hash_table *handle_hash; + + +struct vg_mask_layer; +struct vg_font; +struct vg_image; +struct vg_paint; +struct path; + + +extern void +init_handles(void); + + +extern void +free_handles(void); + + +extern VGHandle +create_handle(void *object); + + +extern void +destroy_handle(VGHandle h); + + +static INLINE VGHandle +object_to_handle(struct vg_object *obj) +{ + return obj ? obj->handle : VG_INVALID_HANDLE; +} + + +static INLINE VGHandle +image_to_handle(struct vg_image *img) +{ + /* vg_image is derived from vg_object */ + return object_to_handle((struct vg_object *) img); +} + + +static INLINE VGHandle +masklayer_to_handle(struct vg_mask_layer *mask) +{ + /* vg_object is derived from vg_object */ + return object_to_handle((struct vg_object *) mask); +} + + +static INLINE VGHandle +font_to_handle(struct vg_font *font) +{ + return object_to_handle((struct vg_object *) font); +} + + +static INLINE VGHandle +paint_to_handle(struct vg_paint *paint) +{ + return object_to_handle((struct vg_object *) paint); +} + + +static INLINE VGHandle +path_to_handle(struct path *path) +{ + return object_to_handle((struct vg_object *) path); +} + + +static INLINE void * +handle_to_pointer(VGHandle h) +{ + void *v = util_hash_table_get(handle_hash, intptr_to_pointer(h)); +#ifdef DEBUG + if (v) { + struct vg_object *obj = (struct vg_object *) v; + assert(obj->handle == h); + } +#endif + return v; +} + + +static INLINE struct vg_font * +handle_to_font(VGHandle h) +{ + return (struct vg_font *) handle_to_pointer(h); +} + + +static INLINE struct vg_image * +handle_to_image(VGHandle h) +{ + return (struct vg_image *) handle_to_pointer(h); +} + + +static INLINE struct vg_mask_layer * +handle_to_masklayer(VGHandle h) +{ + return (struct vg_mask_layer *) handle_to_pointer(h); +} + + +static INLINE struct vg_object * +handle_to_object(VGHandle h) +{ + return (struct vg_object *) handle_to_pointer(h); +} + + +static INLINE struct vg_paint * +handle_to_paint(VGHandle h) +{ + return (struct vg_paint *) handle_to_pointer(h); +} + + +static INLINE struct path * +handle_to_path(VGHandle h) +{ + return (struct path *) handle_to_pointer(h); +} + + +#endif /* HANDLE_H */ diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 318ea94bdfb..73351b671f9 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -349,6 +349,8 @@ void image_destroy(struct vg_image *img) array_destroy(img->children_array); } + vg_free_object(&img->base); + pipe_sampler_view_reference(&img->sampler_view, NULL); FREE(img); } diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index 2db8cbcf7c8..6e5348a1ff2 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -748,3 +748,10 @@ void paint_fill_constant_buffer(struct vg_paint *paint, abort(); } } + +VGboolean paint_is_opaque(struct vg_paint *paint) +{ + /* TODO add other paint types and make sure PAINT_DIRTY gets set */ + return (paint->type == VG_PAINT_TYPE_COLOR && + floatsEqual(paint->solid.color[3], 1.0f)); +} diff --git a/src/gallium/state_trackers/vega/paint.h b/src/gallium/state_trackers/vega/paint.h index 3de3bbe12ed..e5357763b89 100644 --- a/src/gallium/state_trackers/vega/paint.h +++ 
b/src/gallium/state_trackers/vega/paint.h @@ -118,5 +118,6 @@ void paint_fill_constant_buffer(struct vg_paint *paint, const struct matrix *mat, void *buffer); +VGboolean paint_is_opaque(struct vg_paint *paint); #endif diff --git a/src/gallium/state_trackers/vega/polygon.c b/src/gallium/state_trackers/vega/polygon.c index a491de27fa6..bcc5cb272ca 100644 --- a/src/gallium/state_trackers/vega/polygon.c +++ b/src/gallium/state_trackers/vega/polygon.c @@ -303,7 +303,6 @@ void polygon_fill(struct polygon *poly, struct vg_context *ctx) vbuffer.buffer = poly->vbuf; vbuffer.stride = COMPONENTS * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 0; - vbuffer.max_index = poly->num_verts - 1; renderer_polygon_stencil_begin(ctx->renderer, &velement, ctx->state.vg.fill_rule, VG_FALSE); @@ -354,7 +353,6 @@ void polygon_array_fill(struct polygon_array *polyarray, struct vg_context *ctx) polygon_prepare_buffer(ctx, poly); vbuffer.buffer = poly->vbuf; - vbuffer.max_index = poly->num_verts - 1; renderer_polygon_stencil(ctx->renderer, &vbuffer, PIPE_PRIM_TRIANGLE_FAN, 0, (VGuint) poly->num_verts); diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index e42bad76492..9de2cb1014d 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -28,6 +28,7 @@ #include "renderer.h" #include "vg_context.h" +#include "paint.h" /* for paint_is_opaque */ #include "pipe/p_context.h" #include "pipe/p_state.h" @@ -173,6 +174,7 @@ static void renderer_set_mvp(struct renderer *renderer, pipe_resource_reference(&cbuf, NULL); cbuf = pipe_buffer_create(renderer->pipe->screen, PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, sizeof(consts)); if (cbuf) { pipe_buffer_write(renderer->pipe, cbuf, @@ -473,7 +475,8 @@ static void renderer_set_custom_fs(struct renderer *renderer, pipe_resource_reference(&cbuf, NULL); cbuf = pipe_buffer_create(renderer->pipe->screen, - PIPE_BIND_CONSTANT_BUFFER, const_buffer_len); + PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STATIC, + const_buffer_len); pipe_buffer_write(renderer->pipe, cbuf, 0, const_buffer_len, const_buffer); renderer->pipe->set_constant_buffer(renderer->pipe, @@ -571,7 +574,7 @@ static void renderer_quad_draw(struct renderer *r) sizeof(r->vertices), PIPE_BIND_VERTEX_BUFFER); if (buf) { - util_draw_vertex_buffer(r->pipe, buf, 0, + util_draw_vertex_buffer(r->pipe, r->cso, buf, 0, PIPE_PRIM_TRIANGLE_FAN, Elements(r->vertices), /* verts */ Elements(r->vertices[0])); /* attribs/vert */ @@ -1049,7 +1052,7 @@ void renderer_polygon_stencil(struct renderer *renderer, { assert(renderer->state == RENDERER_STATE_POLYGON_STENCIL); - renderer->pipe->set_vertex_buffers(renderer->pipe, 1, vbuf); + cso_set_vertex_buffers(renderer->cso, 1, vbuf); if (!renderer->u.polygon_stencil.manual_two_sides) { util_draw_arrays(renderer->pipe, mode, start, count); @@ -1289,7 +1292,11 @@ static void renderer_validate_blend(struct renderer *renderer, blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; break; case VG_BLEND_SRC_OVER: - if (!util_format_has_alpha(fb_format)) { + if (paint_is_opaque(state->fill_paint) && + paint_is_opaque(state->stroke_paint)) { + /* no blending */ + } + else if (!util_format_has_alpha(fb_format)) { blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; diff --git a/src/gallium/state_trackers/vega/shader.c b/src/gallium/state_trackers/vega/shader.c index 
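The renderer_validate_blend() change above, together with the shader.c hunk that follows, relies on the observation that VG_BLEND_SRC_OVER degenerates to a plain copy when the source is fully opaque: result = src + dst * (1 - src_alpha) loses its destination term at src_alpha == 1, so neither fixed-function blending nor the blend shader is needed when paint_is_opaque() holds for both the fill and the stroke paint. A one-line check of that algebra (illustrative code, not part of the patch):

#include <assert.h>

/* Porter-Duff "source over" with premultiplied colors:
 * result = src + dst * (1 - src_alpha). */
static float src_over(float src, float dst, float src_alpha)
{
   return src + dst * (1.0f - src_alpha);
}

int main(void)
{
   /* With an opaque source the destination contribution vanishes,
    * so the blend reduces to a plain copy of the source. */
   assert(src_over(0.3f, 0.9f, 1.0f) == 0.3f);
   return 0;
}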
2a6bae8a630..bee6d84001d 100644 --- a/src/gallium/state_trackers/vega/shader.c +++ b/src/gallium/state_trackers/vega/shader.c @@ -134,6 +134,8 @@ static VGboolean blend_use_shader(struct vg_context *ctx) switch (ctx->state.vg.blend_mode) { case VG_BLEND_SRC_OVER: advanced_blending = + (!paint_is_opaque(ctx->state.vg.fill_paint) || + !paint_is_opaque(ctx->state.vg.stroke_paint)) && util_format_has_alpha(ctx->draw_buffer->strb->format); break; case VG_BLEND_DST_OVER: diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c index f36f55d6c89..83b42609e03 100644 --- a/src/gallium/state_trackers/vega/vg_context.c +++ b/src/gallium/state_trackers/vega/vg_context.c @@ -33,6 +33,7 @@ #include "vg_manager.h" #include "api.h" #include "mask.h" +#include "handle.h" #include "pipe/p_context.h" #include "util/u_inlines.h" @@ -182,17 +183,26 @@ void vg_init_object(struct vg_object *obj, struct vg_context *ctx, enum vg_objec { obj->type = type; obj->ctx = ctx; + obj->handle = create_handle(obj); +} + +/** free object resources, but not the object itself */ +void vg_free_object(struct vg_object *obj) +{ + obj->type = 0; + obj->ctx = NULL; + destroy_handle(obj->handle); } VGboolean vg_context_is_object_valid(struct vg_context *ctx, enum vg_object_type type, - void *ptr) + VGHandle object) { if (ctx) { struct cso_hash *hash = ctx->owned_objects[type]; if (!hash) return VG_FALSE; - return cso_hash_contains(hash, (unsigned)(long)ptr); + return cso_hash_contains(hash, (unsigned)(long)object); } return VG_FALSE; } @@ -398,8 +408,9 @@ void vg_validate_state(struct vg_context *ctx) if (vg_context_update_depth_stencil_rb(ctx, stfb->width, stfb->height)) ctx->state.dirty |= DEPTH_STENCIL_DIRTY; - /* blend state depends on fb format */ - if (ctx->state.dirty & FRAMEBUFFER_DIRTY) + /* blend state depends on fb format and paint color */ + if ((ctx->state.dirty & FRAMEBUFFER_DIRTY) || + (ctx->state.dirty & PAINT_DIRTY)) ctx->state.dirty |= BLEND_DIRTY; renderer_validate(ctx->renderer, ctx->state.dirty, @@ -412,10 +423,10 @@ void vg_validate_state(struct vg_context *ctx) shader_set_color_transform(ctx->shader, ctx->state.vg.color_transform); } -VGboolean vg_object_is_valid(void *ptr, enum vg_object_type type) +VGboolean vg_object_is_valid(VGHandle object, enum vg_object_type type) { - struct vg_object *obj = ptr; - if (ptr && is_aligned(obj) && obj->type == type) + struct vg_object *obj = handle_to_object(object); + if (obj && is_aligned(obj) && obj->type == type) return VG_TRUE; else return VG_FALSE; diff --git a/src/gallium/state_trackers/vega/vg_context.h b/src/gallium/state_trackers/vega/vg_context.h index d616a20a3d9..71491a5aa22 100644 --- a/src/gallium/state_trackers/vega/vg_context.h +++ b/src/gallium/state_trackers/vega/vg_context.h @@ -81,10 +81,12 @@ enum dirty_state { BLEND_DIRTY = 1 << 0, FRAMEBUFFER_DIRTY = 1 << 1, DEPTH_STENCIL_DIRTY = 1 << 2, + PAINT_DIRTY = 1 << 3, ALL_DIRTY = BLEND_DIRTY | FRAMEBUFFER_DIRTY | - DEPTH_STENCIL_DIRTY + DEPTH_STENCIL_DIRTY | + PAINT_DIRTY }; struct vg_context @@ -129,12 +131,21 @@ struct vg_context struct blit_state *blit; }; + +/** + * Base class for VG objects like paths, images, fonts. 
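The struct vg_object shown in the next hunk gains a handle field, and each OpenVG object type (path, image, font, paint, mask layer) embeds it as its first member; that layout is what allows the handle_to_*() casts in handle.h and the type check in vg_object_is_valid() to work. A self-contained sketch of the embedding pattern (illustrative names, not the actual Mesa structs):

#include <assert.h>
#include <stdint.h>

enum obj_type { OBJ_PATH = 1, OBJ_IMAGE, OBJ_FONT };

struct base_object {            /* stands in for struct vg_object */
   enum obj_type type;
   uint32_t handle;
};

struct path_object {            /* "derived" type: base must come first */
   struct base_object base;
   int num_segments;
};

/* Type-checked downcast, the moral equivalent of handle_to_path()
 * combined with vg_object_is_valid(). */
static struct path_object *as_path(struct base_object *obj)
{
   return (obj && obj->type == OBJ_PATH) ? (struct path_object *) obj : NULL;
}

int main(void)
{
   struct path_object p = { { OBJ_PATH, 1 }, 8 };
   assert(as_path(&p.base) == &p);
   assert(as_path(&p.base)->num_segments == 8);
   return 0;
}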
+ */ struct vg_object { enum vg_object_type type; + VGHandle handle; struct vg_context *ctx; }; + + void vg_init_object(struct vg_object *obj, struct vg_context *ctx, enum vg_object_type type); -VGboolean vg_object_is_valid(void *ptr, enum vg_object_type type); +void vg_free_object(struct vg_object *obj); + +VGboolean vg_object_is_valid(VGHandle object, enum vg_object_type type); struct vg_context *vg_create_context(struct pipe_context *pipe, const void *visual, @@ -145,7 +156,7 @@ void vg_set_current_context(struct vg_context *ctx); VGboolean vg_context_is_object_valid(struct vg_context *ctx, enum vg_object_type type, - void *ptr); + VGHandle object); void vg_context_add_object(struct vg_context *ctx, enum vg_object_type type, void *ptr); diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c index ec713b7fb1d..44d2996bb45 100644 --- a/src/gallium/state_trackers/vega/vg_manager.c +++ b/src/gallium/state_trackers/vega/vg_manager.c @@ -40,6 +40,7 @@ #include "vg_manager.h" #include "vg_context.h" #include "api.h" +#include "handle.h" static boolean vg_context_update_color_rb(struct vg_context *ctx, struct pipe_resource *pt) @@ -172,6 +173,9 @@ vg_api_create_context(struct st_api *stapi, struct st_manager *smapi, if (attribs->major > 1 || (attribs->major == 1 && attribs->minor > 0)) return NULL; + /* for VGHandle / pointer lookups */ + init_handles(); + pipe = smapi->screen->context_create(smapi->screen, NULL); if (!pipe) return NULL; diff --git a/src/gallium/state_trackers/vega/vgu.c b/src/gallium/state_trackers/vega/vgu.c index 7dc51c5599c..4206a91e006 100644 --- a/src/gallium/state_trackers/vega/vgu.c +++ b/src/gallium/state_trackers/vega/vgu.c @@ -29,6 +29,7 @@ #include "matrix.h" #include "path.h" +#include "handle.h" #include "util/u_debug.h" #include "util/u_pointer.h" @@ -36,16 +37,6 @@ #include <math.h> #include <assert.h> -static VGboolean is_aligned_to(const void *ptr, VGbyte alignment) -{ - void *aligned = align_pointer(ptr, alignment); - return (ptr == aligned) ? 
VG_TRUE : VG_FALSE; -} - -static VGboolean is_aligned(const void *ptr) -{ - return is_aligned_to(ptr, 4); -} static void vgu_append_float_coords(VGPath path, const VGubyte *cmds, @@ -54,7 +45,7 @@ static void vgu_append_float_coords(VGPath path, VGint num_coords) { VGubyte common_data[40 * sizeof(VGfloat)]; - struct path *p = (struct path *)path; + struct path *p = handle_to_path(path); vg_float_to_datatype(path_datatype(p), common_data, coords, num_coords); vgAppendPathData(path, num_cmds, cmds, common_data); diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript index 1b7597de440..7cb953ba742 100644 --- a/src/gallium/state_trackers/wgl/SConscript +++ b/src/gallium/state_trackers/wgl/SConscript @@ -15,6 +15,9 @@ env.AppendUnique(CPPDEFINES = [ 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers 'WIN32_LEAN_AND_MEAN', # http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx ]) +if not env['gles']: + # prevent _glapi_* from being declared __declspec(dllimport) + env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS']) sources = [ 'stw_context.c', diff --git a/src/gallium/state_trackers/wgl/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c index c4822d4d8aa..4ece4e4979a 100644 --- a/src/gallium/state_trackers/wgl/stw_device.c +++ b/src/gallium/state_trackers/wgl/stw_device.c @@ -170,7 +170,10 @@ stw_cleanup(void) _glthread_DESTROY_MUTEX(OneTimeLock); + /* glapi is statically linked: we can call the local destroy function. */ +#ifdef _GLAPI_NO_EXPORTS _glapi_destroy_multithread(); +#endif #ifdef DEBUG debug_memory_end(stw_dev->memdbg_no); diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index 71f7b8c21d0..d751ac18704 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -135,14 +135,12 @@ crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, /* Only set gamma when needed, to avoid unneeded delays. 
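Two gallium auxiliary interface changes recur throughout this merge: pipe_buffer_create() now takes an explicit usage argument (PIPE_USAGE_STATIC in these callers), and util_draw_vertex_buffer() now takes the cso_context as its second parameter. The vega renderer.c hunks above and the xorg_renderer.c hunks below are mechanical updates to those signatures. A representative before/after, shown only as a fragment since screen, pipe, cso and buf come from the surrounding driver code:

   /* before this merge */
   buf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER, size);
   util_draw_vertex_buffer(pipe, buf, 0,
                           PIPE_PRIM_QUADS, num_verts, attrs_per_vertex);

   /* after this merge */
   buf = pipe_buffer_create(screen, PIPE_BIND_VERTEX_BUFFER,
                            PIPE_USAGE_STATIC, size);
   util_draw_vertex_buffer(pipe, cso, buf, 0,
                           PIPE_PRIM_QUADS, num_verts, attrs_per_vertex);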
*/ #if defined(XF86_CRTC_VERSION) && XF86_CRTC_VERSION >= 3 - if (!crtc->active) -#endif + if (!crtc->active && crtc->version >= 3) crtc->funcs->gamma_set(crtc, crtc->gamma_red, crtc->gamma_green, crtc->gamma_blue, crtc->gamma_size); - -#if defined(XF86_CRTC_VERSION) && XF86_CRTC_VERSION >= 3 crtc->active = TRUE; #endif + crtc->x = x; crtc->y = y; crtc->mode = *mode; diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index a3d7c5a70e2..3b3ae455c0b 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -73,7 +73,7 @@ renderer_draw(struct xorg_renderer *r) if (buf) { cso_set_vertex_elements(r->cso, r->attrs_per_vertex, r->velems); - util_draw_vertex_buffer(pipe, buf, 0, + util_draw_vertex_buffer(pipe, r->cso, buf, 0, PIPE_PRIM_QUADS, num_verts, /* verts */ r->attrs_per_vertex); /* attribs/vert */ @@ -429,6 +429,7 @@ void renderer_set_constants(struct xorg_renderer *r, pipe_resource_reference(cbuf, NULL); *cbuf = pipe_buffer_create(r->pipe->screen, PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, param_bytes); if (*cbuf) { @@ -616,7 +617,7 @@ void renderer_draw_yuv(struct xorg_renderer *r, cso_set_vertex_elements(r->cso, num_attribs, r->velems); - util_draw_vertex_buffer(pipe, buf, 0, + util_draw_vertex_buffer(pipe, r->cso, buf, 0, PIPE_PRIM_QUADS, 4, /* verts */ num_attribs); /* attribs/vert */ diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c index 4a835c04d8a..78de154bdd7 100644 --- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c +++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c @@ -14,7 +14,7 @@ * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
@@ -176,6 +176,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture * template.height0 = util_next_power_of_two(height); } template.depth0 = 1; + template.array_size = 1; template.usage = PIPE_USAGE_DYNAMIC; template.bind = PIPE_BIND_SAMPLER_VIEW; template.flags = 0; @@ -222,7 +223,7 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh if (!subpicture) return XvMCBadSubpicture; - + /* Convert color to float */ util_format_read_4f(PIPE_FORMAT_B8G8R8A8_UNORM, color_f, 1, diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index d7285a478fb..c90ad409c10 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -122,6 +122,7 @@ CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned i template.width0 = width; template.height0 = height; template.depth0 = 1; + template.array_size = 1; template.usage = PIPE_USAGE_DEFAULT; template.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_BLIT_SOURCE; template.flags = 0; @@ -240,6 +241,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac template.height0 = util_next_power_of_two(context->height); } template.depth0 = 1; + template.array_size = 1; template.usage = PIPE_USAGE_DEFAULT; template.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; template.flags = 0; diff --git a/src/gallium/targets/SConscript.dri b/src/gallium/targets/SConscript.dri index bc3671a256b..101863a6848 100644 --- a/src/gallium/targets/SConscript.dri +++ b/src/gallium/targets/SConscript.dri @@ -72,7 +72,6 @@ COMMON_DRI_DRM_OBJECTS = [ drienv.AppendUnique(LIBS = [ 'expat', - 'talloc', ]) Export([ diff --git a/src/gallium/targets/dri-r300/Makefile b/src/gallium/targets/dri-r300/Makefile index 9afbb13276d..cc77a4bc20d 100644 --- a/src/gallium/targets/dri-r300/Makefile +++ b/src/gallium/targets/dri-r300/Makefile @@ -22,6 +22,4 @@ DRIVER_DEFINES = \ include ../Makefile.dri -DRI_LIB_DEPS += -ldrm_radeon - symlinks: diff --git a/src/gallium/targets/dri-r300/SConscript b/src/gallium/targets/dri-r300/SConscript index 005b4bbf7f1..683b6c6972d 100644 --- a/src/gallium/targets/dri-r300/SConscript +++ b/src/gallium/targets/dri-r300/SConscript @@ -2,8 +2,6 @@ Import('*') env = drienv.Clone() -env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') - env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE', 'GALLIUM_GALAHAD']) env.Prepend(LIBS = [ diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript index aa771db2d1a..1df11a8747b 100644 --- a/src/gallium/targets/dri-r600/SConscript +++ b/src/gallium/targets/dri-r600/SConscript @@ -2,8 +2,6 @@ Import('*') env = drienv.Clone() -env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') - env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE']) env.Prepend(LIBS = [ diff --git a/src/gallium/targets/dri-vmwgfx/Makefile b/src/gallium/targets/dri-vmwgfx/Makefile index 38f78932e13..8c716f42fe2 100644 --- a/src/gallium/targets/dri-vmwgfx/Makefile +++ b/src/gallium/targets/dri-vmwgfx/Makefile @@ -6,6 +6,7 @@ LIBNAME = vmwgfx_dri.so PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \ + $(TOP)/src/gallium/winsys/sw/wrapper/libwsw.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/svga/libsvga.a diff --git 
a/src/gallium/targets/egl-static/SConscript b/src/gallium/targets/egl-static/SConscript index 381ef4e862d..cbd98cc416a 100644 --- a/src/gallium/targets/egl-static/SConscript +++ b/src/gallium/targets/egl-static/SConscript @@ -58,10 +58,18 @@ if env['platform'] == 'windows': env.Prepend(LIBS = [ ws_gdi, ]) -else: - # OpenGL - env.Append(CPPDEFINES = ['FEATURE_GL=1']) - env.Prepend(LIBS = ['GL', 'talloc', glsl, mesa]) + +# OpenGL ES and OpenGL +if env['gles']: + env.Append(CPPDEFINES = [ + 'FEATURE_GL=1', + 'FEATURE_ES1=1', + 'FEATURE_ES2=1' + ]) + env.Prepend(LIBPATH = [shared_glapi.dir]) + # manually add LIBPREFIX on windows + glapi_name = 'glapi' if env['platform'] != 'windows' else 'libglapi' + env.Prepend(LIBS = [glapi_name, glsl, mesa]) # OpenVG if True: @@ -96,7 +104,6 @@ if env['drm']: ]) if env['drm_radeon']: - env.ParseConfig('pkg-config --cflags --libs libdrm_radeon') env.Append(CPPDEFINES = ['_EGL_PIPE_R300', '_EGL_PIPE_R600']) env.Prepend(LIBS = [ radeonwinsys, diff --git a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile index 92d971bab01..de01939e5f1 100644 --- a/src/gallium/targets/egl/Makefile +++ b/src/gallium/targets/egl/Makefile @@ -46,6 +46,9 @@ ifneq ($(findstring x11, $(EGL_PLATFORMS)),) egl_SYS += -lX11 -lXext -lXfixes $(LIBDRM_LIB) egl_LIBS += $(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a endif +ifneq ($(findstring wayland, $(EGL_PLATFORMS)),) +egl_SYS += $(WAYLAND_LIBS) $(LIBDRM_LIB) +endif ifneq ($(findstring drm, $(EGL_PLATFORMS)),) egl_SYS += $(LIBDRM_LIB) endif @@ -89,7 +92,7 @@ nouveau_LIBS := \ # r300 pipe driver r300_CPPFLAGS := -r300_SYS := -ldrm -ldrm_radeon +r300_SYS := -ldrm r300_LIBS := \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a diff --git a/src/gallium/targets/libgl-gdi/SConscript b/src/gallium/targets/libgl-gdi/SConscript index 6fa0851a1ab..49462a8e309 100644 --- a/src/gallium/targets/libgl-gdi/SConscript +++ b/src/gallium/targets/libgl-gdi/SConscript @@ -15,7 +15,6 @@ env.Append(LIBS = [ 'user32', 'kernel32', 'ws2_32', - talloc, ]) sources = ['libgl_gdi.c'] @@ -37,6 +36,12 @@ drivers += [trace, rbug] env['no_import_lib'] = 1 +# when GLES is enabled, gl* and _glapi_* belong to bridge_glapi and +# shared_glapi respectively +if env['gles']: + env.Prepend(LIBPATH = [shared_glapi.dir]) + glapi = [bridge_glapi, 'libglapi'] + opengl32 = env.SharedLibrary( target ='opengl32', source = sources, diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript index d932736be78..ca15372f1d9 100644 --- a/src/gallium/targets/libgl-xlib/SConscript +++ b/src/gallium/targets/libgl-xlib/SConscript @@ -16,6 +16,12 @@ env.Append(CPPDEFINES = ['USE_XSHM']) env.Prepend(LIBS = env['X11_LIBS']) +# when GLES is enabled, gl* and _glapi_* belong to bridge_glapi and +# shared_glapi respectively +if env['gles']: + env.Prepend(LIBPATH = [shared_glapi.dir]) + glapi = [bridge_glapi, 'glapi'] + env.Prepend(LIBS = [ st_xlib, ws_xlib, @@ -23,7 +29,6 @@ env.Prepend(LIBS = [ mesa, glsl, gallium, - 'talloc' ]) sources = [ diff --git a/src/gallium/targets/xorg-radeon/Makefile b/src/gallium/targets/xorg-radeon/Makefile index d3bc3569929..6d5f2c3d16e 100644 --- a/src/gallium/targets/xorg-radeon/Makefile +++ b/src/gallium/targets/xorg-radeon/Makefile @@ -19,6 +19,6 @@ DRIVER_PIPES = \ $(TOP)/src/gallium/drivers/rbug/librbug.a DRIVER_LINKS = \ - $(shell pkg-config --libs libdrm libdrm_radeon) + $(shell pkg-config --libs libdrm) include ../Makefile.xorg diff --git 
a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript index 3341b884985..565fa5279cd 100644 --- a/src/gallium/tests/graw/SConscript +++ b/src/gallium/tests/graw/SConscript @@ -24,6 +24,7 @@ progs = [ 'gs-test', 'shader-leak', 'tri-gs', + 'quad-sample', ] for name in progs: diff --git a/src/gallium/tests/graw/fs-test.c b/src/gallium/tests/graw/fs-test.c index d21eb44e116..ff82b607110 100644 --- a/src/gallium/tests/graw/fs-test.c +++ b/src/gallium/tests/graw/fs-test.c @@ -215,7 +215,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/gs-test.c b/src/gallium/tests/graw/gs-test.c index 0c65390e109..cc05889dd05 100644 --- a/src/gallium/tests/graw/gs-test.c +++ b/src/gallium/tests/graw/gs-test.c @@ -251,13 +251,11 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); vbuf.buffer_offset = 0; if (draw_strip) { - vbuf.max_index = sizeof(vertices_strip) / vbuf.stride; vbuf.buffer = screen->user_buffer_create(screen, vertices_strip, sizeof(vertices_strip), PIPE_BIND_VERTEX_BUFFER); } else { - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer = screen->user_buffer_create(screen, vertices, sizeof(vertices), diff --git a/src/gallium/tests/graw/quad-sample.c b/src/gallium/tests/graw/quad-sample.c new file mode 100644 index 00000000000..3c6458b434e --- /dev/null +++ b/src/gallium/tests/graw/quad-sample.c @@ -0,0 +1,414 @@ +/* Display a cleared blue window. This demo has no dependencies on + * any utility code, just the graw interface and gallium. + */ + +#include "state_tracker/graw.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" + +#include "util/u_debug.h" /* debug_dump_surface_bmp() */ +#include "util/u_inlines.h" +#include "util/u_memory.h" /* Offset() */ +#include "util/u_draw_quad.h" +#include "util/u_box.h" + +#include <stdio.h> + +enum pipe_format formats[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_NONE +}; + +static const int WIDTH = 300; +static const int HEIGHT = 300; + +static struct pipe_screen *screen = NULL; +static struct pipe_context *ctx = NULL; +static struct pipe_resource *rttex = NULL; +static struct pipe_resource *samptex = NULL; +static struct pipe_surface *surf = NULL; +static struct pipe_sampler_view *sv = NULL; +static void *sampler = NULL; +static void *window = NULL; + +struct vertex { + float position[4]; + float color[4]; +}; + +static struct vertex vertices[] = +{ + { { 0.9, -0.9, 0.0, 1.0 }, + { 1, 0, 0, 1 } }, + + { { 0.9, 0.9, 0.0, 1.0 }, + { 1, 1, 0, 1 } }, + + { {-0.9, 0.9, 0.0, 1.0 }, + { 0, 1, 0, 1 } }, + + { {-0.9, -0.9, 0.0, 1.0 }, + { 0, 0, 0, 1 } }, +}; + + + + +static void set_viewport( float x, float y, + float width, float height, + float near, float far) +{ + float z = far; + float half_width = (float)width / 2.0f; + float half_height = (float)height / 2.0f; + float half_depth = ((float)far - (float)near) / 2.0f; + struct pipe_viewport_state vp; + + vp.scale[0] = half_width; + vp.scale[1] = half_height; + vp.scale[2] = half_depth; + vp.scale[3] = 1.0f; + + vp.translate[0] = half_width + x; + vp.translate[1] = half_height + y; + vp.translate[2] = half_depth + z; + vp.translate[3] = 0.0f; + + ctx->set_viewport_state( ctx, &vp ); +} + +static void set_vertices( void ) +{ + struct 
pipe_vertex_element ve[2]; + struct pipe_vertex_buffer vbuf; + void *handle; + + memset(ve, 0, sizeof ve); + + ve[0].src_offset = Offset(struct vertex, position); + ve[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[1].src_offset = Offset(struct vertex, color); + ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + + handle = ctx->create_vertex_elements_state(ctx, 2, ve); + ctx->bind_vertex_elements_state(ctx, handle); + + + vbuf.stride = sizeof( struct vertex ); + vbuf.buffer_offset = 0; + vbuf.buffer = screen->user_buffer_create(screen, + vertices, + sizeof(vertices), + PIPE_BIND_VERTEX_BUFFER); + + ctx->set_vertex_buffers(ctx, 1, &vbuf); +} + +static void set_vertex_shader( void ) +{ + void *handle; + const char *text = + "VERT\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL OUT[0], POSITION\n" + "DCL OUT[1], GENERIC[0]\n" + " 0: MOV OUT[1], IN[1]\n" + " 1: MOV OUT[0], IN[0]\n" + " 2: END\n"; + + handle = graw_parse_vertex_shader(ctx, text); + ctx->bind_vs_state(ctx, handle); +} + +static void set_fragment_shader( void ) +{ + void *handle; + const char *text = + "FRAG\n" + "DCL IN[0], GENERIC[0], PERSPECTIVE\n" + "DCL OUT[0], COLOR\n" + "DCL TEMP[0]\n" + "DCL SAMP[0]\n" + "DCL RES[0], 2D, FLOAT\n" + " 0: SAMPLE TEMP[0], IN[0], RES[0], SAMP[0]\n" + " 1: MOV OUT[0], TEMP[0]\n" + " 2: END\n"; + + handle = graw_parse_fragment_shader(ctx, text); + ctx->bind_fs_state(ctx, handle); +} + + +static void draw( void ) +{ + float clear_color[4] = {.5,.5,.5,1}; + + ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); + util_draw_arrays(ctx, PIPE_PRIM_QUADS, 0, 4); + ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + + graw_save_surface_to_file(ctx, surf, NULL); + + screen->flush_frontbuffer(screen, rttex, 0, 0, window); +} + +#define SIZE 16 + +static void init_tex( void ) +{ + struct pipe_sampler_view sv_template; + struct pipe_sampler_state sampler_desc; + struct pipe_resource templat; + struct pipe_box box; + ubyte tex2d[SIZE][SIZE][4]; + int s, t; + +#if (SIZE != 2) + for (s = 0; s < SIZE; s++) { + for (t = 0; t < SIZE; t++) { + if (0) { + int x = (s ^ t) & 1; + tex2d[t][s][0] = (x) ? 0 : 63; + tex2d[t][s][1] = (x) ? 0 : 128; + tex2d[t][s][2] = 0; + tex2d[t][s][3] = 0xff; + } + else { + int x = ((s ^ t) >> 2) & 1; + tex2d[t][s][0] = s*255/(SIZE-1); + tex2d[t][s][1] = t*255/(SIZE-1); + tex2d[t][s][2] = (x) ? 
0 : 128; + tex2d[t][s][3] = 0xff; + } + } + } +#else + tex2d[0][0][0] = 0; + tex2d[0][0][1] = 255; + tex2d[0][0][2] = 255; + tex2d[0][0][3] = 0; + + tex2d[0][1][0] = 0; + tex2d[0][1][1] = 0; + tex2d[0][1][2] = 255; + tex2d[0][1][3] = 255; + + tex2d[1][0][0] = 255; + tex2d[1][0][1] = 255; + tex2d[1][0][2] = 0; + tex2d[1][0][3] = 255; + + tex2d[1][1][0] = 255; + tex2d[1][1][1] = 0; + tex2d[1][1][2] = 0; + tex2d[1][1][3] = 255; +#endif + + templat.target = PIPE_TEXTURE_2D; + templat.format = PIPE_FORMAT_B8G8R8A8_UNORM; + templat.width0 = SIZE; + templat.height0 = SIZE; + templat.depth0 = 1; + templat.last_level = 0; + templat.nr_samples = 1; + templat.bind = PIPE_BIND_SAMPLER_VIEW; + + + samptex = screen->resource_create(screen, + &templat); + if (samptex == NULL) + exit(4); + + u_box_2d(0,0,SIZE,SIZE, &box); + + ctx->transfer_inline_write(ctx, + samptex, + 0, + PIPE_TRANSFER_WRITE, + &box, + tex2d, + sizeof tex2d[0], + sizeof tex2d); + + /* Possibly read back & compare against original data: + */ + if (0) + { + struct pipe_transfer *t; + uint32_t *ptr; + t = pipe_get_transfer(ctx, samptex, + 0, 0, /* level, layer */ + PIPE_TRANSFER_READ, + 0, 0, SIZE, SIZE); /* x, y, width, height */ + + ptr = ctx->transfer_map(ctx, t); + + if (memcmp(ptr, tex2d, sizeof tex2d) != 0) { + assert(0); + exit(9); + } + + ctx->transfer_unmap(ctx, t); + + ctx->transfer_destroy(ctx, t); + } + + memset(&sv_template, 0, sizeof sv_template); + sv_template.format = samptex->format; + sv_template.texture = samptex; + sv_template.swizzle_r = 0; + sv_template.swizzle_g = 1; + sv_template.swizzle_b = 2; + sv_template.swizzle_a = 3; + sv = ctx->create_sampler_view(ctx, samptex, &sv_template); + if (sv == NULL) + exit(5); + + ctx->set_fragment_sampler_views(ctx, 1, &sv); + + + memset(&sampler_desc, 0, sizeof sampler_desc); + sampler_desc.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler_desc.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler_desc.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler_desc.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler_desc.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler_desc.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler_desc.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler_desc.compare_func = 0; + sampler_desc.normalized_coords = 1; + sampler_desc.max_anisotropy = 0; + + sampler = ctx->create_sampler_state(ctx, &sampler_desc); + if (sampler == NULL) + exit(6); + + ctx->bind_fragment_sampler_states(ctx, 1, &sampler); + +} + +static void init( void ) +{ + struct pipe_framebuffer_state fb; + struct pipe_resource templat; + struct pipe_surface surf_tmpl; + int i; + + /* It's hard to say whether window or screen should be created + * first. Different environments would prefer one or the other. + * + * Also, no easy way of querying supported formats if the screen + * cannot be created first. 
+ */ + for (i = 0; formats[i] != PIPE_FORMAT_NONE; i++) { + screen = graw_create_window_and_screen(0, 0, 300, 300, + formats[i], + &window); + if (window && screen) + break; + } + if (!screen || !window) { + fprintf(stderr, "Unable to create window\n"); + exit(1); + } + + ctx = screen->context_create(screen, NULL); + if (ctx == NULL) + exit(3); + + templat.target = PIPE_TEXTURE_2D; + templat.format = formats[i]; + templat.width0 = WIDTH; + templat.height0 = HEIGHT; + templat.depth0 = 1; + templat.array_size = 1; + templat.last_level = 0; + templat.nr_samples = 1; + templat.bind = (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET); + + rttex = screen->resource_create(screen, + &templat); + if (rttex == NULL) + exit(4); + + surf_tmpl.format = templat.format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = 0; + surf_tmpl.u.tex.first_layer = 0; + surf_tmpl.u.tex.last_layer = 0; + surf = ctx->create_surface(ctx, rttex, &surf_tmpl); + if (surf == NULL) + exit(5); + + memset(&fb, 0, sizeof fb); + fb.nr_cbufs = 1; + fb.width = WIDTH; + fb.height = HEIGHT; + fb.cbufs[0] = surf; + + ctx->set_framebuffer_state(ctx, &fb); + + { + struct pipe_blend_state blend; + void *handle; + memset(&blend, 0, sizeof blend); + blend.rt[0].colormask = PIPE_MASK_RGBA; + handle = ctx->create_blend_state(ctx, &blend); + ctx->bind_blend_state(ctx, handle); + } + + { + struct pipe_depth_stencil_alpha_state depthstencil; + void *handle; + memset(&depthstencil, 0, sizeof depthstencil); + handle = ctx->create_depth_stencil_alpha_state(ctx, &depthstencil); + ctx->bind_depth_stencil_alpha_state(ctx, handle); + } + + { + struct pipe_rasterizer_state rasterizer; + void *handle; + memset(&rasterizer, 0, sizeof rasterizer); + rasterizer.cull_face = PIPE_FACE_NONE; + rasterizer.gl_rasterization_rules = 1; + handle = ctx->create_rasterizer_state(ctx, &rasterizer); + ctx->bind_rasterizer_state(ctx, handle); + } + + set_viewport(0, 0, WIDTH, HEIGHT, 30, 1000); + + init_tex(); + + set_vertices(); + set_vertex_shader(); + set_fragment_shader(); +} + +static void args(int argc, char *argv[]) +{ + int i; + + for (i = 1; i < argc;) { + if (graw_parse_args(&i, argc, argv)) { + continue; + } + exit(1); + } +} + + +int main( int argc, char *argv[] ) +{ + args(argc, argv); + init(); + + graw_set_display_func( draw ); + graw_main_loop(); + return 0; +} diff --git a/src/gallium/tests/graw/quad-tex.c b/src/gallium/tests/graw/quad-tex.c index 58ca639d207..4e66813b301 100644 --- a/src/gallium/tests/graw/quad-tex.c +++ b/src/gallium/tests/graw/quad-tex.c @@ -97,7 +97,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/shader-leak.c b/src/gallium/tests/graw/shader-leak.c index 9af76f51ea2..a23ca73ac1d 100644 --- a/src/gallium/tests/graw/shader-leak.c +++ b/src/gallium/tests/graw/shader-leak.c @@ -88,7 +88,6 @@ static void set_vertices( void ) vbuf.stride = sizeof(struct vertex); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/tri-gs.c b/src/gallium/tests/graw/tri-gs.c index a1a00b32098..47b76530c6b 100644 --- a/src/gallium/tests/graw/tri-gs.c +++ b/src/gallium/tests/graw/tri-gs.c @@ -89,7 +89,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / 
vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/tri-instanced.c b/src/gallium/tests/graw/tri-instanced.c index f61d8b9844d..259b3d9527c 100644 --- a/src/gallium/tests/graw/tri-instanced.c +++ b/src/gallium/tests/graw/tri-instanced.c @@ -132,7 +132,6 @@ static void set_vertices( void ) /* vertex data */ vbuf[0].stride = sizeof( struct vertex ); - vbuf[0].max_index = sizeof(vertices) / vbuf[0].stride; vbuf[0].buffer_offset = 0; vbuf[0].buffer = screen->user_buffer_create(screen, vertices, @@ -141,7 +140,6 @@ static void set_vertices( void ) /* instance data */ vbuf[1].stride = sizeof( inst_data[0] ); - vbuf[1].max_index = sizeof(inst_data) / vbuf[1].stride; vbuf[1].buffer_offset = 0; vbuf[1].buffer = screen->user_buffer_create(screen, inst_data, diff --git a/src/gallium/tests/graw/tri.c b/src/gallium/tests/graw/tri.c index 006d61ca88c..4266c0394d8 100644 --- a/src/gallium/tests/graw/tri.c +++ b/src/gallium/tests/graw/tri.c @@ -93,7 +93,6 @@ static void set_vertices( void ) vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/graw/vs-test.c b/src/gallium/tests/graw/vs-test.c index 1358fa85dfd..dd64d8b9301 100644 --- a/src/gallium/tests/graw/vs-test.c +++ b/src/gallium/tests/graw/vs-test.c @@ -171,7 +171,6 @@ static void set_vertices( void ) } vbuf.stride = sizeof( struct vertex ); - vbuf.max_index = sizeof(vertices) / vbuf.stride; vbuf.buffer_offset = 0; vbuf.buffer = screen->user_buffer_create(screen, vertices, diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index 92c5b4dbb18..af93e09d8d4 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -129,7 +129,8 @@ static void init_prog(struct program *p) } }; - p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(vertices)); + p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(vertices)); pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(vertices), vertices); } diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index 37c1573051f..b89cfe0d989 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -120,7 +120,8 @@ static void init_prog(struct program *p) } }; - p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, sizeof(vertices)); + p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, sizeof(vertices)); pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(vertices), vertices); } diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript index d74f8a2e989..9f36d225e10 100644 --- a/src/gallium/winsys/SConscript +++ b/src/gallium/winsys/SConscript @@ -13,6 +13,10 @@ SConscript([ 'sw/gdi/SConscript', ]) +SConscript([ + 'i915/sw/SConscript', +]) + if env['dri']: SConscript([ 'sw/dri/SConscript', diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c index ebe86dcf196..afeab5eef42 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c @@ -4,6 +4,7 @@ #include "i915_drm.h" #include "i915/i915_debug.h" +#include <xf86drm.h> #define BATCH_RESERVED 16 @@ -34,7 +35,6 @@ static void i915_drm_batchbuffer_reset(struct i915_drm_batchbuffer *batch) { struct 
i915_drm_winsys *idws = i915_drm_winsys(batch->base.iws); - int ret; if (batch->bo) drm_intel_bo_unreference(batch->bo); @@ -43,18 +43,6 @@ i915_drm_batchbuffer_reset(struct i915_drm_batchbuffer *batch) batch->actual_size, 4096); -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - ret = drm_intel_gem_bo_map_gtt(batch->bo); -#else - ret = drm_intel_bo_map(batch->bo, TRUE); -#endif - assert(ret == 0); - batch->base.map = batch->bo->virtual; -#else - (void)ret; -#endif - memset(batch->base.map, 0, batch->actual_size); batch->base.ptr = batch->base.map; batch->base.size = batch->actual_size - BATCH_RESERVED; @@ -87,7 +75,7 @@ static int i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - unsigned pre_add, bool fenced) + unsigned pre_add, boolean fenced) { struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch); unsigned write_domain = 0; @@ -145,6 +133,12 @@ i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, return ret; } +static void +i915_drm_throttle(struct i915_drm_winsys *idws) +{ + drmIoctl(idws->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL); +} + static void i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, struct pipe_fence_handle **fence) @@ -168,6 +162,8 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, if (ret == 0 && i915_drm_winsys(ibatch->iws)->send_cmd) ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); + i915_drm_throttle(i915_drm_winsys(ibatch->iws)); + if (ret != 0 || i915_drm_winsys(ibatch->iws)->dump_cmd) { i915_dump_batchbuffer(ibatch); assert(ret == 0); diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c index 44773ae30e7..8085591c8eb 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c @@ -1,6 +1,7 @@ #include "i915_sw_winsys.h" #include "i915/i915_batchbuffer.h" +#include "i915/i915_debug.h" #include "util/u_memory.h" #define BATCH_RESERVED 16 @@ -61,7 +62,7 @@ static int i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - unsigned pre_add, bool fenced) + unsigned pre_add, boolean fenced) { struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch); int ret = 0; @@ -98,7 +99,6 @@ i915_sw_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, { struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch); unsigned used = 0; - int i; assert(i915_winsys_batchbuffer_space(ibatch) >= 0); @@ -122,12 +122,7 @@ i915_sw_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, assert((used & 4) == 0); if (i915_sw_winsys(ibatch->iws)->dump_cmd) { - unsigned *ptr = (unsigned *)batch->base.map; - - debug_printf("%s:\n", __func__); - for (i = 0; i < used / 4; i++, ptr++) { - debug_printf("\t%08x: %08x\n", i*4, *ptr); - } + i915_dump_batchbuffer(ibatch); } if (fence) { diff --git a/src/gallium/winsys/i915/sw/i915_sw_buffer.c b/src/gallium/winsys/i915/sw/i915_sw_buffer.c index 834805e621d..9a7e90e2173 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_buffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_buffer.c @@ -84,7 +84,7 @@ i915_sw_buffer_write(struct i915_winsys *iws, { struct i915_sw_buffer *buf = i915_sw_buffer(buffer); - memcpy(buf->ptr + offset, data, size); + memcpy((char*)buf->ptr + offset, data, size); return 0; } diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript 
index f97434e995d..cc9a06a2393 100644 --- a/src/gallium/winsys/r600/drm/SConscript +++ b/src/gallium/winsys/r600/drm/SConscript @@ -13,7 +13,12 @@ r600_sources = [ 'r600_bomgr.c', ] -env.ParseConfig('pkg-config --cflags libdrm_radeon') +try: + env.ParseConfig('pkg-config --cflags libdrm_radeon') +except OSError: + print 'warning: not building r600g' + Return() + env.Append(CPPPATH = '#/src/gallium/drivers/r600') r600winsys = env.ConvenienceLibrary( diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 3fdafc39283..aa4035a302b 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -621,6 +621,8 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; + ctx->max_db = 8; + LIST_INITHEAD(&ctx->fenced_bo); /* init dirty list */ diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 339d5dc47f4..06e8f6910f3 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -108,11 +108,10 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch); if (array_mode) { if (bo->tiling_flags) { - if (bo->tiling_flags & RADEON_TILING_MICRO) - *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; - if ((bo->tiling_flags & (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) == - (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) + if (bo->tiling_flags & RADEON_TILING_MACRO) *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; + else if (bo->tiling_flags & RADEON_TILING_MICRO) + *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; } else { *array_mode = 0; } diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 69b0a1dcd72..f5cd48d39c6 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -41,6 +41,10 @@ #define RADEON_INFO_TILING_CONFIG 0x6 #endif +#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ +#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9 +#endif + enum radeon_family r600_get_family(struct radeon *r600) { return r600->family; @@ -56,9 +60,14 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) return &radeon->tiling_info; } +unsigned r600_get_clock_crystal_freq(struct radeon *radeon) +{ + return radeon->clock_crystal_freq; +} + static int radeon_get_device(struct radeon *radeon) { - struct drm_radeon_info info; + struct drm_radeon_info info = {}; int r; radeon->device = 0; @@ -69,20 +78,8 @@ static int radeon_get_device(struct radeon *radeon) return r; } -static int radeon_drm_get_tiling(struct radeon *radeon) +static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) { - struct drm_radeon_info info; - int r; - uint32_t tiling_config; - - info.request = RADEON_INFO_TILING_CONFIG; - info.value = (uintptr_t)&tiling_config; - r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, - sizeof(struct drm_radeon_info)); - - if (r) - return 0; - switch ((tiling_config & 0xe) >> 1) { case 0: radeon->tiling_info.num_channels = 1; @@ -124,6 +121,80 @@ static int radeon_drm_get_tiling(struct radeon *radeon) return 0; } +static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config) +{ + switch (tiling_config & 0xf) { + case 0: + radeon->tiling_info.num_channels = 1; + break; + case 1: + radeon->tiling_info.num_channels = 2; + break; + case 2: + 
radeon->tiling_info.num_channels = 4; + break; + case 3: + radeon->tiling_info.num_channels = 8; + break; + default: + return -EINVAL; + } + + radeon->tiling_info.num_banks = (tiling_config & 0xf0) >> 4; + + switch ((tiling_config & 0xf00) >> 8) { + case 0: + radeon->tiling_info.group_bytes = 256; + break; + case 1: + radeon->tiling_info.group_bytes = 512; + break; + default: + return -EINVAL; + } + return 0; +} + +static int radeon_drm_get_tiling(struct radeon *radeon) +{ + struct drm_radeon_info info; + int r; + uint32_t tiling_config = 0; + + info.request = RADEON_INFO_TILING_CONFIG; + info.value = (uintptr_t)&tiling_config; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + + if (r) + return 0; + + if (radeon->chip_class == R600 || radeon->chip_class == R700) { + r = r600_interpret_tiling(radeon, tiling_config); + } else { + r = eg_interpret_tiling(radeon, tiling_config); + } + return r; +} + +static int radeon_get_clock_crystal_freq(struct radeon *radeon) +{ + struct drm_radeon_info info; + uint32_t clock_crystal_freq; + int r; + + radeon->device = 0; + info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ; + info.value = (uintptr_t)&clock_crystal_freq; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + if (r) + return r; + + radeon->clock_crystal_freq = clock_crystal_freq; + return 0; +} + static int radeon_init_fence(struct radeon *radeon) { radeon->fence = 1; @@ -201,10 +272,12 @@ static struct radeon *radeon_new(int fd, unsigned device) break; } - if (radeon->chip_class == R600 || radeon->chip_class == R700) { - if (radeon_drm_get_tiling(radeon)) - return NULL; - } + if (radeon_drm_get_tiling(radeon)) + return NULL; + + /* get the GPU counter frequency, failure is non fatal */ + radeon_get_clock_crystal_freq(radeon); + radeon->bomgr = r600_bomgr_create(radeon, 1000000); if (radeon->bomgr == NULL) { return NULL; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index f10e2fda6f2..f170640407d 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -637,7 +637,8 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) ctx->range[i].end_offset = ((i + 1) << ctx->hash_shift) - 1; ctx->range[i].blocks = calloc(1 << ctx->hash_shift, sizeof(void*)); if (ctx->range[i].blocks == NULL) { - return -ENOMEM; + r = -ENOMEM; + goto out_err; } } @@ -750,6 +751,9 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) /* init dirty list */ LIST_INITHEAD(&ctx->dirty); + + ctx->max_db = 4; + return 0; out_err: r600_context_fini(ctx); @@ -1084,7 +1088,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) void r600_context_flush(struct r600_context *ctx) { - struct drm_radeon_cs drmib; + struct drm_radeon_cs drmib = {}; struct drm_radeon_cs_chunk chunks[2]; uint64_t chunk_array[2]; unsigned fence; @@ -1246,43 +1250,70 @@ out_err: bof_decref(root); } -static void r600_query_result(struct r600_context *ctx, struct r600_query *query) +static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) { u64 start, end; u32 *results; int i; + int size; + + if (wait) + results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_CPU_READ, NULL); + else + results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_READ, NULL); + if (!results) + return FALSE; - results = 
r600_bo_map(ctx->radeon, query->buffer, 0, NULL); - for (i = 0; i < query->num_results; i += 4) { + size = query->num_results * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1); + for (i = 0; i < size; i += 4) { start = (u64)results[i] | (u64)results[i + 1] << 32; end = (u64)results[i + 2] | (u64)results[i + 3] << 32; - if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) { + if (((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) + || query->type == PIPE_QUERY_TIME_ELAPSED) { query->result += end - start; } } r600_bo_unmap(ctx->radeon, query->buffer); query->num_results = 0; + + return TRUE; } void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { - /* query request needs 6 dwords for begin + 6 dwords for end */ - if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) { + unsigned required_space; + + /* query request needs 6/8 dwords for begin + 6/8 dwords for end */ + if (query->type == PIPE_QUERY_TIME_ELAPSED) + required_space = 16; + else + required_space = 12; + + if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) { /* need to flush */ r600_context_flush(ctx); } /* if query buffer is full force a flush */ - if (query->num_results >= ((query->buffer_size >> 2) - 2)) { + if (query->num_results*4 >= query->buffer_size - 16) { r600_context_flush(ctx); - r600_query_result(ctx, query); + r600_query_result(ctx, query, TRUE); } /* emit begin query */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->num_results + r600_bo_offset(query->buffer); - ctx->pm4[ctx->pm4_cdwords++] = 0; + if (query->type == PIPE_QUERY_TIME_ELAPSED) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); + ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = 0; + } else { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = 0; + } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); @@ -1295,15 +1326,24 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) void r600_query_end(struct r600_context *ctx, struct r600_query *query) { /* emit begin query */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); - ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->num_results + 8 + r600_bo_offset(query->buffer); - ctx->pm4[ctx->pm4_cdwords++] = 0; + if (query->type == PIPE_QUERY_TIME_ELAPSED) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); + ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = 0; + } else { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = 
EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = 0; + } ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); - query->num_results += 16; + query->num_results += 4 * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? ctx->max_db : 1); query->state ^= R600_QUERY_STATE_STARTED; query->state |= R600_QUERY_STATE_ENDED; ctx->num_query_running--; @@ -1313,7 +1353,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned { struct r600_query *query; - if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) + if (query_type != PIPE_QUERY_OCCLUSION_COUNTER && query_type != PIPE_QUERY_TIME_ELAPSED) return NULL; query = calloc(1, sizeof(struct r600_query)); @@ -1355,8 +1395,12 @@ boolean r600_context_query_result(struct r600_context *ctx, if (query->num_results) { r600_context_flush(ctx); } - r600_query_result(ctx, query); - *result = query->result; + if (!r600_query_result(ctx, query, wait)) + return FALSE; + if (query->type == PIPE_QUERY_TIME_ELAPSED) + *result = (1000000*query->result)/r600_get_clock_crystal_freq(ctx->radeon); + else + *result = query->result; query->result = 0; return TRUE; } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index a38a6481b4f..2d91cd97d68 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -49,6 +49,7 @@ struct radeon { unsigned fence; unsigned *cfence; struct r600_bo *fence_bo; + unsigned clock_crystal_freq; }; struct r600_reg { diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 999de82646d..3643ddbcb93 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -98,7 +98,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle, bo->size = open_arg.size; bo->shared = TRUE; } else { - struct drm_radeon_gem_create args; + struct drm_radeon_gem_create args = {}; args.size = size; args.alignment = alignment; @@ -204,7 +204,7 @@ int radeon_bo_get_tiling_flags(struct radeon *radeon, uint32_t *tiling_flags, uint32_t *pitch) { - struct drm_radeon_gem_get_tiling args; + struct drm_radeon_gem_get_tiling args = {}; int ret; args.handle = bo->handle; diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index 06681791e57..f19956931de 100644 --- a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -32,7 +32,7 @@ struct pci_id { unsigned family; }; -struct pci_id radeon_pci_id[] = { +static const struct pci_id radeon_pci_id[] = { {0x1002, 0x3150, CHIP_RV380}, {0x1002, 0x3152, CHIP_RV380}, {0x1002, 0x3154, CHIP_RV380}, diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile index 7e339a2ecfe..e63ae6f5006 100644 --- a/src/gallium/winsys/radeon/drm/Makefile +++ b/src/gallium/winsys/radeon/drm/Makefile @@ -5,10 +5,9 @@ include $(TOP)/configs/current LIBNAME = radeonwinsys C_SOURCES = \ - radeon_drm_buffer.c \ + radeon_drm_bo.c \ radeon_drm_cs.c \ - radeon_drm_common.c \ - radeon_r300.c + radeon_drm_common.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r300 \ $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript index 
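The PIPE_QUERY_TIME_ELAPSED path added to r600_context_query_result() above scales the raw tick delta by the crystal clock before returning it. A minimal sketch of that conversion, assuming (as the surrounding code suggests) that r600_get_clock_crystal_freq() reports the timestamp clock in kHz and that the query result is expected in nanoseconds; the helper name is hypothetical:

    #include <stdint.h>

    /* ticks / (freq_khz * 1000) seconds == ticks * 1000000 / freq_khz nanoseconds */
    static uint64_t ticks_to_ns(uint64_t ticks, unsigned crystal_freq_khz)
    {
        return (ticks * 1000000ull) / crystal_freq_khz;
    }

With a 27 MHz reference clock (27000 kHz), 27000 ticks come out as exactly 1000000 ns, i.e. one millisecond.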
80816621848..b16e03556d3 100644 --- a/src/gallium/winsys/radeon/drm/SConscript +++ b/src/gallium/winsys/radeon/drm/SConscript @@ -3,13 +3,17 @@ Import('*') env = env.Clone() radeon_sources = [ - 'radeon_drm_buffer.c', + 'radeon_drm_bo.c', 'radeon_drm_cs.c', 'radeon_drm_common.c', - 'radeon_r300.c', ] -env.ParseConfig('pkg-config --cflags libdrm_radeon') +try: + env.ParseConfig('pkg-config --cflags libdrm_radeon') +except: + print 'warning: not building r300g' + Return() + env.Append(CPPPATH = '#/src/gallium/drivers/r300') radeonwinsys = env.ConvenienceLibrary( diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c new file mode 100644 index 00000000000..3094337a3cd --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -0,0 +1,599 @@ +/* + * Copyright © 2011 Marek Olšák <[email protected]> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +#define _FILE_OFFSET_BITS 64 +#include "radeon_drm_cs.h" + +#include "util/u_hash_table.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "os/os_thread.h" + +#include "state_tracker/drm_driver.h" + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <xf86drm.h> +#include <errno.h> + +#define RADEON_BO_FLAGS_MACRO_TILE 1 +#define RADEON_BO_FLAGS_MICRO_TILE 2 +#define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 + +extern const struct pb_vtbl radeon_bo_vtbl; + + +static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo) +{ + assert(bo->vtbl == &radeon_bo_vtbl); + return (struct radeon_bo *)bo; +} + +struct radeon_bomgr { + /* Base class. */ + struct pb_manager base; + + /* Winsys. */ + struct radeon_drm_winsys *rws; + + /* List of buffer handles and its mutex. 
*/ + struct util_hash_table *bo_handles; + pipe_mutex bo_handles_mutex; +}; + +static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr) +{ + return (struct radeon_bomgr *)mgr; +} + +static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf) +{ + struct radeon_bo *bo = NULL; + + if (_buf->vtbl == &radeon_bo_vtbl) { + bo = radeon_bo(_buf); + } else { + struct pb_buffer *base_buf; + pb_size offset; + pb_get_base_buffer(_buf, &base_buf, &offset); + + if (base_buf->vtbl == &radeon_bo_vtbl) + bo = radeon_bo(base_buf); + } + + return bo; +} + +void radeon_bo_unref(struct radeon_bo *bo) +{ + struct drm_gem_close args = {}; + + if (!p_atomic_dec_zero(&bo->ref_count)) + return; + + if (bo->name) { + pipe_mutex_lock(bo->mgr->bo_handles_mutex); + util_hash_table_remove(bo->mgr->bo_handles, + (void*)(uintptr_t)bo->name); + pipe_mutex_unlock(bo->mgr->bo_handles_mutex); + } + + if (bo->ptr) + munmap(bo->ptr, bo->size); + + /* Close object. */ + args.handle = bo->handle; + drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args); + pipe_mutex_destroy(bo->map_mutex); + FREE(bo); +} + +static void radeon_bo_wait(struct r300_winsys_bo *_buf) +{ + struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct drm_radeon_gem_wait_idle args = {}; + + while (p_atomic_read(&bo->num_active_ioctls)) { + sched_yield(); + } + + args.handle = bo->handle; + while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, + &args, sizeof(args)) == -EBUSY); +} + +static boolean radeon_bo_is_busy(struct r300_winsys_bo *_buf) +{ + struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct drm_radeon_gem_busy args = {}; + + if (p_atomic_read(&bo->num_active_ioctls)) { + return TRUE; + } + + args.handle = bo->handle; + return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)) != 0; +} + +static void radeon_bo_destroy(struct pb_buffer *_buf) +{ + struct radeon_bo *bo = radeon_bo(_buf); + + radeon_bo_unref(bo); +} + +static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage) +{ + unsigned res = 0; + + if (usage & PIPE_TRANSFER_DONTBLOCK) + res |= PB_USAGE_DONTBLOCK; + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + res |= PB_USAGE_UNSYNCHRONIZED; + + return res; +} + +static void *radeon_bo_map_internal(struct pb_buffer *_buf, + unsigned flags, void *flush_ctx) +{ + struct radeon_bo *bo = radeon_bo(_buf); + struct radeon_drm_cs *cs = flush_ctx; + struct drm_radeon_gem_mmap args = {}; + void *ptr; + + /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */ + if (!(flags & PB_USAGE_UNSYNCHRONIZED)) { + /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */ + if (flags & PB_USAGE_DONTBLOCK) { + if (radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data); + return NULL; + } + + if (radeon_bo_is_busy((struct r300_winsys_bo*)bo)) { + return NULL; + } + } else { + if (radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data); + } + + radeon_bo_wait((struct r300_winsys_bo*)bo); + } + } + + /* Return the pointer if it's already mapped. */ + if (bo->ptr) + return bo->ptr; + + /* Map the buffer. 
*/ + pipe_mutex_lock(bo->map_mutex); + args.handle = bo->handle; + args.offset = 0; + args.size = (uint64_t)bo->size; + if (drmCommandWriteRead(bo->rws->fd, + DRM_RADEON_GEM_MMAP, + &args, + sizeof(args))) { + pipe_mutex_unlock(bo->map_mutex); + fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n", + bo, bo->handle); + return NULL; + } + + ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, + bo->rws->fd, args.addr_ptr); + if (ptr == MAP_FAILED) { + pipe_mutex_unlock(bo->map_mutex); + fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno); + return NULL; + } + bo->ptr = ptr; + pipe_mutex_unlock(bo->map_mutex); + + return bo->ptr; +} + +static void radeon_bo_unmap_internal(struct pb_buffer *_buf) +{ + /* NOP */ +} + +static void radeon_bo_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + *base_buf = buf; + *offset = 0; +} + +static enum pipe_error radeon_bo_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + /* Always pinned */ + return PIPE_OK; +} + +static void radeon_bo_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ +} + +const struct pb_vtbl radeon_bo_vtbl = { + radeon_bo_destroy, + radeon_bo_map_internal, + radeon_bo_unmap_internal, + radeon_bo_validate, + radeon_bo_fence, + radeon_bo_get_base_buffer, +}; + +static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct radeon_bomgr *mgr = radeon_bomgr(_mgr); + struct radeon_drm_winsys *rws = mgr->rws; + struct radeon_bo *bo; + struct drm_radeon_gem_create args = {}; + + args.size = size; + args.alignment = desc->alignment; + args.initial_domain = + (desc->usage & RADEON_PB_USAGE_DOMAIN_GTT ? + RADEON_GEM_DOMAIN_GTT : 0) | + (desc->usage & RADEON_PB_USAGE_DOMAIN_VRAM ? + RADEON_GEM_DOMAIN_VRAM : 0); + + if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE, + &args, sizeof(args))) { + fprintf(stderr, "Failed to allocate :\n"); + fprintf(stderr, " size : %d bytes\n", size); + fprintf(stderr, " alignment : %d bytes\n", desc->alignment); + fprintf(stderr, " domains : %d\n", args.initial_domain); + return NULL; + } + + bo = CALLOC_STRUCT(radeon_bo); + if (!bo) + return NULL; + + pipe_reference_init(&bo->base.base.reference, 1); + bo->base.base.alignment = desc->alignment; + bo->base.base.usage = desc->usage; + bo->base.base.size = size; + bo->base.vtbl = &radeon_bo_vtbl; + bo->mgr = mgr; + bo->rws = mgr->rws; + bo->handle = args.handle; + bo->size = size; + pipe_mutex_init(bo->map_mutex); + + radeon_bo_ref(bo); + return &bo->base; +} + +static void radeon_bomgr_flush(struct pb_manager *mgr) +{ + /* NOP */ +} + +/* This is for the cache bufmgr. 
*/ +static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr, + struct pb_buffer *_buf) +{ + struct radeon_bo *bo = radeon_bo(_buf); + + if (radeon_bo_is_referenced_by_any_cs(bo)) { + return TRUE; + } + + if (radeon_bo_is_busy((struct r300_winsys_bo*)bo)) { + return TRUE; + } + + return FALSE; +} + +static void radeon_bomgr_destroy(struct pb_manager *_mgr) +{ + struct radeon_bomgr *mgr = radeon_bomgr(_mgr); + util_hash_table_destroy(mgr->bo_handles); + pipe_mutex_destroy(mgr->bo_handles_mutex); + FREE(mgr); +} + +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + +struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws) +{ + struct radeon_bomgr *mgr; + + mgr = CALLOC_STRUCT(radeon_bomgr); + if (!mgr) + return NULL; + + mgr->base.destroy = radeon_bomgr_destroy; + mgr->base.create_buffer = radeon_bomgr_create_bo; + mgr->base.flush = radeon_bomgr_flush; + mgr->base.is_buffer_busy = radeon_bomgr_is_buffer_busy; + + mgr->rws = rws; + mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare); + pipe_mutex_init(mgr->bo_handles_mutex); + return &mgr->base; +} + +static void *radeon_bo_map(struct r300_winsys_bo *buf, + struct r300_winsys_cs *cs, + enum pipe_transfer_usage usage) +{ + struct pb_buffer *_buf = pb_buffer(buf); + + return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), cs); +} + +static void radeon_bo_get_tiling(struct r300_winsys_bo *_buf, + enum r300_buffer_tiling *microtiled, + enum r300_buffer_tiling *macrotiled) +{ + struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct drm_radeon_gem_set_tiling args = {}; + + args.handle = bo->handle; + + drmCommandWriteRead(bo->rws->fd, + DRM_RADEON_GEM_GET_TILING, + &args, + sizeof(args)); + + *microtiled = R300_BUFFER_LINEAR; + *macrotiled = R300_BUFFER_LINEAR; + if (args.tiling_flags & RADEON_BO_FLAGS_MICRO_TILE) + *microtiled = R300_BUFFER_TILED; + + if (args.tiling_flags & RADEON_BO_FLAGS_MACRO_TILE) + *macrotiled = R300_BUFFER_TILED; +} + +static void radeon_bo_set_tiling(struct r300_winsys_bo *_buf, + struct r300_winsys_cs *rcs, + enum r300_buffer_tiling microtiled, + enum r300_buffer_tiling macrotiled, + uint32_t pitch) +{ + struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct drm_radeon_gem_set_tiling args = {}; + + /* Tiling determines how DRM treats the buffer data. + * We must flush CS when changing it if the buffer is referenced. */ + if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) { + radeon_drm_cs_flush(rcs); + radeon_drm_cs_sync_flush(rcs); + } + + while (p_atomic_read(&bo->num_active_ioctls)) { + sched_yield(); + } + + if (microtiled == R300_BUFFER_TILED) + args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE; + else if (microtiled == R300_BUFFER_SQUARETILED) + args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE_SQUARE; + + if (macrotiled == R300_BUFFER_TILED) + args.tiling_flags |= RADEON_BO_FLAGS_MACRO_TILE; + + args.handle = bo->handle; + args.pitch = pitch; + + drmCommandWriteRead(bo->rws->fd, + DRM_RADEON_GEM_SET_TILING, + &args, + sizeof(args)); +} + +static struct r300_winsys_cs_handle *radeon_drm_get_cs_handle( + struct r300_winsys_bo *_buf) +{ + /* return radeon_bo. 
*/ + return (struct r300_winsys_cs_handle*) + get_radeon_bo(pb_buffer(_buf)); +} + +static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, + enum r300_buffer_domain domain) +{ + unsigned res = 0; + + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + res |= RADEON_PB_USAGE_CACHE; + + if (domain & R300_DOMAIN_GTT) + res |= RADEON_PB_USAGE_DOMAIN_GTT; + + if (domain & R300_DOMAIN_VRAM) + res |= RADEON_PB_USAGE_DOMAIN_VRAM; + + return res; +} + +static struct r300_winsys_bo * +radeon_winsys_bo_create(struct r300_winsys_screen *rws, + unsigned size, + unsigned alignment, + unsigned bind, + unsigned usage, + enum r300_buffer_domain domain) +{ + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct pb_desc desc; + struct pb_manager *provider; + struct pb_buffer *buffer; + + memset(&desc, 0, sizeof(desc)); + desc.alignment = alignment; + desc.usage = get_pb_usage_from_create_flags(bind, usage, domain); + + /* Assign a buffer manager. */ + if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) + provider = ws->cman; + else + provider = ws->kman; + + buffer = provider->create_buffer(provider, size, &desc); + if (!buffer) + return NULL; + + return (struct r300_winsys_bo*)buffer; +} + +static struct r300_winsys_bo *radeon_winsys_bo_from_handle(struct r300_winsys_screen *rws, + struct winsys_handle *whandle, + unsigned *stride, + unsigned *size) +{ + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct radeon_bo *bo; + struct radeon_bomgr *mgr = radeon_bomgr(ws->kman); + struct drm_gem_open open_arg = {}; + + /* We must maintain a list of pairs <handle, bo>, so that we always return + * the same BO for one particular handle. If we didn't do that and created + * more than one BO for the same handle and then relocated them in a CS, + * we would hit a deadlock in the kernel. + * + * The list of pairs is guarded by a mutex, of course. */ + pipe_mutex_lock(mgr->bo_handles_mutex); + + /* First check if there already is an existing bo for the handle. */ + bo = util_hash_table_get(mgr->bo_handles, (void*)(uintptr_t)whandle->handle); + if (bo) { + /* Increase the refcount. */ + struct pb_buffer *b = NULL; + pb_reference(&b, &bo->base); + goto done; + } + + /* There isn't, create a new one. */ + bo = CALLOC_STRUCT(radeon_bo); + if (!bo) { + goto fail; + } + + /* Open the BO. */ + open_arg.name = whandle->handle; + if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) { + FREE(bo); + goto fail; + } + bo->handle = open_arg.handle; + bo->size = open_arg.size; + bo->name = whandle->handle; + radeon_bo_ref(bo); + + /* Initialize it. 
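The comment in radeon_winsys_bo_from_handle() above explains why every GEM name must map back to one and the same buffer object: two wrappers around one kernel BO, both relocated in a command stream, can deadlock the kernel. A minimal sketch of the lookup-or-import pattern under a mutex; the structures and names here are hypothetical stand-ins for the util_hash_table-based code in the hunk:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdlib.h>

    struct shared_bo {
        uint32_t name;               /* GEM flink name */
        int refcount;
        struct shared_bo *next;
    };

    static struct shared_bo *bo_list;
    static pthread_mutex_t bo_list_mutex = PTHREAD_MUTEX_INITIALIZER;

    static struct shared_bo *lookup_or_import(uint32_t name)
    {
        struct shared_bo *bo;

        pthread_mutex_lock(&bo_list_mutex);
        for (bo = bo_list; bo; bo = bo->next) {
            if (bo->name == name) {          /* already imported: share it */
                bo->refcount++;
                pthread_mutex_unlock(&bo_list_mutex);
                return bo;
            }
        }

        bo = calloc(1, sizeof(*bo));         /* first import: open and remember */
        if (bo) {
            bo->name = name;                 /* DRM_IOCTL_GEM_OPEN would go here */
            bo->refcount = 1;
            bo->next = bo_list;
            bo_list = bo;
        }
        pthread_mutex_unlock(&bo_list_mutex);
        return bo;
    }

radeon_bo_unref() in the same hunk is the matching half: it removes the entry from the table before closing the GEM handle, again under the mutex.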
*/ + pipe_reference_init(&bo->base.base.reference, 1); + bo->base.base.alignment = 0; + bo->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; + bo->base.base.size = bo->size; + bo->base.vtbl = &radeon_bo_vtbl; + bo->mgr = mgr; + bo->rws = mgr->rws; + pipe_mutex_init(bo->map_mutex); + + util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo); + +done: + pipe_mutex_unlock(mgr->bo_handles_mutex); + + if (stride) + *stride = whandle->stride; + if (size) + *size = bo->base.base.size; + + return (struct r300_winsys_bo*)bo; + +fail: + pipe_mutex_unlock(mgr->bo_handles_mutex); + return NULL; +} + +static boolean radeon_winsys_bo_get_handle(struct r300_winsys_bo *buffer, + unsigned stride, + struct winsys_handle *whandle) +{ + struct drm_gem_flink flink = {}; + struct radeon_bo *bo = get_radeon_bo(pb_buffer(buffer)); + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + if (!bo->flinked) { + flink.handle = bo->handle; + + if (ioctl(bo->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { + return FALSE; + } + + bo->flinked = TRUE; + bo->flink = flink.name; + } + whandle->handle = bo->flink; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = bo->handle; + } + + whandle->stride = stride; + return TRUE; +} + +void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) +{ + ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; + ws->base.buffer_set_tiling = radeon_bo_set_tiling; + ws->base.buffer_get_tiling = radeon_bo_get_tiling; + ws->base.buffer_map = radeon_bo_map; + ws->base.buffer_unmap = pb_unmap; + ws->base.buffer_wait = radeon_bo_wait; + ws->base.buffer_is_busy = radeon_bo_is_busy; + ws->base.buffer_create = radeon_winsys_bo_create; + ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; + ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; +} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index 494abdc0b48..a26866b7e75 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -1,5 +1,6 @@ /* * Copyright © 2008 Jérôme Glisse + * Copyright © 2011 Marek Olšák <[email protected]> * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining @@ -26,28 +27,61 @@ /* * Authors: * Jérôme Glisse <[email protected]> + * Marek Olšák <[email protected]> */ #ifndef RADEON_DRM_BUFFER_H #define RADEON_DRM_BUFFER_H #include "radeon_winsys.h" +#include "pipebuffer/pb_bufmgr.h" +#include "os/os_thread.h" -#define RADEON_PB_USAGE_VERTEX (1 << 28) +#define RADEON_PB_USAGE_CACHE (1 << 28) #define RADEON_PB_USAGE_DOMAIN_GTT (1 << 29) #define RADEON_PB_USAGE_DOMAIN_VRAM (1 << 30) +struct radeon_bomgr; + +struct radeon_bo { + struct pb_buffer base; + struct radeon_bomgr *mgr; + struct radeon_drm_winsys *rws; + + void *ptr; + pipe_mutex map_mutex; + + uint32_t size; + uint32_t handle; + uint32_t name; + + int ref_count; + + /* how many command streams is this bo referenced in? */ + int num_cs_references; + + /* how many command streams, which are being emitted in a separate + * thread, is this bo referenced in? 
*/ + int num_active_ioctls; + + boolean flinked; + uint32_t flink; +}; + +struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws); +void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws); + +void radeon_bo_unref(struct radeon_bo *buf); + + +static INLINE void radeon_bo_ref(struct radeon_bo *bo) +{ + p_atomic_inc(&bo->ref_count); +} + static INLINE struct pb_buffer * -radeon_pb_buffer(struct r300_winsys_buffer *buffer) +pb_buffer(struct r300_winsys_bo *buffer) { return (struct pb_buffer *)buffer; } -struct pb_manager *radeon_drm_bufmgr_create(struct radeon_drm_winsys *rws); -struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle); -void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr); -boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, - struct winsys_handle *whandle); -void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws); - #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c deleted file mode 100644 index 5e14287ec2d..00000000000 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ /dev/null @@ -1,532 +0,0 @@ -#include "radeon_drm_buffer.h" -#include "radeon_drm_cs.h" - -#include "util/u_hash_table.h" -#include "util/u_memory.h" -#include "util/u_simple_list.h" -#include "pipebuffer/pb_bufmgr.h" -#include "os/os_thread.h" - -#include "state_tracker/drm_driver.h" - -#include <radeon_drm.h> -#include <radeon_bo_gem.h> -#include <sys/ioctl.h> - -struct radeon_drm_bufmgr; - -struct radeon_drm_buffer { - struct pb_buffer base; - struct radeon_drm_bufmgr *mgr; - - struct radeon_bo *bo; - - boolean flinked; - uint32_t flink; - - struct radeon_drm_buffer *next, *prev; -}; - -extern const struct pb_vtbl radeon_drm_buffer_vtbl; - - -static INLINE struct radeon_drm_buffer * -radeon_drm_buffer(struct pb_buffer *buf) -{ - assert(buf); - assert(buf->vtbl == &radeon_drm_buffer_vtbl); - return (struct radeon_drm_buffer *)buf; -} - -struct radeon_drm_bufmgr { - /* Base class. */ - struct pb_manager base; - - /* Winsys. */ - struct radeon_drm_winsys *rws; - - /* List of mapped buffers and its mutex. */ - struct radeon_drm_buffer buffer_map_list; - pipe_mutex buffer_map_list_mutex; - - /* List of buffer handles and its mutex. */ - struct util_hash_table *buffer_handles; - pipe_mutex buffer_handles_mutex; -}; - -static INLINE struct radeon_drm_bufmgr * -radeon_drm_bufmgr(struct pb_manager *mgr) -{ - assert(mgr); - return (struct radeon_drm_bufmgr *)mgr; -} - -static void -radeon_drm_buffer_destroy(struct pb_buffer *_buf) -{ - struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); - int name; - - if (buf->bo->ptr != NULL) { - pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); - /* Now test it again inside the mutex. 
*/ - if (buf->bo->ptr != NULL) { - remove_from_list(buf); - radeon_bo_unmap(buf->bo); - buf->bo->ptr = NULL; - } - pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); - } - name = radeon_gem_name_bo(buf->bo); - if (name) { - pipe_mutex_lock(buf->mgr->buffer_handles_mutex); - util_hash_table_remove(buf->mgr->buffer_handles, - (void*)(uintptr_t)name); - pipe_mutex_unlock(buf->mgr->buffer_handles_mutex); - } - radeon_bo_unref(buf->bo); - - FREE(buf); -} - -static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage) -{ - unsigned res = 0; - - if (usage & PIPE_TRANSFER_READ) - res |= PB_USAGE_CPU_READ; - - if (usage & PIPE_TRANSFER_WRITE) - res |= PB_USAGE_CPU_WRITE; - - if (usage & PIPE_TRANSFER_DONTBLOCK) - res |= PB_USAGE_DONTBLOCK; - - if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) - res |= PB_USAGE_UNSYNCHRONIZED; - - return res; -} - -static void * -radeon_drm_buffer_map_internal(struct pb_buffer *_buf, - unsigned flags, void *flush_ctx) -{ - struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); - struct radeon_drm_cs *cs = flush_ctx; - int write = 0; - - /* Note how we use radeon_bo_is_referenced_by_cs here. There are - * basically two places this map function can be called from: - * - pb_map - * - create_buffer (in the buffer reuse case) - * - * Since pb managers are per-winsys managers, not per-context managers, - * and we shouldn't reuse buffers if they are in-use in any context, - * we simply ask: is this buffer referenced by *any* CS? - * - * The problem with buffer_create is that it comes from pipe_screen, - * so we have no CS to look at, though luckily the following code - * is sufficient to tell whether the buffer is in use. */ - if (flags & PB_USAGE_DONTBLOCK) { - if (_buf->base.usage & RADEON_PB_USAGE_VERTEX) - if (radeon_bo_is_referenced_by_cs(buf->bo, NULL)) - return NULL; - } - - if (buf->bo->ptr != NULL) { - pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); - /* Now test ptr again inside the mutex. We might have gotten a race - * during the first test. */ - if (buf->bo->ptr != NULL) { - remove_from_list(buf); - } - pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); - return buf->bo->ptr; - } - - if (flags & PB_USAGE_DONTBLOCK) { - uint32_t domain; - if (radeon_bo_is_busy(buf->bo, &domain)) - return NULL; - } - - /* If we don't have any CS and the buffer is referenced, - * we cannot flush. */ - assert(cs || !radeon_bo_is_referenced_by_cs(buf->bo, NULL)); - - if (cs && radeon_bo_is_referenced_by_cs(buf->bo, NULL)) { - cs->flush_cs(cs->flush_data); - } - - if (flags & PB_USAGE_CPU_WRITE) { - write = 1; - } - - if (radeon_bo_map(buf->bo, write)) { - return NULL; - } - - pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); - remove_from_list(buf); - pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); - return buf->bo->ptr; -} - -static void -radeon_drm_buffer_unmap_internal(struct pb_buffer *_buf) -{ - struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); - pipe_mutex_lock(buf->mgr->buffer_map_list_mutex); - if (is_empty_list(buf)) { /* = is not inserted... 
*/ - insert_at_tail(&buf->mgr->buffer_map_list, buf); - } - pipe_mutex_unlock(buf->mgr->buffer_map_list_mutex); -} - -static void -radeon_drm_buffer_get_base_buffer(struct pb_buffer *buf, - struct pb_buffer **base_buf, - unsigned *offset) -{ - *base_buf = buf; - *offset = 0; -} - - -static enum pipe_error -radeon_drm_buffer_validate(struct pb_buffer *_buf, - struct pb_validate *vl, - unsigned flags) -{ - /* Always pinned */ - return PIPE_OK; -} - -static void -radeon_drm_buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ -} - -const struct pb_vtbl radeon_drm_buffer_vtbl = { - radeon_drm_buffer_destroy, - radeon_drm_buffer_map_internal, - radeon_drm_buffer_unmap_internal, - radeon_drm_buffer_validate, - radeon_drm_buffer_fence, - radeon_drm_buffer_get_base_buffer, -}; - -static struct pb_buffer * -radeon_drm_bufmgr_create_buffer_from_handle_unsafe(struct pb_manager *_mgr, - uint32_t handle) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct radeon_drm_winsys *rws = mgr->rws; - struct radeon_drm_buffer *buf; - struct radeon_bo *bo; - - buf = util_hash_table_get(mgr->buffer_handles, (void*)(uintptr_t)handle); - - if (buf) { - struct pb_buffer *b = NULL; - pb_reference(&b, &buf->base); - return b; - } - - bo = radeon_bo_open(rws->bom, handle, 0, - 0, 0, 0); - if (bo == NULL) - return NULL; - - buf = CALLOC_STRUCT(radeon_drm_buffer); - if (!buf) { - radeon_bo_unref(bo); - return NULL; - } - - make_empty_list(buf); - - pipe_reference_init(&buf->base.base.reference, 1); - buf->base.base.alignment = 0; - buf->base.base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ; - buf->base.base.size = bo->size; - buf->base.vtbl = &radeon_drm_buffer_vtbl; - buf->mgr = mgr; - - buf->bo = bo; - - util_hash_table_set(mgr->buffer_handles, (void*)(uintptr_t)handle, buf); - - return &buf->base; -} - -struct pb_buffer * -radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, - uint32_t handle) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct pb_buffer *pb; - - pipe_mutex_lock(mgr->buffer_handles_mutex); - pb = radeon_drm_bufmgr_create_buffer_from_handle_unsafe(_mgr, handle); - pipe_mutex_unlock(mgr->buffer_handles_mutex); - - return pb; -} - -static struct pb_buffer * -radeon_drm_bufmgr_create_buffer(struct pb_manager *_mgr, - pb_size size, - const struct pb_desc *desc) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct radeon_drm_winsys *rws = mgr->rws; - struct radeon_drm_buffer *buf; - uint32_t domain; - - buf = CALLOC_STRUCT(radeon_drm_buffer); - if (!buf) - goto error1; - - pipe_reference_init(&buf->base.base.reference, 1); - buf->base.base.alignment = desc->alignment; - buf->base.base.usage = desc->usage; - buf->base.base.size = size; - buf->base.vtbl = &radeon_drm_buffer_vtbl; - buf->mgr = mgr; - - make_empty_list(buf); - - domain = - (desc->usage & RADEON_PB_USAGE_DOMAIN_GTT ? RADEON_GEM_DOMAIN_GTT : 0) | - (desc->usage & RADEON_PB_USAGE_DOMAIN_VRAM ? 
RADEON_GEM_DOMAIN_VRAM : 0); - - buf->bo = radeon_bo_open(rws->bom, 0, size, - desc->alignment, domain, 0); - if (buf->bo == NULL) - goto error2; - - return &buf->base; - - error2: - FREE(buf); - error1: - return NULL; -} - -static void -radeon_drm_bufmgr_flush(struct pb_manager *mgr) -{ - /* NOP */ -} - -static void -radeon_drm_bufmgr_destroy(struct pb_manager *_mgr) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - util_hash_table_destroy(mgr->buffer_handles); - pipe_mutex_destroy(mgr->buffer_map_list_mutex); - pipe_mutex_destroy(mgr->buffer_handles_mutex); - FREE(mgr); -} - -static unsigned handle_hash(void *key) -{ - return (unsigned)key; -} - -static int handle_compare(void *key1, void *key2) -{ - return !((int)key1 == (int)key2); -} - -struct pb_manager * -radeon_drm_bufmgr_create(struct radeon_drm_winsys *rws) -{ - struct radeon_drm_bufmgr *mgr; - - mgr = CALLOC_STRUCT(radeon_drm_bufmgr); - if (!mgr) - return NULL; - - mgr->base.destroy = radeon_drm_bufmgr_destroy; - mgr->base.create_buffer = radeon_drm_bufmgr_create_buffer; - mgr->base.flush = radeon_drm_bufmgr_flush; - - mgr->rws = rws; - make_empty_list(&mgr->buffer_map_list); - mgr->buffer_handles = util_hash_table_create(handle_hash, handle_compare); - pipe_mutex_init(mgr->buffer_map_list_mutex); - pipe_mutex_init(mgr->buffer_handles_mutex); - return &mgr->base; -} - -static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf) -{ - struct radeon_drm_buffer *buf = NULL; - - if (_buf->vtbl == &radeon_drm_buffer_vtbl) { - buf = radeon_drm_buffer(_buf); - } else { - struct pb_buffer *base_buf; - pb_size offset; - pb_get_base_buffer(_buf, &base_buf, &offset); - - if (base_buf->vtbl == &radeon_drm_buffer_vtbl) - buf = radeon_drm_buffer(base_buf); - } - - return buf; -} - -static void *radeon_drm_buffer_map(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - struct r300_winsys_cs *cs, - enum pipe_transfer_usage usage) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - - return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), radeon_drm_cs(cs)); -} - -static void radeon_drm_buffer_unmap(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buf); - - pb_unmap(_buf); -} - -boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, - struct winsys_handle *whandle) -{ - struct drm_gem_flink flink; - struct radeon_drm_buffer *buf = get_drm_buffer(_buf); - - if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { - if (!buf->flinked) { - flink.handle = buf->bo->handle; - - if (ioctl(buf->mgr->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { - return FALSE; - } - - buf->flinked = TRUE; - buf->flink = flink.name; - } - whandle->handle = buf->flink; - } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { - whandle->handle = buf->bo->handle; - } - return TRUE; -} - -static void radeon_drm_buffer_get_tiling(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf, - enum r300_buffer_tiling *microtiled, - enum r300_buffer_tiling *macrotiled) -{ - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); - uint32_t flags = 0, pitch; - - radeon_bo_get_tiling(buf->bo, &flags, &pitch); - - *microtiled = R300_BUFFER_LINEAR; - *macrotiled = R300_BUFFER_LINEAR; - if (flags & RADEON_BO_FLAGS_MICRO_TILE) - *microtiled = R300_BUFFER_TILED; - - if (flags & RADEON_BO_FLAGS_MACRO_TILE) - *macrotiled = R300_BUFFER_TILED; -} - -static void radeon_drm_buffer_set_tiling(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf, - enum 
r300_buffer_tiling microtiled, - enum r300_buffer_tiling macrotiled, - uint32_t pitch) -{ - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); - uint32_t flags = 0; - if (microtiled == R300_BUFFER_TILED) - flags |= RADEON_BO_FLAGS_MICRO_TILE; -/* XXX Remove this ifdef when libdrm version 2.4.19 becomes mandatory. */ -#ifdef RADEON_BO_FLAGS_MICRO_TILE_SQUARE - else if (microtiled == R300_BUFFER_SQUARETILED) - flags |= RADEON_BO_FLAGS_MICRO_TILE_SQUARE; -#endif - if (macrotiled == R300_BUFFER_TILED) - flags |= RADEON_BO_FLAGS_MACRO_TILE; - - radeon_bo_set_tiling(buf->bo, flags, pitch); -} - -static struct r300_winsys_cs_buffer *radeon_drm_get_cs_handle( - struct r300_winsys_screen *rws, - struct r300_winsys_buffer *_buf) -{ - /* return pure radeon_bo. */ - return (struct r300_winsys_cs_buffer*) - get_drm_buffer(radeon_pb_buffer(_buf))->bo; -} - -static boolean radeon_drm_is_buffer_referenced(struct r300_winsys_cs *rcs, - struct r300_winsys_cs_buffer *_buf, - enum r300_reference_domain domain) -{ - struct radeon_bo *bo = (struct radeon_bo*)_buf; - uint32_t tmp; - - if (domain & R300_REF_CS) { - if (radeon_bo_is_referenced_by_cs(bo, NULL)) { - return TRUE; - } - } - - if (domain & R300_REF_HW) { - if (radeon_bo_is_busy(bo, &tmp)) { - return TRUE; - } - } - - return FALSE; -} - -void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) -{ - struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); - struct radeon_drm_buffer *rpb, *t_rpb; - - pipe_mutex_lock(mgr->buffer_map_list_mutex); - - foreach_s(rpb, t_rpb, &mgr->buffer_map_list) { - radeon_bo_unmap(rpb->bo); - rpb->bo->ptr = NULL; - remove_from_list(rpb); - } - - make_empty_list(&mgr->buffer_map_list); - - pipe_mutex_unlock(mgr->buffer_map_list_mutex); -} - -static void radeon_drm_buffer_wait(struct r300_winsys_screen *ws, - struct r300_winsys_buffer *_buf) -{ - struct radeon_drm_buffer *buf = get_drm_buffer(radeon_pb_buffer(_buf)); - - radeon_bo_wait(buf->bo); -} - -void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws) -{ - ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; - ws->base.buffer_set_tiling = radeon_drm_buffer_set_tiling; - ws->base.buffer_get_tiling = radeon_drm_buffer_get_tiling; - ws->base.buffer_map = radeon_drm_buffer_map; - ws->base.buffer_unmap = radeon_drm_buffer_unmap; - ws->base.buffer_wait = radeon_drm_buffer_wait; - ws->base.cs_is_buffer_referenced = radeon_drm_is_buffer_referenced; -} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c index fe71f080592..4676c2a1ea9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c @@ -1,5 +1,6 @@ /* * Copyright © 2009 Corbin Simpson + * Copyright © 2011 Marek Olšák <[email protected]> * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining @@ -27,38 +28,35 @@ * Authors: * Corbin Simpson <[email protected]> * Joakim Sindholt <[email protected]> + * Marek Olšák <[email protected]> */ #include "radeon_winsys.h" -#include "radeon_drm_buffer.h" +#include "radeon_drm_bo.h" #include "radeon_drm_cs.h" #include "radeon_drm_public.h" #include "pipebuffer/pb_bufmgr.h" #include "util/u_memory.h" -#include <radeon_drm.h> -#include <radeon_bo_gem.h> -#include <radeon_cs_gem.h> #include <xf86drm.h> #include <stdio.h> - -/* Enable/disable Hyper-Z access. Return TRUE on success. 
*/ -static boolean radeon_set_hyperz_access(int fd, boolean enable) -{ #ifndef RADEON_INFO_WANT_HYPERZ #define RADEON_INFO_WANT_HYPERZ 7 #endif +#ifndef RADEON_INFO_WANT_CMASK +#define RADEON_INFO_WANT_CMASK 8 +#endif +/* Enable/disable feature access. Return TRUE on success. */ +static boolean radeon_set_fd_access(int fd, unsigned request, boolean enable) +{ struct drm_radeon_info info = {0}; unsigned value = enable ? 1 : 0; - if (!debug_get_bool_option("RADEON_HYPERZ", FALSE)) - return FALSE; - info.value = (unsigned long)&value; - info.request = RADEON_INFO_WANT_HYPERZ; + info.request = request; if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) return FALSE; @@ -107,23 +105,9 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) exit(1); } -/* XXX Remove this ifdef when libdrm version 2.4.19 becomes mandatory. */ -#ifdef RADEON_BO_FLAGS_MICRO_TILE_SQUARE - // Supported since 2.1.0. - winsys->squaretiling = version->version_major > 2 || - version->version_minor >= 1; -#endif - - winsys->drm_2_3_0 = version->version_major > 2 || - version->version_minor >= 3; - - winsys->drm_2_6_0 = version->version_major > 2 || - (version->version_major == 2 && - version->version_minor >= 6); - - winsys->drm_2_8_0 = version->version_major > 2 || - (version->version_major == 2 && - version->version_minor >= 8); + winsys->drm_major = version->version_major; + winsys->drm_minor = version->version_minor; + winsys->drm_patchlevel = version->version_patchlevel; info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info)); @@ -152,7 +136,15 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) } winsys->z_pipes = target; - winsys->hyperz = radeon_set_hyperz_access(winsys->fd, TRUE); + if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) { + winsys->hyperz = radeon_set_fd_access(winsys->fd, + RADEON_INFO_WANT_HYPERZ, TRUE); + } + + if (debug_get_bool_option("RADEON_CMASK", FALSE)) { + winsys->aacompress = radeon_set_fd_access(winsys->fd, + RADEON_INFO_WANT_CMASK, TRUE); + } retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); @@ -164,17 +156,6 @@ static void do_ioctls(struct radeon_drm_winsys *winsys) winsys->gart_size = gem_info.gart_size; winsys->vram_size = gem_info.vram_size; - debug_printf("radeon: Successfully grabbed chipset info from kernel!\n" - "radeon: DRM version: %d.%d.%d ID: 0x%04x GB: %d Z: %d\n" - "radeon: GART size: %d MB VRAM size: %d MB\n" - "radeon: HyperZ: %s\n", - version->version_major, version->version_minor, - version->version_patchlevel, winsys->pci_id, - winsys->gb_pipes, winsys->z_pipes, - winsys->gart_size / 1024 / 1024, - winsys->vram_size / 1024 / 1024, - winsys->hyperz ? 
"YES" : "NO"); - drmFreeVersion(version); } @@ -184,11 +165,47 @@ static void radeon_winsys_destroy(struct r300_winsys_screen *rws) ws->cman->destroy(ws->cman); ws->kman->destroy(ws->kman); - - radeon_bo_manager_gem_dtor(ws->bom); FREE(rws); } +static uint32_t radeon_get_value(struct r300_winsys_screen *rws, + enum r300_value_id id) +{ + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws; + + switch(id) { + case R300_VID_PCI_ID: + return ws->pci_id; + case R300_VID_GB_PIPES: + return ws->gb_pipes; + case R300_VID_Z_PIPES: + return ws->z_pipes; + case R300_VID_GART_SIZE: + return ws->gart_size; + case R300_VID_VRAM_SIZE: + return ws->vram_size; + case R300_VID_DRM_MAJOR: + return ws->drm_major; + case R300_VID_DRM_MINOR: + return ws->drm_minor; + case R300_VID_DRM_PATCHLEVEL: + return ws->drm_patchlevel; + case R300_VID_DRM_2_1_0: + return ws->drm_major*100 + ws->drm_minor >= 201; + case R300_VID_DRM_2_3_0: + return ws->drm_major*100 + ws->drm_minor >= 203; + case R300_VID_DRM_2_6_0: + return ws->drm_major*100 + ws->drm_minor >= 206; + case R300_VID_DRM_2_8_0: + return ws->drm_major*100 + ws->drm_minor >= 208; + case R300_CAN_HYPERZ: + return ws->hyperz; + case R300_CAN_AACOMPRESS: + return ws->aacompress; + } + return 0; +} + struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) { struct radeon_drm_winsys *ws = CALLOC_STRUCT(radeon_drm_winsys); @@ -204,10 +221,7 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) } /* Create managers. */ - ws->bom = radeon_bo_manager_gem_ctor(fd); - if (!ws->bom) - goto fail; - ws->kman = radeon_drm_bufmgr_create(ws); + ws->kman = radeon_bomgr_create(ws); if (!ws->kman) goto fail; ws->cman = pb_cache_manager_create(ws->kman, 1000000); @@ -216,22 +230,18 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) /* Set functions. 
*/ ws->base.destroy = radeon_winsys_destroy; + ws->base.get_value = radeon_get_value; - radeon_drm_bufmgr_init_functions(ws); + radeon_bomgr_init_functions(ws); radeon_drm_cs_init_functions(ws); - radeon_winsys_init_functions(ws); return &ws->base; fail: - if (ws->bom) - radeon_bo_manager_gem_dtor(ws->bom); - if (ws->cman) ws->cman->destroy(ws->cman); if (ws->kman) ws->kman->destroy(ws->kman); - FREE(ws); return NULL; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 60bc36b0929..a38b01048b2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -63,17 +63,73 @@ */ #include "radeon_drm_cs.h" -#include "radeon_drm_buffer.h" #include "util/u_memory.h" +#include <stdio.h> #include <stdlib.h> #include <stdint.h> -#include <radeon_bo.h> #include <xf86drm.h> #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) +static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd) +{ + csc->fd = fd; + csc->nrelocs = 512; + csc->relocs_bo = (struct radeon_bo**) + CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); + if (!csc->relocs_bo) { + return FALSE; + } + + csc->relocs = (struct drm_radeon_cs_reloc*) + CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc)); + if (!csc->relocs) { + FREE(csc->relocs_bo); + return FALSE; + } + + csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; + csc->chunks[0].length_dw = 0; + csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf; + csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; + csc->chunks[1].length_dw = 0; + csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; + + csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0]; + csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1]; + + csc->cs.num_chunks = 2; + csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array; + return TRUE; +} + +static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) +{ + unsigned i; + + for (i = 0; i < csc->crelocs; i++) { + p_atomic_dec(&csc->relocs_bo[i]->num_cs_references); + radeon_bo_unref(csc->relocs_bo[i]); + csc->relocs_bo[i] = NULL; + } + + csc->crelocs = 0; + csc->chunks[0].length_dw = 0; + csc->chunks[1].length_dw = 0; + csc->used_gart = 0; + csc->used_vram = 0; + memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added)); +} + +static void radeon_destroy_cs_context(struct radeon_cs_context *csc) +{ + radeon_cs_context_cleanup(csc); + FREE(csc->relocs_bo); + FREE(csc->relocs); +} + static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rws) { struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); @@ -85,34 +141,29 @@ static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rw } cs->ws = ws; - cs->nrelocs = 256; - cs->relocs_bo = (struct radeon_bo**) - CALLOC(1, cs->nrelocs * sizeof(struct radeon_bo*)); - if (!cs->relocs_bo) { + + if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) { FREE(cs); return NULL; } - - cs->relocs = (struct drm_radeon_cs_reloc*) - CALLOC(1, cs->nrelocs * sizeof(struct drm_radeon_cs_reloc)); - if (!cs->relocs) { - FREE(cs->relocs_bo); + if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) { + radeon_destroy_cs_context(&cs->csc1); FREE(cs); return NULL; } - cs->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; - cs->chunks[0].length_dw = 0; - cs->chunks[0].chunk_data = (uint64_t)(uintptr_t)cs->base.buf; - cs->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; - cs->chunks[1].length_dw = 0; - cs->chunks[1].chunk_data = 
(uint64_t)(uintptr_t)cs->relocs; + /* Set the first command buffer as current. */ + cs->csc = &cs->csc1; + cs->cst = &cs->csc2; + cs->base.buf = cs->csc->buf; + + p_atomic_inc(&ws->num_cs); return &cs->base; } #define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) -static inline void update_domains(struct drm_radeon_cs_reloc *reloc, +static INLINE void update_domains(struct drm_radeon_cs_reloc *reloc, enum r300_buffer_domain rd, enum r300_buffer_domain wd, enum r300_buffer_domain *added_domains) @@ -131,23 +182,22 @@ static inline void update_domains(struct drm_radeon_cs_reloc *reloc, } } -static int radeon_get_reloc(struct radeon_drm_cs *cs, - struct radeon_bo *bo) +int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) { struct drm_radeon_cs_reloc *reloc; unsigned i; - unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1); + unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); - if (cs->is_handle_added[hash]) { - reloc = cs->relocs_hashlist[hash]; + if (csc->is_handle_added[hash]) { + reloc = csc->relocs_hashlist[hash]; if (reloc->handle == bo->handle) { - return cs->reloc_indices_hashlist[hash]; + return csc->reloc_indices_hashlist[hash]; } /* Hash collision, look for the BO in the list of relocs linearly. */ - for (i = cs->crelocs; i != 0;) { + for (i = csc->crelocs; i != 0;) { --i; - reloc = &cs->relocs[i]; + reloc = &csc->relocs[i]; if (reloc->handle == bo->handle) { /* Put this reloc in the hash list. * This will prevent additional hash collisions if there are @@ -158,8 +208,8 @@ static int radeon_get_reloc(struct radeon_drm_cs *cs, * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC * will collide here: ^ and here: ^, * meaning that we should get very few collisions in the end. */ - cs->relocs_hashlist[hash] = reloc; - cs->reloc_indices_hashlist[hash] = i; + csc->relocs_hashlist[hash] = reloc; + csc->reloc_indices_hashlist[hash] = i; /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ return i; } @@ -169,7 +219,7 @@ static int radeon_get_reloc(struct radeon_drm_cs *cs, return -1; } -static void radeon_add_reloc(struct radeon_drm_cs *cs, +static void radeon_add_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo, enum r300_buffer_domain rd, enum r300_buffer_domain wd, @@ -177,24 +227,24 @@ static void radeon_add_reloc(struct radeon_drm_cs *cs, { struct drm_radeon_cs_reloc *reloc; unsigned i; - unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1); + unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); - if (cs->is_handle_added[hash]) { - reloc = cs->relocs_hashlist[hash]; + if (csc->is_handle_added[hash]) { + reloc = csc->relocs_hashlist[hash]; if (reloc->handle == bo->handle) { update_domains(reloc, rd, wd, added_domains); return; } /* Hash collision, look for the BO in the list of relocs linearly. */ - for (i = cs->crelocs; i != 0;) { + for (i = csc->crelocs; i != 0;) { --i; - reloc = &cs->relocs[i]; + reloc = &csc->relocs[i]; if (reloc->handle == bo->handle) { update_domains(reloc, rd, wd, added_domains); - cs->relocs_hashlist[hash] = reloc; - cs->reloc_indices_hashlist[hash] = i; + csc->relocs_hashlist[hash] = reloc; + csc->reloc_indices_hashlist[hash] = i; /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ return; } @@ -202,40 +252,41 @@ static void radeon_add_reloc(struct radeon_drm_cs *cs, } /* New relocation, check if the backing array is large enough. 
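radeon_get_reloc() above keys a 256-entry cache on the low byte of the BO handle: the last reloc seen for each bucket is remembered, and only on a mismatch does the code fall back to a backwards linear scan that then re-primes the bucket. A condensed sketch of the same lookup, with hypothetical types:

    #include <stdint.h>

    #define MAX_RELOCS 512

    struct reloc_entry { uint32_t handle; };

    struct reloc_table {
        struct reloc_entry relocs[MAX_RELOCS];
        unsigned crelocs;                    /* number of relocs in use */
        char is_handle_added[256];           /* bucket ever primed? */
        unsigned reloc_index_hash[256];      /* last reloc index per bucket */
    };

    static int find_reloc(struct reloc_table *t, uint32_t handle)
    {
        unsigned hash = handle & 255;
        unsigned i;

        if (!t->is_handle_added[hash])
            return -1;

        i = t->reloc_index_hash[hash];
        if (t->relocs[i].handle == handle)
            return (int)i;                   /* fast path: cached bucket hit */

        for (i = t->crelocs; i != 0;) {      /* collision: scan newest first */
            --i;
            if (t->relocs[i].handle == handle) {
                t->reloc_index_hash[hash] = i;   /* re-prime the bucket */
                return (int)i;
            }
        }
        return -1;
    }

The add path (radeon_add_reloc()) sets is_handle_added and the cached index when a new reloc is appended, which, together with the memset in radeon_cs_context_cleanup(), keeps the cached indices from going stale.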
*/ - if (cs->crelocs >= cs->nrelocs) { + if (csc->crelocs >= csc->nrelocs) { uint32_t size; - cs->nrelocs += 10; + csc->nrelocs += 10; - size = cs->nrelocs * sizeof(struct radeon_bo*); - cs->relocs_bo = (struct radeon_bo**)realloc(cs->relocs_bo, size); + size = csc->nrelocs * sizeof(struct radeon_bo*); + csc->relocs_bo = (struct radeon_bo**)realloc(csc->relocs_bo, size); - size = cs->nrelocs * sizeof(struct drm_radeon_cs_reloc); - cs->relocs = (struct drm_radeon_cs_reloc*)realloc(cs->relocs, size); + size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc); + csc->relocs = (struct drm_radeon_cs_reloc*)realloc(csc->relocs, size); - cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs; + csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; } /* Initialize the new relocation. */ radeon_bo_ref(bo); - cs->relocs_bo[cs->crelocs] = bo; - reloc = &cs->relocs[cs->crelocs]; + p_atomic_inc(&bo->num_cs_references); + csc->relocs_bo[csc->crelocs] = bo; + reloc = &csc->relocs[csc->crelocs]; reloc->handle = bo->handle; reloc->read_domains = rd; reloc->write_domain = wd; reloc->flags = 0; - cs->is_handle_added[hash] = TRUE; - cs->relocs_hashlist[hash] = reloc; - cs->reloc_indices_hashlist[hash] = cs->crelocs; + csc->is_handle_added[hash] = TRUE; + csc->relocs_hashlist[hash] = reloc; + csc->reloc_indices_hashlist[hash] = csc->crelocs; - cs->chunks[1].length_dw += RELOC_DWORDS; - cs->crelocs++; + csc->chunks[1].length_dw += RELOC_DWORDS; + csc->crelocs++; *added_domains = rd | wd; } static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs, - struct r300_winsys_cs_buffer *buf, + struct r300_winsys_cs_handle *buf, enum r300_buffer_domain rd, enum r300_buffer_domain wd) { @@ -243,32 +294,32 @@ static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs, struct radeon_bo *bo = (struct radeon_bo*)buf; enum r300_buffer_domain added_domains; - radeon_add_reloc(cs, bo, rd, wd, &added_domains); + radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains); if (!added_domains) return; if (added_domains & R300_DOMAIN_GTT) - cs->used_gart += bo->size; + cs->csc->used_gart += bo->size; if (added_domains & R300_DOMAIN_VRAM) - cs->used_vram += bo->size; + cs->csc->used_vram += bo->size; } static boolean radeon_drm_cs_validate(struct r300_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - return cs->used_gart < cs->ws->gart_size * 0.8 && - cs->used_vram < cs->ws->vram_size * 0.8; + return cs->csc->used_gart < cs->ws->gart_size * 0.8 && + cs->csc->used_vram < cs->ws->vram_size * 0.8; } static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, - struct r300_winsys_cs_buffer *buf) + struct r300_winsys_cs_handle *buf) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; - unsigned index = radeon_get_reloc(cs, bo); + unsigned index = radeon_get_reloc(cs->csc, bo); if (index == -1) { fprintf(stderr, "r300: Cannot get a relocation in %s.\n", __func__); @@ -279,75 +330,109 @@ static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, OUT_CS(&cs->base, index * RELOC_DWORDS); } -static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) +static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - uint64_t chunk_array[2]; + struct radeon_cs_context *csc = (struct radeon_cs_context*)param; unsigned i; - int r; - if (cs->base.cdw) { - /* Unmap buffers. */ - radeon_drm_bufmgr_flush_maps(cs->ws->kman); - - /* Prepare the arguments. 
*/ - cs->chunks[0].length_dw = cs->base.cdw; - - chunk_array[0] = (uint64_t)(uintptr_t)&cs->chunks[0]; - chunk_array[1] = (uint64_t)(uintptr_t)&cs->chunks[1]; - - cs->cs.num_chunks = 2; - cs->cs.chunks = (uint64_t)(uintptr_t)chunk_array; - - /* Emit. */ - r = drmCommandWriteRead(cs->ws->fd, DRM_RADEON_CS, - &cs->cs, sizeof(struct drm_radeon_cs)); - if (r) { - if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { - fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); - fprintf(stderr, "VENDORID:DEVICEID 0x%04X:0x%04X\n", 0x1002, - cs->ws->pci_id); - for (i = 0; i < cs->base.cdw; i++) { - fprintf(stderr, "0x%08X\n", cs->base.buf[i]); - } - } else { - fprintf(stderr, "radeon: The kernel rejected CS, " - "see dmesg for more information.\n"); + if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS, + &csc->cs, sizeof(struct drm_radeon_cs))) { + if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { + unsigned i; + + fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); + for (i = 0; i < csc->chunks[0].length_dw; i++) { + fprintf(stderr, "0x%08X\n", csc->buf[i]); } + } else { + fprintf(stderr, "radeon: The kernel rejected CS, " + "see dmesg for more information.\n"); } } - /* Unreference buffers, cleanup. */ - for (i = 0; i < cs->crelocs; i++) { - radeon_bo_unref((struct radeon_bo*)cs->relocs_bo[i]); - cs->relocs_bo[i] = NULL; + for (i = 0; i < csc->crelocs; i++) + p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls); + return NULL; +} + +void radeon_drm_cs_sync_flush(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + + /* Wait for any pending ioctl to complete. */ + if (cs->thread) { + pipe_thread_wait(cs->thread); + cs->thread = 0; } +} + +DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) + +void radeon_drm_cs_flush(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_cs_context *tmp; + + radeon_drm_cs_sync_flush(rcs); + + /* If the CS is not empty, emit it in a newly-spawned thread. */ + if (cs->base.cdw) { + unsigned i, crelocs = cs->csc->crelocs; + cs->csc->chunks[0].length_dw = cs->base.cdw; + + for (i = 0; i < crelocs; i++) + p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls); + + if (debug_get_option_thread()) { + cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs->csc); + assert(cs->thread); + } else { + radeon_drm_cs_emit_ioctl(cs->csc); + } + } + + /* Flip command streams. */ + tmp = cs->csc; + cs->csc = cs->cst; + cs->cst = tmp; + + /* Prepare a new CS. 
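The rewritten flush path above keeps two radeon_cs_context structures per CS: radeon_drm_cs_flush() waits for any previous submission, hands the just-filled context to a worker thread that performs the DRM_RADEON_CS ioctl (or runs it inline when the RADEON_THREAD option is disabled), then flips the two contexts so the driver can keep writing into the other one. A stripped-down sketch of that double-buffering, using plain pthreads instead of the pipe_thread wrappers; all names are hypothetical:

    #include <pthread.h>
    #include <stdbool.h>

    struct cmd_buf { unsigned buf[1024]; unsigned cdw; };

    struct cmd_stream {
        struct cmd_buf a, b;
        struct cmd_buf *filling;     /* written by the pipe driver */
        struct cmd_buf *submitting;  /* owned by the worker thread */
        pthread_t thread;
        bool thread_running;
        /* at init: filling = &a; submitting = &b; */
    };

    static void *submit_thread(void *param)
    {
        /* The real code issues drmCommandWriteRead(fd, DRM_RADEON_CS, ...)
         * here and then drops the per-BO "active ioctl" counts. */
        (void)param;
        return NULL;
    }

    static void sync_flush(struct cmd_stream *cs)
    {
        if (cs->thread_running) {            /* at most one submission in flight */
            pthread_join(cs->thread, NULL);
            cs->thread_running = false;
        }
    }

    static void flush(struct cmd_stream *cs)
    {
        struct cmd_buf *tmp;

        sync_flush(cs);                      /* previous ioctl must be done */

        if (cs->filling->cdw)                /* only submit non-empty buffers */
            cs->thread_running =
                pthread_create(&cs->thread, NULL, submit_thread, cs->filling) == 0;

        tmp = cs->filling;                   /* flip the two contexts */
        cs->filling = cs->submitting;
        cs->submitting = tmp;
        cs->filling->cdw = 0;                /* reuse the finished one */
    }

Anything that must observe the ioctl's completion calls the equivalent of sync_flush() first; radeon_bo_set_tiling() in the buffer hunk does exactly that via radeon_drm_cs_sync_flush(), while the map path instead spins on the per-BO num_active_ioctls counter.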
*/ + radeon_cs_context_cleanup(cs->csc); + + cs->base.buf = cs->csc->buf; cs->base.cdw = 0; - cs->crelocs = 0; - cs->chunks[0].length_dw = 0; - cs->chunks[1].length_dw = 0; - cs->used_gart = 0; - cs->used_vram = 0; - memset(cs->is_handle_added, 0, sizeof(cs->is_handle_added)); } static void radeon_drm_cs_destroy(struct r300_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - FREE(cs->relocs_bo); - FREE(cs->relocs); + radeon_drm_cs_sync_flush(rcs); + radeon_cs_context_cleanup(&cs->csc1); + radeon_cs_context_cleanup(&cs->csc2); + p_atomic_dec(&cs->ws->num_cs); + radeon_destroy_cs_context(&cs->csc1); + radeon_destroy_cs_context(&cs->csc2); FREE(cs); } static void radeon_drm_cs_set_flush(struct r300_winsys_cs *rcs, - void (*flush)(void *), void *user) + void (*flush)(void *), void *user) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); cs->flush_cs = flush; cs->flush_data = user; } +static boolean radeon_bo_is_referenced(struct r300_winsys_cs *rcs, + struct r300_winsys_cs_handle *_buf) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)_buf; + + return radeon_bo_is_referenced_by_cs(cs, bo); +} + void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) { ws->base.cs_create = radeon_drm_cs_create; @@ -355,6 +440,8 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; ws->base.cs_validate = radeon_drm_cs_validate; ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; - ws->base.cs_flush = radeon_drm_cs_emit; + ws->base.cs_flush = radeon_drm_cs_flush; + ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; ws->base.cs_set_flush = radeon_drm_cs_set_flush; + ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 76046534b65..4cc97f37e09 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -1,42 +1,103 @@ +/* + * Copyright © 2011 Marek Olšák <[email protected]> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + #ifndef RADEON_DRM_CS_H #define RADEON_DRM_CS_H -#include "radeon_winsys.h" +#include "radeon_drm_bo.h" #include <radeon_drm.h> -struct radeon_drm_cs { - struct r300_winsys_cs base; - - /* The winsys. 
*/ - struct radeon_drm_winsys *ws; +struct radeon_cs_context { + uint32_t buf[R300_MAX_CMDBUF_DWORDS]; - /* Flush CS. */ - void (*flush_cs)(void *); - void *flush_data; + int fd; + struct drm_radeon_cs cs; + struct drm_radeon_cs_chunk chunks[2]; + uint64_t chunk_array[2]; /* Relocs. */ - unsigned crelocs; unsigned nrelocs; - struct drm_radeon_cs_reloc *relocs; + unsigned crelocs; struct radeon_bo **relocs_bo; - struct drm_radeon_cs cs; - struct drm_radeon_cs_chunk chunks[2]; - - unsigned used_vram; - unsigned used_gart; + struct drm_radeon_cs_reloc *relocs; /* 0 = BO not added, 1 = BO added */ char is_handle_added[256]; struct drm_radeon_cs_reloc *relocs_hashlist[256]; unsigned reloc_indices_hashlist[256]; + + unsigned used_vram; + unsigned used_gart; }; +struct radeon_drm_cs { + struct r300_winsys_cs base; + + /* We flip between these two CS. While one is being consumed + * by the kernel in another thread, the other one is being filled + * by the pipe driver. */ + struct radeon_cs_context csc1; + struct radeon_cs_context csc2; + /* The currently-used CS. */ + struct radeon_cs_context *csc; + /* The CS being currently-owned by the other thread. */ + struct radeon_cs_context *cst; + + /* The winsys. */ + struct radeon_drm_winsys *ws; + + /* Flush CS. */ + void (*flush_cs)(void *); + void *flush_data; + + pipe_thread thread; +}; + +int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo); + static INLINE struct radeon_drm_cs * radeon_drm_cs(struct r300_winsys_cs *base) { return (struct radeon_drm_cs*)base; } +static INLINE boolean radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, + struct radeon_bo *bo) +{ + return bo->num_cs_references == bo->rws->num_cs || + (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1); +} + +static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) +{ + return bo->num_cs_references; +} + +void radeon_drm_cs_flush(struct r300_winsys_cs *rcs); +void radeon_drm_cs_sync_flush(struct r300_winsys_cs *rcs); void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws); #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c deleted file mode 100644 index bacf181b47c..00000000000 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "radeon_drm_buffer.h" - -#include "util/u_memory.h" -#include "pipebuffer/pb_bufmgr.h" - -#include "state_tracker/drm_driver.h" - -static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, - enum r300_buffer_domain domain) -{ - unsigned res = 0; - - if (bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET | - PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT)) - res |= PB_USAGE_GPU_WRITE; - - if (bind & PIPE_BIND_SAMPLER_VIEW) - res |= PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE; - - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - res |= PB_USAGE_GPU_READ; - - if (bind & PIPE_BIND_TRANSFER_WRITE) - res |= PB_USAGE_CPU_WRITE; - - if (bind & PIPE_BIND_TRANSFER_READ) - res |= PB_USAGE_CPU_READ; - - /* Is usage of any use for us? Probably not. */ - - /* Now add driver-specific usage flags. */ - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - res |= RADEON_PB_USAGE_VERTEX; - - if (domain & R300_DOMAIN_GTT) - res |= RADEON_PB_USAGE_DOMAIN_GTT; - - if (domain & R300_DOMAIN_VRAM) - res |= RADEON_PB_USAGE_DOMAIN_VRAM; - - return res; -} - -static struct r300_winsys_buffer * -radeon_r300_winsys_buffer_create(struct r300_winsys_screen *rws, - unsigned size, - unsigned alignment, - unsigned bind, - unsigned usage, - enum r300_buffer_domain domain) -{ - struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); - struct pb_desc desc; - struct pb_manager *provider; - struct pb_buffer *buffer; - - memset(&desc, 0, sizeof(desc)); - desc.alignment = alignment; - desc.usage = get_pb_usage_from_create_flags(bind, usage, domain); - - /* Assign a buffer manager. */ - if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - provider = ws->cman; - else - provider = ws->kman; - - buffer = provider->create_buffer(provider, size, &desc); - if (!buffer) - return NULL; - - return (struct r300_winsys_buffer*)buffer; -} - -static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, - struct r300_winsys_buffer **pdst, - struct r300_winsys_buffer *src) -{ - struct pb_buffer *_src = radeon_pb_buffer(src); - struct pb_buffer *_dst = radeon_pb_buffer(*pdst); - - pb_reference(&_dst, _src); - - *pdst = (struct r300_winsys_buffer*)_dst; -} - -static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, - struct winsys_handle *whandle, - unsigned *stride, - unsigned *size) -{ - struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); - struct pb_buffer *_buf; - - _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); - - if (stride) - *stride = whandle->stride; - if (size) - *size = _buf->base.size; - - return (struct r300_winsys_buffer*)_buf; -} - -static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buffer, - unsigned stride, - struct winsys_handle *whandle) -{ - struct pb_buffer *_buf = radeon_pb_buffer(buffer); - whandle->stride = stride; - return radeon_drm_bufmgr_get_handle(_buf, whandle); -} - -static uint32_t radeon_get_value(struct r300_winsys_screen *rws, - enum r300_value_id id) -{ - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws; - - switch(id) { - case R300_VID_PCI_ID: - return ws->pci_id; - case R300_VID_GB_PIPES: - return ws->gb_pipes; - case R300_VID_Z_PIPES: - return ws->z_pipes; - case R300_VID_SQUARE_TILING_SUPPORT: - return ws->squaretiling; - case R300_VID_DRM_2_3_0: - return ws->drm_2_3_0; - case R300_VID_DRM_2_6_0: - return ws->drm_2_6_0; - case R300_VID_DRM_2_8_0: - return 
ws->drm_2_8_0; - case R300_CAN_HYPERZ: - return ws->hyperz; - } - return 0; -} - -void radeon_winsys_init_functions(struct radeon_drm_winsys *ws) -{ - ws->base.get_value = radeon_get_value; - ws->base.buffer_create = radeon_r300_winsys_buffer_create; - ws->base.buffer_reference = radeon_r300_winsys_buffer_reference; - ws->base.buffer_from_handle = radeon_r300_winsys_buffer_from_handle; - ws->base.buffer_get_handle = radeon_r300_winsys_buffer_get_handle; -} diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 492edfef8c3..f8a89abcfe4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -36,8 +36,8 @@ struct radeon_drm_winsys { struct r300_winsys_screen base; int fd; /* DRM file descriptor */ + int num_cs; /* The number of command streams created. */ - struct radeon_bo_manager *bom; /* Radeon BO manager. */ struct pb_manager *kman; struct pb_manager *cman; @@ -46,15 +46,15 @@ struct radeon_drm_winsys { uint32_t z_pipes; /* Z pipe count (rv530 only) */ uint32_t gart_size; /* GART size. */ uint32_t vram_size; /* VRAM size. */ - boolean squaretiling; /* Square tiling support. */ - /* DRM 2.3.0 (R500 VAP regs, MSPOS regs, fixed tex3D size checking) */ - boolean drm_2_3_0; - /* DRM 2.6.0 (Hyper-Z, GB_Z_PEQ_CONFIG allowed on rv350->r4xx, FG_ALPHA_VALUE) */ - boolean drm_2_6_0; - /* DRM 2.8.0 (US_FORMAT regs, ARGB2101010 colorbuffer) */ - boolean drm_2_8_0; + + unsigned drm_major; + unsigned drm_minor; + unsigned drm_patchlevel; + /* Hyper-Z user */ boolean hyperz; + /* AA compression (CMask) */ + boolean aacompress; }; static INLINE struct radeon_drm_winsys * @@ -63,6 +63,4 @@ radeon_drm_winsys(struct r300_winsys_screen *base) return (struct radeon_drm_winsys*)base; } -void radeon_winsys_init_functions(struct radeon_drm_winsys *ws); - #endif diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.c b/src/gallium/winsys/svga/drm/vmw_buffer.c index eca174a6c56..80d5dfc3ed4 100644 --- a/src/gallium/winsys/svga/drm/vmw_buffer.c +++ b/src/gallium/winsys/svga/drm/vmw_buffer.c @@ -120,7 +120,8 @@ vmw_gmr_buffer_destroy(struct pb_buffer *_buf) static void * vmw_gmr_buffer_map(struct pb_buffer *_buf, - unsigned flags) + unsigned flags, + void *flush_ctx) { struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf); return buf->map; diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c index 11626ee637d..f2124c1bf65 100644 --- a/src/gallium/winsys/svga/drm/vmw_context.c +++ b/src/gallium/winsys/svga/drm/vmw_context.c @@ -320,7 +320,7 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc, * to the FIFO won't cause flushing in the host. */ vswc->seen_regions += reloc->buffer->base.size; - if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/2) + if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/3) vswc->preemptive_flush = TRUE; } diff --git a/src/gallium/winsys/sw/xlib/SConscript b/src/gallium/winsys/sw/xlib/SConscript index df01a9ec2bf..f6c47411831 100644 --- a/src/gallium/winsys/sw/xlib/SConscript +++ b/src/gallium/winsys/sw/xlib/SConscript @@ -4,7 +4,7 @@ Import('*') -if env['platform'] in ('cygwin', 'linux'): +if env['platform'] in ('cygwin_nt-5.1', 'cygwin_nt-6.1', 'linux'): env = env.Clone() |
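
For context on the radeon_drm_cs hunks above: the new radeon_drm_cs_flush()/radeon_drm_cs_sync_flush() pair double-buffers the command stream. flush() hands the filled radeon_cs_context to a worker thread (spawned with pipe_thread_create and gated by the RADEON_THREAD debug option) that performs the DRM_RADEON_CS ioctl, then swaps csc and cst so the driver can keep filling the other context; sync_flush() waits for that thread before the submitted context is reused. The sketch below is an illustrative, stand-alone rendering of that pattern, not code from the commit: it uses plain POSIX threads instead of gallium's pipe_thread wrappers, and the names (struct cs_context, cs_flush, submit_ioctl) are made up for the example.

/*
 * Illustrative sketch only: double-buffered command-stream submission.
 * One context is filled by the "driver" thread while the previously
 * filled one is consumed by a worker thread, mirroring csc/cst flipping.
 * Build with: cc -std=c99 -pthread cs_flip.c
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define CS_DWORDS 16

struct cs_context {
    unsigned buf[CS_DWORDS];   /* command dwords */
    unsigned cdw;              /* number of valid dwords */
};

struct cs {
    struct cs_context a, b;    /* the two flip buffers (csc1/csc2 in the commit) */
    struct cs_context *cur;    /* context being filled by the driver */
    struct cs_context *sub;    /* context owned by the submission thread */
    pthread_t thread;
    int thread_running;
};

/* Stand-in for the DRM_RADEON_CS ioctl: just print and reset the dwords. */
static void *submit_ioctl(void *arg)
{
    struct cs_context *csc = arg;
    for (unsigned i = 0; i < csc->cdw; i++)
        printf("0x%08X\n", csc->buf[i]);
    csc->cdw = 0;
    return NULL;
}

/* Wait for any pending submission, like radeon_drm_cs_sync_flush(). */
static void cs_sync_flush(struct cs *cs)
{
    if (cs->thread_running) {
        pthread_join(cs->thread, NULL);
        cs->thread_running = 0;
    }
}

/* Submit the current context on a worker thread and flip, like radeon_drm_cs_flush(). */
static void cs_flush(struct cs *cs)
{
    struct cs_context *tmp;

    cs_sync_flush(cs);        /* the previous submission must have completed */

    if (cs->cur->cdw) {
        pthread_create(&cs->thread, NULL, submit_ioctl, cs->cur);
        cs->thread_running = 1;
    }

    tmp = cs->cur;            /* flip: keep filling one CS while the other is submitted */
    cs->cur = cs->sub;
    cs->sub = tmp;
    cs->cur->cdw = 0;         /* prepare a fresh CS for the driver */
}

int main(void)
{
    struct cs cs;
    memset(&cs, 0, sizeof(cs));
    cs.cur = &cs.a;
    cs.sub = &cs.b;

    cs.cur->buf[cs.cur->cdw++] = 0xCAFEBABEu;
    cs_flush(&cs);            /* first CS is submitted on the worker thread */

    cs.cur->buf[cs.cur->cdw++] = 0xDEADBEEFu;  /* next CS is filled concurrently */
    cs_flush(&cs);

    cs_sync_flush(&cs);       /* drain before exit */
    return 0;
}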