diff options
author | José Fonseca <[email protected]> | 2010-01-31 05:36:33 +0000 |
---|---|---|
committer | José Fonseca <[email protected]> | 2010-01-31 05:36:33 +0000 |
commit | bee9964b29b2428ee75e2d1efc0e1d2c2518a417 (patch) | |
tree | fd733f886743b31f2ac2727e57f05d1a0918a977 /src/gallium/auxiliary | |
parent | 36a0819ff4ede1af91dcf909106cf20659856384 (diff) | |
parent | 12eb32e34244db9923cacaaed9ba951b7ac274a4 (diff) |
Merge remote branch 'origin/master' into lp-binning
Conflicts:
Makefile
src/gallium/auxiliary/util/u_surface.c
src/gallium/drivers/llvmpipe/lp_flush.c
src/gallium/drivers/llvmpipe/lp_setup.c
src/gallium/drivers/llvmpipe/lp_state_derived.c
src/gallium/drivers/llvmpipe/lp_state_fs.c
src/gallium/drivers/llvmpipe/lp_state_surface.c
src/gallium/drivers/llvmpipe/lp_tex_cache.c
src/gallium/drivers/llvmpipe/lp_texture.c
src/gallium/drivers/llvmpipe/lp_tile_cache.c
src/mesa/state_tracker/st_cb_condrender.c
Diffstat (limited to 'src/gallium/auxiliary')
56 files changed, 1631 insertions, 918 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 8f937e3b4e9..da1fb6b299f 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -48,12 +48,10 @@ C_SOURCES = \ draw/draw_vs_sse.c \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ - pipebuffer/pb_buffer_fenced.c \ pipebuffer/pb_buffer_malloc.c \ pipebuffer/pb_bufmgr_alt.c \ pipebuffer/pb_bufmgr_cache.c \ pipebuffer/pb_bufmgr_debug.c \ - pipebuffer/pb_bufmgr_fenced.c \ pipebuffer/pb_bufmgr_mm.c \ pipebuffer/pb_bufmgr_ondemand.c \ pipebuffer/pb_bufmgr_pool.c \ @@ -92,6 +90,7 @@ C_SOURCES = \ util/u_debug_dump.c \ util/u_debug_symbol.c \ util/u_debug_stack.c \ + util/u_bitmask.c \ util/u_blit.c \ util/u_blitter.c \ util/u_cache.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index f957090b5fb..3aa782f81e6 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -87,7 +87,6 @@ source = [ 'pipebuffer/pb_bufmgr_alt.c', 'pipebuffer/pb_bufmgr_cache.c', 'pipebuffer/pb_bufmgr_debug.c', - 'pipebuffer/pb_bufmgr_fenced.c', 'pipebuffer/pb_bufmgr_mm.c', 'pipebuffer/pb_bufmgr_ondemand.c', 'pipebuffer/pb_bufmgr_pool.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index e6dce3f0e5b..a6a07e72c2f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -113,26 +113,6 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_ return hash; } -static int _cso_size_for_type(enum cso_cache_type type) -{ - switch(type) { - case CSO_BLEND: - return sizeof(struct pipe_blend_state); - case CSO_SAMPLER: - return sizeof(struct pipe_sampler_state); - case CSO_DEPTH_STENCIL_ALPHA: - return sizeof(struct pipe_depth_stencil_alpha_state); - case CSO_RASTERIZER: - return sizeof(struct pipe_rasterizer_state); - case CSO_FRAGMENT_SHADER: - return sizeof(struct pipe_shader_state); - case CSO_VERTEX_SHADER: - return sizeof(struct pipe_shader_state); - } - return 0; -} - - static void delete_blend_state(void *state, void *data) { struct cso_blend *cso = (struct cso_blend *)state; @@ -282,10 +262,9 @@ void *cso_hash_find_data_from_template( struct cso_hash *hash, struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, - void *templ) + void *templ, unsigned size) { struct cso_hash_iter iter = cso_find_state(sc, hash_key, type); - int size = _cso_size_for_type(type); while (!cso_hash_iter_is_null(iter)) { void *iter_data = cso_hash_iter_data(iter); if (!memcmp(iter_data, templ, size)) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 6b5c230e8f2..eea60b940bb 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -160,7 +160,7 @@ struct cso_hash_iter cso_find_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type); struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, - void *templ); + void *templ, unsigned size); void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, cso_state_callback func, void *user_data); void * cso_take_state(struct cso_cache *sc, unsigned hash_key, diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 2b16332e143..dec830ba93e 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -310,18 +310,21 @@ void cso_destroy_context( struct cso_context *ctx ) enum pipe_error cso_set_blend(struct cso_context *ctx, const struct pipe_blend_state *templ) { - unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_BLEND, - (void*)templ); + unsigned key_size, hash_key; + struct cso_hash_iter iter; void *handle; + key_size = templ->independent_blend_enable ? sizeof(struct pipe_blend_state) : + (char *)&(templ->rt[1]) - (char *)templ; + hash_key = cso_construct_key((void*)templ, key_size); + iter = cso_find_state_template(ctx->cache, hash_key, CSO_BLEND, (void*)templ, key_size); + if (cso_hash_iter_is_null(iter)) { struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - memcpy(&cso->state, templ, sizeof(*templ)); + memcpy(&cso->state, templ, key_size); cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; cso->context = ctx->pipe; @@ -369,10 +372,11 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx, void *handle = NULL; if (templ != NULL) { - unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + unsigned key_size = sizeof(struct pipe_sampler_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_SAMPLER, - (void*)templ); + (void*)templ, key_size); if (cso_hash_iter_is_null(iter)) { struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); @@ -409,10 +413,11 @@ cso_single_vertex_sampler(struct cso_context *ctx, void *handle = NULL; if (templ != NULL) { - unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + unsigned key_size = sizeof(struct pipe_sampler_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_SAMPLER, - (void*)templ); + (void*)templ, key_size); if (cso_hash_iter_is_null(iter)) { struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); @@ -539,6 +544,38 @@ void cso_restore_samplers(struct cso_context *ctx) cso_single_sampler_done( ctx ); } +/* + * If the function encouters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. + */ +enum pipe_error cso_set_vertex_samplers(struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + unsigned i; + enum pipe_error temp, error = PIPE_OK; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) { + temp = cso_single_vertex_sampler( ctx, i, templates[i] ); + if (temp != PIPE_OK) + error = temp; + } + + for ( ; i < ctx->nr_samplers; i++) { + temp = cso_single_vertex_sampler( ctx, i, NULL ); + if (temp != PIPE_OK) + error = temp; + } + + cso_single_vertex_sampler_done( ctx ); + + return error; +} + void cso_save_vertex_samplers(struct cso_context *ctx) { @@ -666,12 +703,12 @@ cso_restore_vertex_sampler_textures(struct cso_context *ctx) enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_depth_stencil_alpha_state)); + unsigned key_size = sizeof(struct pipe_depth_stencil_alpha_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, - CSO_DEPTH_STENCIL_ALPHA, - (void*)templ); + CSO_DEPTH_STENCIL_ALPHA, + (void*)templ, key_size); void *handle; if (cso_hash_iter_is_null(iter)) { @@ -723,11 +760,11 @@ void cso_restore_depth_stencil_alpha(struct cso_context *ctx) enum pipe_error cso_set_rasterizer(struct cso_context *ctx, const struct pipe_rasterizer_state *templ) { - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_rasterizer_state)); + unsigned key_size = sizeof(struct pipe_rasterizer_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_RASTERIZER, - (void*)templ); + (void*)templ, key_size); void *handle = NULL; if (cso_hash_iter_is_null(iter)) { @@ -809,7 +846,8 @@ enum pipe_error cso_set_fragment_shader(struct cso_context *ctx, struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, - (void*)tokens); + (void*)tokens, + sizeof(*templ)); /* XXX correct? tokens_size? */ void *handle = NULL; if (cso_hash_iter_is_null(iter)) { @@ -888,7 +926,8 @@ enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, sizeof(struct pipe_shader_state)); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, hash_key, CSO_VERTEX_SHADER, - (void*)templ); + (void*)templ, + sizeof(*templ)); void *handle = NULL; if (cso_hash_iter_is_null(iter)) { diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index b9e313e32d6..d2089b1c883 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -84,6 +84,10 @@ enum pipe_error cso_single_sampler( struct cso_context *cso, void cso_single_sampler_done( struct cso_context *cso ); +enum pipe_error cso_set_vertex_samplers(struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states); + void cso_save_vertex_samplers(struct cso_context *cso); diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index e90dfc5aec4..d3084fd4283 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -34,11 +34,8 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "draw_context.h" -#include "draw_vbuf.h" #include "draw_vs.h" #include "draw_gs.h" -#include "draw_pt.h" -#include "draw_pipe.h" struct draw_context *draw_create( void ) @@ -237,17 +234,20 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw, void draw_set_mapped_constant_buffer(struct draw_context *draw, unsigned shader_type, + unsigned slot, const void *buffer, unsigned size ) { debug_assert(shader_type == PIPE_SHADER_VERTEX || shader_type == PIPE_SHADER_GEOMETRY); + debug_assert(slot < PIPE_MAX_CONSTANT_BUFFERS); + if (shader_type == PIPE_SHADER_VERTEX) { - draw->pt.user.vs_constants = buffer; - draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); + draw->pt.user.vs_constants[slot] = buffer; + draw_vs_set_constants(draw, slot, buffer, size); } else if (shader_type == PIPE_SHADER_GEOMETRY) { - draw->pt.user.gs_constants = buffer; - draw_gs_set_constants( draw, (const float (*)[4])buffer, size ); + draw->pt.user.gs_constants[slot] = buffer; + draw_gs_set_constants(draw, slot, buffer, size); } } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 8a64c06efcd..acd81b9712d 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -151,10 +151,12 @@ void draw_set_mapped_element_buffer( struct draw_context *draw, void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); -void draw_set_mapped_constant_buffer(struct draw_context *draw, - unsigned shader_type, - const void *buffer, - unsigned size ); +void +draw_set_mapped_constant_buffer(struct draw_context *draw, + unsigned shader_type, + unsigned slot, + const void *buffer, + unsigned size); /*********************************************************************** diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index daf8d071f12..7069aa6b181 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -69,9 +69,11 @@ void draw_gs_destroy( struct draw_context *draw ) tgsi_exec_machine_destroy(draw->gs.machine); } -void draw_gs_set_constants( struct draw_context *draw, - const float (*constants)[4], - unsigned size ) +void +draw_gs_set_constants(struct draw_context *draw, + unsigned slot, + const void *constants, + unsigned size) { } @@ -291,7 +293,7 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, void draw_geometry_shader_run(struct draw_geometry_shader *shader, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned vertex_size) @@ -302,7 +304,9 @@ void draw_geometry_shader_run(struct draw_geometry_shader *shader, unsigned num_primitives = count/num_vertices; unsigned inputs_from_vs = 0; - machine->Consts = constants; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->Consts[i] = constants[i]; + } for (i = 0; i < shader->info.num_inputs; ++i) { if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID) diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index d6a97d9c4ef..d8eb2103433 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -62,7 +62,7 @@ struct draw_geometry_shader { void draw_geometry_shader_run(struct draw_geometry_shader *shader, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride); diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 1c6d657297c..11d6485dcf0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -106,10 +106,9 @@ void draw_pipeline_destroy( struct draw_context *draw ) - - - - +/** + * Build primitive to render a point with vertex at v0. + */ static void do_point( struct draw_context *draw, const char *v0 ) { @@ -123,6 +122,10 @@ static void do_point( struct draw_context *draw, } +/** + * Build primitive to render a line with vertices at v0, v1. + * \param flags bitmask of DRAW_PIPE_EDGE_x, DRAW_PIPE_RESET_STIPPLE + */ static void do_line( struct draw_context *draw, ushort flags, const char *v0, @@ -139,6 +142,10 @@ static void do_line( struct draw_context *draw, } +/** + * Build primitive to render a triangle with vertices at v0, v1, v2. + * \param flags bitmask of DRAW_PIPE_EDGE_x, DRAW_PIPE_RESET_STIPPLE + */ static void do_triangle( struct draw_context *draw, ushort flags, char *v0, @@ -157,7 +164,10 @@ static void do_triangle( struct draw_context *draw, } - +/* + * Set up macros for draw_pt_decompose.h template code. + * This code uses vertex indexes / elements. + */ #define QUAD(i0,i1,i2,i3) \ do_triangle( draw, \ ( DRAW_PIPE_RESET_STIPPLE | \ @@ -175,16 +185,16 @@ static void do_triangle( struct draw_context *draw, #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ - elts[i0], /* flags */ \ + elts[i0], /* flags */ \ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * elts[i1], \ - verts + stride * elts[i2]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK) ); #define LINE(flags,i0,i1) \ do_line( draw, \ - elts[i0], \ + elts[i0], \ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * elts[i1]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK) ); #define POINT(i0) \ do_point( draw, \ @@ -213,7 +223,9 @@ static void do_triangle( struct draw_context *draw, -/* Code to run the pipeline on a fairly arbitary collection of vertices. +/** + * Code to run the pipeline on a fairly arbitary collection of vertices. + * For drawing indexed primitives. * * Vertex headers must be pre-initialized with the * UNDEFINED_VERTEX_ID, this code will cause that id to become @@ -243,6 +255,12 @@ void draw_pipeline_run( struct draw_context *draw, draw->pipeline.vertex_count = 0; } + + +/* + * Set up macros for draw_pt_decompose.h template code. + * This code is for non-indexed rendering (no elts). + */ #define QUAD(i0,i1,i2,i3) \ do_triangle( draw, \ ( DRAW_PIPE_RESET_STIPPLE | \ @@ -293,6 +311,10 @@ void draw_pipeline_run( struct draw_context *draw, #include "draw_pt_decompose.h" + +/* + * For drawing non-indexed primitives. + */ void draw_pipeline_run_linear( struct draw_context *draw, unsigned prim, struct vertex_header *vertices, diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 11b39db5990..dc66c65a56c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -50,8 +50,6 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) } - - static void cull_tri( struct draw_stage *stage, struct prim_header *header ) { @@ -62,7 +60,7 @@ static void cull_tri( struct draw_stage *stage, const float *v1 = header->v[1]->data[pos]; const float *v2 = header->v[2]->data[pos]; - /* edge vectors e = v0 - v2, f = v1 - v2 */ + /* edge vectors: e = v0 - v2, f = v1 - v2 */ const float ex = v0[0] - v2[0]; const float ey = v0[1] - v2[1]; const float fx = v1[0] - v2[0]; @@ -72,7 +70,7 @@ static void cull_tri( struct draw_stage *stage, header->det = ex * fy - ey * fx; if (header->det != 0) { - /* if (det < 0 then Z points toward camera and triangle is + /* if det < 0 then Z points toward the camera and the triangle is * counter-clockwise winding. */ unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; @@ -84,6 +82,7 @@ static void cull_tri( struct draw_stage *stage, } } + static void cull_first_tri( struct draw_stage *stage, struct prim_header *header ) { @@ -96,13 +95,13 @@ static void cull_first_tri( struct draw_stage *stage, } - static void cull_flush( struct draw_stage *stage, unsigned flags ) { stage->tri = cull_first_tri; stage->next->flush( stage->next, flags ); } + static void cull_reset_stipple_counter( struct draw_stage *stage ) { stage->next->reset_stipple_counter( stage->next ); @@ -140,7 +139,7 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) return &cull->stage; - fail: +fail: if (cull) cull->stage.destroy( &cull->stage ); diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index bea90e50d30..a69e2633bec 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -151,8 +151,8 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) { struct draw_context *draw = stage->draw; struct draw_stage *next = draw->pipeline.rasterize; - int need_det = 0; - int precalc_flat = 0; + boolean need_det = FALSE; + boolean precalc_flat = FALSE; boolean wide_lines, wide_points; /* Set the validate's next stage to the rasterize stage, so that it @@ -194,7 +194,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) if (wide_lines) { draw->pipeline.wide_line->next = next; next = draw->pipeline.wide_line; - precalc_flat = 1; + precalc_flat = TRUE; } if (wide_points || draw->rasterizer->point_sprite) { @@ -205,7 +205,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; - precalc_flat = 1; /* only needed for lines really */ + precalc_flat = TRUE; /* only needed for lines really */ } if (draw->rasterizer->poly_stipple_enable @@ -218,8 +218,8 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; next = draw->pipeline.unfilled; - precalc_flat = 1; /* only needed for triangles really */ - need_det = 1; + precalc_flat = TRUE; /* only needed for triangles really */ + need_det = TRUE; } if (draw->rasterizer->flatshade && precalc_flat) { @@ -231,13 +231,13 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) draw->rasterizer->offset_ccw) { draw->pipeline.offset->next = next; next = draw->pipeline.offset; - need_det = 1; + need_det = TRUE; } if (draw->rasterizer->light_twoside) { draw->pipeline.twoside->next = next; next = draw->pipeline.twoside; - need_det = 1; + need_det = TRUE; } /* Always run the cull stage as we calculate determinant there diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index ef49e575366..6a7190e9750 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -153,8 +153,8 @@ struct draw_context const void *vbuffer[PIPE_MAX_ATTRIBS]; /** constant buffer (for vertex/geometry shader) */ - const void *vs_constants; - const void *gs_constants; + const void *vs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; } user; boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ @@ -202,10 +202,10 @@ struct draw_context struct aos_machine *aos_machine; - const float (*aligned_constants)[4]; + const void *aligned_constants[PIPE_MAX_CONSTANT_BUFFERS]; - const float (*aligned_constant_storage)[4]; - unsigned const_storage_size; + const void *aligned_constant_storage[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned const_storage_size[PIPE_MAX_CONSTANT_BUFFERS]; struct translate *fetch; @@ -256,9 +256,11 @@ void draw_vs_destroy( struct draw_context *draw ); void draw_vs_set_viewport( struct draw_context *, const struct pipe_viewport_state * ); -void draw_vs_set_constants( struct draw_context *, - const float (*constants)[4], - unsigned size ); +void +draw_vs_set_constants(struct draw_context *, + unsigned slot, + const void *constants, + unsigned size); @@ -266,9 +268,13 @@ void draw_vs_set_constants( struct draw_context *, * Geometry shading code: */ boolean draw_gs_init( struct draw_context *draw ); -void draw_gs_set_constants( struct draw_context *, - const float (*constants)[4], - unsigned size ); + +void +draw_gs_set_constants(struct draw_context *, + unsigned slot, + const void *constants, + unsigned size); + void draw_gs_destroy( struct draw_context *draw ); /******************************************************************************* diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index a5ddec52863..f5ed32d0b05 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -33,7 +33,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" -#include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" #include "util/u_math.h" #include "util/u_prim.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 55e7a7b81ad..252be5053e4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -30,7 +30,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "translate/translate.h" #include "translate/translate_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 734c05f0688..c5dfbcfa3cb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -40,7 +40,6 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" -#include "translate/translate.h" struct fetch_shade_emit; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 23da556f797..56b69354b21 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -33,7 +33,6 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" #include "draw/draw_gs.h" -#include "translate/translate.h" struct fetch_pipeline_middle_end { @@ -164,7 +163,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, vshader->run_linear(vshader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.vs_constants, + draw->pt.user.vs_constants, fetch_count, fpme->vertex_size, fpme->vertex_size); @@ -172,7 +171,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, draw_geometry_shader_run(gshader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.gs_constants, + draw->pt.user.gs_constants, fetch_count, fpme->vertex_size, fpme->vertex_size); @@ -249,7 +248,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.vs_constants, + draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); @@ -258,7 +257,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, draw_geometry_shader_run(geometry_shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.gs_constants, + draw->pt.user.gs_constants, count, fpme->vertex_size, fpme->vertex_size); @@ -329,7 +328,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.vs_constants, + draw->pt.user.vs_constants, count, fpme->vertex_size, fpme->vertex_size); @@ -338,7 +337,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle draw_geometry_shader_run(geometry_shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->pt.user.gs_constants, + draw->pt.user.gs_constants, count, fpme->vertex_size, fpme->vertex_size); diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 55151823a14..9728d5c2bdf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -30,7 +30,6 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" #include "draw/draw_pt.h" struct pt_post_vs { diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index e03ac8c2291..6bdd612e6f4 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -48,24 +48,27 @@ -void draw_vs_set_constants( struct draw_context *draw, - const float (*constants)[4], - unsigned size ) +void +draw_vs_set_constants(struct draw_context *draw, + unsigned slot, + const void *constants, + unsigned size) { if (((uintptr_t)constants) & 0xf) { - if (size > draw->vs.const_storage_size) { - if (draw->vs.aligned_constant_storage) - align_free((void *)draw->vs.aligned_constant_storage); - draw->vs.aligned_constant_storage = align_malloc( size, 16 ); + if (size > draw->vs.const_storage_size[slot]) { + if (draw->vs.aligned_constant_storage[slot]) { + align_free((void *)draw->vs.aligned_constant_storage[slot]); + } + draw->vs.aligned_constant_storage[slot] = align_malloc(size, 16); } - memcpy( (void*)draw->vs.aligned_constant_storage, - constants, - size ); - constants = draw->vs.aligned_constant_storage; + memcpy((void *)draw->vs.aligned_constant_storage[slot], + constants, + size); + constants = draw->vs.aligned_constant_storage[slot]; } - - draw->vs.aligned_constants = constants; - draw_vs_aos_machine_constants( draw->vs.aos_machine, constants ); + + draw->vs.aligned_constants[slot] = constants; + draw_vs_aos_machine_constants(draw->vs.aos_machine, slot, constants); } @@ -182,6 +185,8 @@ draw_vs_init( struct draw_context *draw ) void draw_vs_destroy( struct draw_context *draw ) { + uint i; + if (draw->vs.fetch_cache) translate_cache_destroy(draw->vs.fetch_cache); @@ -191,8 +196,11 @@ draw_vs_destroy( struct draw_context *draw ) if (draw->vs.aos_machine) draw_vs_aos_machine_destroy(draw->vs.aos_machine); - if (draw->vs.aligned_constant_storage) - align_free((void*)draw->vs.aligned_constant_storage); + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + if (draw->vs.aligned_constant_storage[i]) { + align_free((void *)draw->vs.aligned_constant_storage[i]); + } + } tgsi_exec_machine_destroy(draw->vs.machine); } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 00036cfe68b..d095c9bad1d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -132,7 +132,7 @@ struct draw_vertex_shader { void (*run_linear)( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ); @@ -212,8 +212,10 @@ static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key struct aos_machine *draw_vs_aos_machine( void ); void draw_vs_aos_machine_destroy( struct aos_machine *machine ); -void draw_vs_aos_machine_constants( struct aos_machine *machine, - const float (*constants)[4] ); +void +draw_vs_aos_machine_constants(struct aos_machine *machine, + unsigned slot, + const void *constants); void draw_vs_aos_machine_viewport( struct aos_machine *machine, const struct pipe_viewport_state *viewport ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 1aaae4ab7a4..e7121f36541 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2114,11 +2114,14 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + unsigned i; if (0) debug_printf("%s %d\n", __FUNCTION__, count); machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - machine->constants = vaos->draw->vs.aligned_constants; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->constants[i] = vaos->draw->vs.aligned_constants[i]; + } machine->immediates = vaos->base.vs->immediates; machine->buffer = vaos->buffer; @@ -2135,12 +2138,15 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + unsigned i; if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count, vaos->base.key.const_vbuffers); machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - machine->constants = vaos->draw->vs.aligned_constants; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->constants[i] = vaos->draw->vs.aligned_constants[i]; + } machine->immediates = vaos->base.vs->immediates; machine->buffer = vaos->buffer; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 2cf72ddf7b1..1911242f825 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -122,7 +122,7 @@ struct aos_machine { ushort fpucntl; /* one of FPU_* above */ const float (*immediates)[4]; /* points to shader data */ - const float (*constants)[4]; /* points to draw data */ + const void *constants[PIPE_MAX_CONSTANT_BUFFERS]; /* points to draw data */ const struct aos_buffer *buffer; /* points to ? */ }; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index 3240e3745dd..0eda414ee6a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -219,10 +219,12 @@ static void PIPE_CDECL populate_lut( struct aos_machine *machine, } -void draw_vs_aos_machine_constants( struct aos_machine *machine, - const float (*constants)[4] ) +void +draw_vs_aos_machine_constants(struct aos_machine *machine, + unsigned slot, + const void *constants) { - machine->constants = constants; + machine->constants[slot] = constants; { unsigned i; @@ -307,8 +309,10 @@ void draw_vs_aos_machine_viewport( struct aos_machine *machine, { } -void draw_vs_aos_machine_constants( struct aos_machine *machine, - const float (*constants)[4] ) +void +draw_vs_aos_machine_constants(struct aos_machine *machine, + unsigned slot, + const void *constants) { } diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 41cc8026131..7deca2b69d9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -85,7 +85,7 @@ static void vs_exec_run_linear( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -95,7 +95,9 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, unsigned int i, j; unsigned slot; - machine->Consts = constants; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->Consts[i] = constants[i]; + } for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index b3535c0e48e..fd9166fda58 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -64,7 +64,7 @@ static void vs_llvm_run_linear( struct draw_vertex_shader *base, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -74,7 +74,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, gallivm_cpu_vs_exec(shader->llvm_prog, shader->machine, input, base->info.num_inputs, output, base->info.num_outputs, - constants, count, input_stride, output_stride); + (const float (*)[4])constants[0], + count, input_stride, output_stride); } diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index da9f3e3d35c..d869eecec5e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -85,7 +85,7 @@ static void vs_ppc_run_linear( struct draw_vertex_shader *base, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, */ shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4]) shader->base.immediates, - (float (*)[4]) constants, + (const float (*)[4])constants[0], ppc_builtin_constants); /* convert (up to) four output verts from SoA back to AoS format */ diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 702051387ac..54e6423388f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -83,7 +83,7 @@ static void vs_sse_run_linear( struct draw_vertex_shader *base, const float (*input)[4], float (*output)[4], - const float (*constants)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], unsigned count, unsigned input_stride, unsigned output_stride ) @@ -112,7 +112,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base, /* run compiled shader */ shader->func(machine, - constants, + (const float (*)[4])constants[0], shader->base.immediates, input, base->info.num_inputs, diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 9f40030f39f..5ed706cb4ff 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -38,7 +38,6 @@ #include "draw/draw_vertex.h" #include "draw/draw_vs.h" #include "translate/translate.h" -#include "translate/translate_cache.h" /* A first pass at incorporating vertex fetch/emit functionality into */ @@ -148,7 +147,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); @@ -211,7 +210,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, temp_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants, + vsvg->base.vs->draw->pt.user.vs_constants, count, temp_vertex_stride, temp_vertex_stride); diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile new file mode 100644 index 00000000000..21d25d24748 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -0,0 +1,18 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = pipebuffer + +C_SOURCES = \ + pb_buffer_fenced.c \ + pb_buffer_malloc.c \ + pb_bufmgr_alt.c \ + pb_bufmgr_cache.c \ + pb_bufmgr_debug.c \ + pb_bufmgr_mm.c \ + pb_bufmgr_ondemand.c \ + pb_bufmgr_pool.c \ + pb_bufmgr_slab.c \ + pb_validate.c + +include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript new file mode 100644 index 00000000000..a074a554717 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -0,0 +1,18 @@ +Import('*') + +pipebuffer = env.ConvenienceLibrary( + target = 'pipebuffer', + source = [ + 'pb_buffer_fenced.c', + 'pb_buffer_malloc.c', + 'pb_bufmgr_alt.c', + 'pb_bufmgr_cache.c', + 'pb_bufmgr_debug.c', + 'pb_bufmgr_mm.c', + 'pb_bufmgr_ondemand.c', + 'pb_bufmgr_pool.c', + 'pb_bufmgr_slab.c', + 'pb_validate.c', + ]) + +auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index a4b78f14943..ba087ac0f34 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007-2009 VMware, Inc. + * Copyright 2007-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -28,9 +28,9 @@ /** * \file * Implementation of fenced buffers. - * - * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> - * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * + * \author Jose Fonseca <jfonseca-at-vmware-dot-com> + * \author Thomas Hellström <thellstrom-at-vmware-dot-com> */ @@ -50,6 +50,7 @@ #include "pb_buffer.h" #include "pb_buffer_fenced.h" +#include "pb_bufmgr.h" @@ -59,32 +60,79 @@ #define SUPER(__derived) (&(__derived)->base) -struct fenced_buffer_list +struct fenced_manager { - pipe_mutex mutex; - + struct pb_manager base; + struct pb_manager *provider; struct pb_fence_ops *ops; - - pb_size numDelayed; - struct list_head delayed; - -#ifdef DEBUG - pb_size numUnfenced; + + /** + * Maximum buffer size that can be safely allocated. + */ + pb_size max_buffer_size; + + /** + * Maximum cpu memory we can allocate before we start waiting for the + * GPU to idle. + */ + pb_size max_cpu_total_size; + + /** + * Following members are mutable and protected by this mutex. + */ + pipe_mutex mutex; + + /** + * Fenced buffer list. + * + * All fenced buffers are placed in this listed, ordered from the oldest + * fence to the newest fence. + */ + struct list_head fenced; + pb_size num_fenced; + struct list_head unfenced; -#endif + pb_size num_unfenced; + + /** + * How much temporary CPU memory is being used to hold unvalidated buffers. + */ + pb_size cpu_total_size; }; /** + * Fenced buffer. + * * Wrapper around a pipe buffer which adds fencing and reference counting. */ struct fenced_buffer { + /* + * Immutable members. + */ + struct pb_buffer base; - + struct fenced_manager *mgr; + + /* + * Following members are mutable and protected by fenced_manager::mutex. + */ + + struct list_head head; + + /** + * Buffer with storage. + */ struct pb_buffer *buffer; + pb_size size; + struct pb_desc desc; - /* FIXME: protect access with mutex */ + /** + * Temporary CPU storage data. Used when there isn't enough GPU memory to + * store the buffer. + */ + void *data; /** * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current @@ -93,15 +141,22 @@ struct fenced_buffer unsigned flags; unsigned mapcount; + struct pb_validate *vl; unsigned validation_flags; - struct pipe_fence_handle *fence; - struct list_head head; - struct fenced_buffer_list *list; + struct pipe_fence_handle *fence; }; +static INLINE struct fenced_manager * +fenced_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct fenced_manager *)mgr; +} + + static INLINE struct fenced_buffer * fenced_buffer(struct pb_buffer *buf) { @@ -110,221 +165,568 @@ fenced_buffer(struct pb_buffer *buf) } -static INLINE void -_fenced_buffer_add(struct fenced_buffer *fenced_buf) -{ - struct fenced_buffer_list *fenced_list = fenced_buf->list; +static void +fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf); - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); - assert(fenced_buf->fence); +static enum pipe_error +fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf); +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + boolean wait); + +static enum pipe_error +fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf); + +static enum pipe_error +fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf); + + +/** + * Dump the fenced buffer list. + * + * Useful to understand failures to allocate buffers. + */ +static void +fenced_manager_dump_locked(struct fenced_manager *fenced_mgr) +{ #ifdef DEBUG - LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; + struct pb_fence_ops *ops = fenced_mgr->ops; + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + + debug_printf("%10s %7s %8s %7s %10s %s\n", + "buffer", "size", "refcount", "storage", "fence", "signalled"); + + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(!fenced_buf->fence); + debug_printf("%10p %7u %8u %7s\n", + (void *) fenced_buf, + fenced_buf->base.base.size, + p_atomic_read(&fenced_buf->base.base.reference.count), + fenced_buf->buffer ? "gpu" : (fenced_buf->data ? "cpu" : "none")); + curr = next; + next = curr->next; + } + + curr = fenced_mgr->fenced.next; + next = curr->next; + while(curr != &fenced_mgr->fenced) { + int signaled; + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(fenced_buf->buffer); + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); + debug_printf("%10p %7u %8u %7s %10p %s\n", + (void *) fenced_buf, + fenced_buf->base.base.size, + p_atomic_read(&fenced_buf->base.base.reference.count), + "gpu", + (void *) fenced_buf->fence, + signaled == 0 ? "y" : "n"); + curr = next; + next = curr->next; + } +#else + (void)fenced_mgr; #endif - LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); - ++fenced_list->numDelayed; } -/** - * Actually destroy the buffer. - */ static INLINE void -_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) +fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(!fenced_buf->fence); -#ifdef DEBUG assert(fenced_buf->head.prev); assert(fenced_buf->head.next); LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; -#else - (void)fenced_list; -#endif - pb_reference(&fenced_buf->buffer, NULL); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + FREE(fenced_buf); } +/** + * Add the buffer to the fenced list. + * + * Reference count should be incremented before calling this function. + */ static INLINE void -_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +fenced_buffer_add_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(fenced_buf->fence); + + p_atomic_inc(&fenced_buf->base.base.reference.count); + + LIST_DEL(&fenced_buf->head); + assert(fenced_mgr->num_unfenced); + --fenced_mgr->num_unfenced; + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced); + ++fenced_mgr->num_fenced; +} + + +/** + * Remove the buffer from the fenced list, and potentially destroy the buffer + * if the reference count reaches zero. + * + * Returns TRUE if the buffer was detroyed. + */ +static INLINE boolean +fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) { - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; assert(fenced_buf->fence); - assert(fenced_buf->list == fenced_list); - + assert(fenced_buf->mgr == fenced_mgr); + ops->fence_reference(ops, &fenced_buf->fence, NULL); fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - + assert(fenced_buf->head.prev); assert(fenced_buf->head.next); - + LIST_DEL(&fenced_buf->head); - assert(fenced_list->numDelayed); - --fenced_list->numDelayed; - -#ifdef DEBUG - LIST_ADDTAIL(&fenced_buf->head, &fenced_list->unfenced); - ++fenced_list->numUnfenced; -#endif - - /** - * FIXME!!! - */ + assert(fenced_mgr->num_fenced); + --fenced_mgr->num_fenced; + + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + + if (p_atomic_dec_zero(&fenced_buf->base.base.reference.count)) { + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + return TRUE; + } - if(!pipe_is_referenced(&fenced_buf->base.base.reference)) - _fenced_buffer_destroy(fenced_buf); + return FALSE; } +/** + * Wait for the fence to expire, and remove it from the fenced list. + * + * This function will release and re-aquire the mutex, so any copy of mutable + * state must be discarded after calling it. + */ static INLINE enum pipe_error -_fenced_buffer_finish(struct fenced_buffer *fenced_buf) +fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; + enum pipe_error ret = PIPE_ERROR; #if 0 debug_warning("waiting for GPU"); #endif + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); + if(fenced_buf->fence) { - if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { - return PIPE_ERROR; + struct pipe_fence_handle *fence = NULL; + int finished; + boolean proceed; + + ops->fence_reference(ops, &fence, fenced_buf->fence); + + pipe_mutex_unlock(fenced_mgr->mutex); + + finished = ops->fence_finish(ops, fenced_buf->fence, 0); + + pipe_mutex_lock(fenced_mgr->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + + /* + * Only proceed if the fence object didn't change in the meanwhile. + * Otherwise assume the work has been already carried out by another + * thread that re-aquired the lock before us. + */ + proceed = fence == fenced_buf->fence ? TRUE : FALSE; + + ops->fence_reference(ops, &fence, NULL); + + if(proceed && finished == 0) { + /* + * Remove from the fenced list + */ + + boolean destroyed; + + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + /* TODO: remove consequents buffers with the same fence? */ + + assert(!destroyed); + + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + ret = PIPE_OK; } - /* Remove from the fenced list */ - /* TODO: remove consequents */ - _fenced_buffer_remove(fenced_list, fenced_buf); } - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - return PIPE_OK; + return ret; } /** - * Free as many fenced buffers from the list head as possible. + * Remove as many fenced buffers from the fenced list as possible. + * + * Returns TRUE if at least one buffer was removed. */ -static void -_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, - int wait) +static boolean +fenced_manager_check_signalled_locked(struct fenced_manager *fenced_mgr, + boolean wait) { - struct pb_fence_ops *ops = fenced_list->ops; + struct pb_fence_ops *ops = fenced_mgr->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; - struct pb_buffer *pb_buf; struct pipe_fence_handle *prev_fence = NULL; + boolean ret = FALSE; - curr = fenced_list->delayed.next; + curr = fenced_mgr->fenced.next; next = curr->next; - while(curr != &fenced_list->delayed) { + while(curr != &fenced_mgr->fenced) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); if(fenced_buf->fence != prev_fence) { int signaled; - if (wait) + + if (wait) { signaled = ops->fence_finish(ops, fenced_buf->fence, 0); - else + + /* + * Don't return just now. Instead preemptively check if the + * following buffers' fences already expired, without further waits. + */ + wait = FALSE; + } + else { signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - if (signaled != 0) - break; + } + + if (signaled != 0) { + return ret; + } + prev_fence = fenced_buf->fence; } else { + /* This buffer's fence object is identical to the previous buffer's + * fence object, so no need to check the fence again. + */ assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } - _fenced_buffer_remove(fenced_list, fenced_buf); + fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + + ret = TRUE; + + curr = next; + next = curr->next; + } + + return ret; +} + + +/** + * Try to free some GPU memory by backing it up into CPU memory. + * + * Returns TRUE if at least one buffer was freed. + */ +static boolean +fenced_manager_free_gpu_storage_locked(struct fenced_manager *fenced_mgr) +{ + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; - curr = next; + curr = fenced_mgr->unfenced.next; + next = curr->next; + while(curr != &fenced_mgr->unfenced) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + + /* + * We can only move storage if the buffer is not mapped and not + * validated. + */ + if(fenced_buf->buffer && + !fenced_buf->mapcount && + !fenced_buf->vl) { + enum pipe_error ret; + + ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf); + if(ret == PIPE_OK) { + ret = fenced_buffer_copy_storage_to_cpu_locked(fenced_buf); + if(ret == PIPE_OK) { + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + return TRUE; + } + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + } + } + + curr = next; next = curr->next; } + + return FALSE; } +/** + * Destroy CPU storage for this buffer. + */ static void -fenced_buffer_destroy(struct pb_buffer *buf) +fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - struct fenced_buffer_list *fenced_list = fenced_buf->list; + if(fenced_buf->data) { + align_free(fenced_buf->data); + fenced_buf->data = NULL; + assert(fenced_buf->mgr->cpu_total_size >= fenced_buf->size); + fenced_buf->mgr->cpu_total_size -= fenced_buf->size; + } +} - pipe_mutex_lock(fenced_list->mutex); - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - if (fenced_buf->fence) { - struct pb_fence_ops *ops = fenced_list->ops; - if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { - struct list_head *curr, *prev; - curr = &fenced_buf->head; - prev = curr->prev; - do { - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); - _fenced_buffer_remove(fenced_list, fenced_buf); - curr = prev; - prev = curr->prev; - } while (curr != &fenced_list->delayed); - } - else { - /* delay destruction */ + +/** + * Create CPU storage for this buffer. + */ +static enum pipe_error +fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + assert(!fenced_buf->data); + if(fenced_buf->data) + return PIPE_OK; + + if (fenced_mgr->cpu_total_size + fenced_buf->size > fenced_mgr->max_cpu_total_size) + return PIPE_ERROR_OUT_OF_MEMORY; + + fenced_buf->data = align_malloc(fenced_buf->size, fenced_buf->desc.alignment); + if(!fenced_buf->data) + return PIPE_ERROR_OUT_OF_MEMORY; + + fenced_mgr->cpu_total_size += fenced_buf->size; + + return PIPE_OK; +} + + +/** + * Destroy the GPU storage. + */ +static void +fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf) +{ + if(fenced_buf->buffer) { + pb_reference(&fenced_buf->buffer, NULL); + } +} + + +/** + * Try to create GPU storage for this buffer. + * + * This function is a shorthand around pb_manager::create_buffer for + * fenced_buffer_create_gpu_storage_locked()'s benefit. + */ +static INLINE boolean +fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf) +{ + struct pb_manager *provider = fenced_mgr->provider; + + assert(!fenced_buf->buffer); + + fenced_buf->buffer = provider->create_buffer(fenced_mgr->provider, + fenced_buf->size, + &fenced_buf->desc); + return fenced_buf->buffer ? TRUE : FALSE; +} + + +/** + * Create GPU storage for this buffer. + */ +static enum pipe_error +fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, + struct fenced_buffer *fenced_buf, + boolean wait) +{ + assert(!fenced_buf->buffer); + + /* + * Check for signaled buffers before trying to allocate. + */ + fenced_manager_check_signalled_locked(fenced_mgr, FALSE); + + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + + /* + * Keep trying while there is some sort of progress: + * - fences are expiring, + * - or buffers are being being swapped out from GPU memory into CPU memory. + */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, FALSE) || + fenced_manager_free_gpu_storage_locked(fenced_mgr))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); + } + + if(!fenced_buf->buffer && wait) { + /* + * Same as before, but this time around, wait to free buffers if + * necessary. + */ + while(!fenced_buf->buffer && + (fenced_manager_check_signalled_locked(fenced_mgr, TRUE) || + fenced_manager_free_gpu_storage_locked(fenced_mgr))) { + fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf); } } - else { - _fenced_buffer_destroy(fenced_buf); + + if(!fenced_buf->buffer) { + if(0) + fenced_manager_dump_locked(fenced_mgr); + + /* give up */ + return PIPE_ERROR_OUT_OF_MEMORY; } - pipe_mutex_unlock(fenced_list->mutex); + + return PIPE_OK; +} + + +static enum pipe_error +fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf) +{ + uint8_t *map; + + assert(fenced_buf->data); + assert(fenced_buf->buffer); + + map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + if(!map) + return PIPE_ERROR; + + memcpy(map, fenced_buf->data, fenced_buf->size); + + pb_unmap(fenced_buf->buffer); + + return PIPE_OK; +} + + +static enum pipe_error +fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf) +{ + const uint8_t *map; + + assert(fenced_buf->data); + assert(fenced_buf->buffer); + + map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(!map) + return PIPE_ERROR; + + memcpy(fenced_buf->data, map, fenced_buf->size); + + pb_unmap(fenced_buf->buffer); + + return PIPE_OK; +} + + +static void +fenced_buffer_destroy(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); + + pipe_mutex_lock(fenced_mgr->mutex); + + fenced_buffer_destroy_locked(fenced_mgr, fenced_buf); + + pipe_mutex_unlock(fenced_mgr->mutex); } static void * -fenced_buffer_map(struct pb_buffer *buf, +fenced_buffer_map(struct pb_buffer *buf, unsigned flags) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pb_fence_ops *ops = fenced_list->ops; - void *map; + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; + void *map = NULL; + + pipe_mutex_lock(fenced_mgr->mutex); assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); - - /* Serialize writes */ - if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || - ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { - if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) { - /* Don't wait for the GPU to finish writing */ - if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) - _fenced_buffer_remove(fenced_list, fenced_buf); - else - return NULL; + + /* + * Serialize writes. + */ + while((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || + ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { + + /* + * Don't wait for the GPU to finish accessing it, if blocking is forbidden. + */ + if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && + ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { + goto done; } - else { - /* Wait for the GPU to finish writing */ - _fenced_buffer_finish(fenced_buf); + + if (flags & PIPE_BUFFER_USAGE_UNSYNCHRONIZED) { + break; } + + /* + * Wait for the GPU to finish accessing. This will release and re-acquire + * the mutex, so all copies of mutable state must be discarded. + */ + fenced_buffer_finish_locked(fenced_mgr, fenced_buf); } -#if 0 - /* Check for CPU write access (read is OK) */ - if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { - /* this is legal -- just for debugging */ - debug_warning("concurrent CPU writes"); + if(fenced_buf->buffer) { + map = pb_map(fenced_buf->buffer, flags); } -#endif - - map = pb_map(fenced_buf->buffer, flags); + else { + assert(fenced_buf->data); + map = fenced_buf->data; + } + if(map) { ++fenced_buf->mapcount; fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } +done: + pipe_mutex_unlock(fenced_mgr->mutex); + return map; } @@ -333,13 +735,20 @@ static void fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + assert(fenced_buf->mapcount); if(fenced_buf->mapcount) { - pb_unmap(fenced_buf->buffer); + if (fenced_buf->buffer) + pb_unmap(fenced_buf->buffer); --fenced_buf->mapcount; if(!fenced_buf->mapcount) fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } + + pipe_mutex_unlock(fenced_mgr->mutex); } @@ -349,48 +758,72 @@ fenced_buffer_validate(struct pb_buffer *buf, unsigned flags) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; enum pipe_error ret; - + + pipe_mutex_lock(fenced_mgr->mutex); + if(!vl) { /* invalidate */ fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; - return PIPE_OK; + ret = PIPE_OK; + goto done; } - + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; - /* Buffer cannot be validated in two different lists */ - if(fenced_buf->vl && fenced_buf->vl != vl) - return PIPE_ERROR_RETRY; - -#if 0 - /* Do not validate if buffer is still mapped */ - if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { - /* TODO: wait for the thread that mapped the buffer to unmap it */ - return PIPE_ERROR_RETRY; + /* Buffer cannot be validated in two different lists */ + if(fenced_buf->vl && fenced_buf->vl != vl) { + ret = PIPE_ERROR_RETRY; + goto done; } - /* Final sanity checking */ - assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); - assert(!fenced_buf->mapcount); -#endif if(fenced_buf->vl == vl && (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ - return PIPE_OK; + ret = PIPE_OK; + goto done; + } + + /* + * Create and update GPU storage. + */ + if(!fenced_buf->buffer) { + assert(!fenced_buf->mapcount); + + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE); + if(ret != PIPE_OK) { + goto done; + } + + ret = fenced_buffer_copy_storage_to_gpu_locked(fenced_buf); + if(ret != PIPE_OK) { + fenced_buffer_destroy_gpu_storage_locked(fenced_buf); + goto done; + } + + if(fenced_buf->mapcount) { + debug_printf("warning: validating a buffer while it is still mapped\n"); + } + else { + fenced_buffer_destroy_cpu_storage_locked(fenced_buf); + } } - + ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - return ret; - + goto done; + fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; - - return PIPE_OK; + +done: + pipe_mutex_unlock(fenced_mgr->mutex); + + return ret; } @@ -398,36 +831,37 @@ static void fenced_buffer_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) { - struct fenced_buffer *fenced_buf; - struct fenced_buffer_list *fenced_list; - struct pb_fence_ops *ops; + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + struct pb_fence_ops *ops = fenced_mgr->ops; - fenced_buf = fenced_buffer(buf); - fenced_list = fenced_buf->list; - ops = fenced_list->ops; - - if(fence == fenced_buf->fence) { - /* Nothing to do */ - return; - } + pipe_mutex_lock(fenced_mgr->mutex); - assert(fenced_buf->vl); - assert(fenced_buf->validation_flags); - - pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - ops->fence_reference(ops, &fenced_buf->fence, fence); - fenced_buf->flags |= fenced_buf->validation_flags; - _fenced_buffer_add(fenced_buf); + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + assert(fenced_buf->buffer); + + if(fence != fenced_buf->fence) { + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + if (fenced_buf->fence) { + boolean destroyed; + destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf); + assert(!destroyed); + } + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + fenced_buffer_add_locked(fenced_mgr, fenced_buf); + } + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; } - pipe_mutex_unlock(fenced_list->mutex); - - pb_fence(fenced_buf->buffer, fence); - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; + pipe_mutex_unlock(fenced_mgr->mutex); } @@ -437,11 +871,29 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, pb_size *offset) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); - pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); + struct fenced_manager *fenced_mgr = fenced_buf->mgr; + + pipe_mutex_lock(fenced_mgr->mutex); + + /* + * This should only be called when the buffer is validated. Typically + * when processing relocations. + */ + assert(fenced_buf->vl); + assert(fenced_buf->buffer); + + if(fenced_buf->buffer) + pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); + else { + *base_buf = buf; + *offset = 0; + } + + pipe_mutex_unlock(fenced_mgr->mutex); } -static const struct pb_vtbl +static const struct pb_vtbl fenced_buffer_vtbl = { fenced_buffer_destroy, fenced_buffer_map, @@ -452,147 +904,166 @@ fenced_buffer_vtbl = { }; -struct pb_buffer * -fenced_buffer_create(struct fenced_buffer_list *fenced_list, - struct pb_buffer *buffer) +/** + * Wrap a buffer in a fenced buffer. + */ +static struct pb_buffer * +fenced_bufmgr_create_buffer(struct pb_manager *mgr, + pb_size size, + const struct pb_desc *desc) { - struct fenced_buffer *buf; - - if(!buffer) - return NULL; - - buf = CALLOC_STRUCT(fenced_buffer); - if(!buf) { - pb_reference(&buffer, NULL); - return NULL; + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + struct fenced_buffer *fenced_buf; + enum pipe_error ret; + + /* + * Don't stall the GPU, waste time evicting buffers, or waste memory + * trying to create a buffer that will most likely never fit into the + * graphics aperture. + */ + if(size > fenced_mgr->max_buffer_size) { + goto no_buffer; } - - pipe_reference_init(&buf->base.base.reference, 1); - buf->base.base.alignment = buffer->base.alignment; - buf->base.base.usage = buffer->base.usage; - buf->base.base.size = buffer->base.size; - - buf->base.vtbl = &fenced_buffer_vtbl; - buf->buffer = buffer; - buf->list = fenced_list; - -#ifdef DEBUG - pipe_mutex_lock(fenced_list->mutex); - LIST_ADDTAIL(&buf->head, &fenced_list->unfenced); - ++fenced_list->numUnfenced; - pipe_mutex_unlock(fenced_list->mutex); -#endif - return &buf->base; -} + fenced_buf = CALLOC_STRUCT(fenced_buffer); + if(!fenced_buf) + goto no_buffer; + pipe_reference_init(&fenced_buf->base.base.reference, 1); + fenced_buf->base.base.alignment = desc->alignment; + fenced_buf->base.base.usage = desc->usage; + fenced_buf->base.base.size = size; + fenced_buf->size = size; + fenced_buf->desc = *desc; -struct fenced_buffer_list * -fenced_buffer_list_create(struct pb_fence_ops *ops) -{ - struct fenced_buffer_list *fenced_list; + fenced_buf->base.vtbl = &fenced_buffer_vtbl; + fenced_buf->mgr = fenced_mgr; - fenced_list = CALLOC_STRUCT(fenced_buffer_list); - if (!fenced_list) - return NULL; + pipe_mutex_lock(fenced_mgr->mutex); + + /* + * Try to create GPU storage without stalling, + */ + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, FALSE); - fenced_list->ops = ops; + /* + * Attempt to use CPU memory to avoid stalling the GPU. + */ + if(ret != PIPE_OK) { + ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf); + } - LIST_INITHEAD(&fenced_list->delayed); - fenced_list->numDelayed = 0; - -#ifdef DEBUG - LIST_INITHEAD(&fenced_list->unfenced); - fenced_list->numUnfenced = 0; -#endif + /* + * Create GPU storage, waiting for some to be available. + */ + if(ret != PIPE_OK) { + ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE); + } + + /* + * Give up. + */ + if(ret != PIPE_OK) { + goto no_storage; + } - pipe_mutex_init(fenced_list->mutex); + assert(fenced_buf->buffer || fenced_buf->data); - return fenced_list; -} + LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced); + ++fenced_mgr->num_unfenced; + pipe_mutex_unlock(fenced_mgr->mutex); + return &fenced_buf->base; -void -fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, - int wait) -{ - pipe_mutex_lock(fenced_list->mutex); - _fenced_buffer_list_check_free(fenced_list, wait); - pipe_mutex_unlock(fenced_list->mutex); +no_storage: + pipe_mutex_unlock(fenced_mgr->mutex); + FREE(fenced_buf); +no_buffer: + return NULL; } -#ifdef DEBUG -void -fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) +static void +fenced_bufmgr_flush(struct pb_manager *mgr) { - struct pb_fence_ops *ops = fenced_list->ops; - struct list_head *curr, *next; - struct fenced_buffer *fenced_buf; + struct fenced_manager *fenced_mgr = fenced_manager(mgr); - pipe_mutex_lock(fenced_list->mutex); + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; + pipe_mutex_unlock(fenced_mgr->mutex); - debug_printf("%10s %7s %7s %10s %s\n", - "buffer", "size", "refcount", "fence", "signalled"); - - curr = fenced_list->unfenced.next; - next = curr->next; - while(curr != &fenced_list->unfenced) { - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - assert(!fenced_buf->fence); - debug_printf("%10p %7u %7u\n", - (void *) fenced_buf, - fenced_buf->base.base.size, - p_atomic_read(&fenced_buf->base.base.reference.count)); - curr = next; - next = curr->next; - } - - curr = fenced_list->delayed.next; - next = curr->next; - while(curr != &fenced_list->delayed) { - int signaled; - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - debug_printf("%10p %7u %7u %10p %s\n", - (void *) fenced_buf, - fenced_buf->base.base.size, - p_atomic_read(&fenced_buf->base.base.reference.count), - (void *) fenced_buf->fence, - signaled == 0 ? "y" : "n"); - curr = next; - next = curr->next; - } - - pipe_mutex_unlock(fenced_list->mutex); + assert(fenced_mgr->provider->flush); + if(fenced_mgr->provider->flush) + fenced_mgr->provider->flush(fenced_mgr->provider); } -#endif -void -fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) +static void +fenced_bufmgr_destroy(struct pb_manager *mgr) { - pipe_mutex_lock(fenced_list->mutex); + struct fenced_manager *fenced_mgr = fenced_manager(mgr); + + pipe_mutex_lock(fenced_mgr->mutex); /* Wait on outstanding fences */ - while (fenced_list->numDelayed) { - pipe_mutex_unlock(fenced_list->mutex); + while (fenced_mgr->num_fenced) { + pipe_mutex_unlock(fenced_mgr->mutex); #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) sched_yield(); #endif - _fenced_buffer_list_check_free(fenced_list, 1); - pipe_mutex_lock(fenced_list->mutex); + pipe_mutex_lock(fenced_mgr->mutex); + while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) + ; } #ifdef DEBUG - /*assert(!fenced_list->numUnfenced);*/ + /*assert(!fenced_mgr->num_unfenced);*/ #endif - - pipe_mutex_unlock(fenced_list->mutex); - - fenced_list->ops->destroy(fenced_list->ops); - - FREE(fenced_list); + + pipe_mutex_unlock(fenced_mgr->mutex); + pipe_mutex_destroy(fenced_mgr->mutex); + + if(fenced_mgr->provider) + fenced_mgr->provider->destroy(fenced_mgr->provider); + + fenced_mgr->ops->destroy(fenced_mgr->ops); + + FREE(fenced_mgr); } +struct pb_manager * +fenced_bufmgr_create(struct pb_manager *provider, + struct pb_fence_ops *ops, + pb_size max_buffer_size, + pb_size max_cpu_total_size) +{ + struct fenced_manager *fenced_mgr; + + if(!provider) + return NULL; + + fenced_mgr = CALLOC_STRUCT(fenced_manager); + if (!fenced_mgr) + return NULL; + + fenced_mgr->base.destroy = fenced_bufmgr_destroy; + fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; + fenced_mgr->base.flush = fenced_bufmgr_flush; + + fenced_mgr->provider = provider; + fenced_mgr->ops = ops; + fenced_mgr->max_buffer_size = max_buffer_size; + fenced_mgr->max_cpu_total_size = max_cpu_total_size; + + LIST_INITHEAD(&fenced_mgr->fenced); + fenced_mgr->num_fenced = 0; + + LIST_INITHEAD(&fenced_mgr->unfenced); + fenced_mgr->num_unfenced = 0; + + pipe_mutex_init(fenced_mgr->mutex); + + return &fenced_mgr->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index 034ca1e024a..0372f81d0a1 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -98,43 +98,6 @@ struct pb_fence_ops }; -/** - * Create a fenced buffer list. - * - * See also fenced_bufmgr_create for a more convenient way to use this. - */ -struct fenced_buffer_list * -fenced_buffer_list_create(struct pb_fence_ops *ops); - - -/** - * Walk the fenced buffer list to check and free signalled buffers. - */ -void -fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, - int wait); - - -#ifdef DEBUG -void -fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list); -#endif - - -void -fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list); - - -/** - * Wrap a buffer in a fenced buffer. - * - * NOTE: this will not increase the buffer reference count. - */ -struct pb_buffer * -fenced_buffer_create(struct fenced_buffer_list *fenced, - struct pb_buffer *buffer); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 8c8d7130781..06669917ff6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -175,7 +175,9 @@ struct pb_fence_ops; */ struct pb_manager * fenced_bufmgr_create(struct pb_manager *provider, - struct pb_fence_ops *ops); + struct pb_fence_ops *ops, + pb_size max_buffer_size, + pb_size max_cpu_total_size); struct pb_manager * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 6e3214ca9c9..8f74180a111 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -371,6 +371,9 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, struct pb_desc real_desc; pb_size real_size; + assert(size); + assert(desc->alignment); + buf = CALLOC_STRUCT(pb_debug_buffer); if(!buf) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c deleted file mode 100644 index 97dd1427fda..00000000000 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ /dev/null @@ -1,152 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/** - * \file - * A buffer manager that wraps buffers in fenced buffers. - * - * \author Jose Fonseca <[email protected]> - */ - - -#include "util/u_debug.h" -#include "util/u_memory.h" - -#include "pb_buffer.h" -#include "pb_buffer_fenced.h" -#include "pb_bufmgr.h" - - -struct fenced_pb_manager -{ - struct pb_manager base; - - struct pb_manager *provider; - - struct fenced_buffer_list *fenced_list; -}; - - -static INLINE struct fenced_pb_manager * -fenced_pb_manager(struct pb_manager *mgr) -{ - assert(mgr); - return (struct fenced_pb_manager *)mgr; -} - - -static struct pb_buffer * -fenced_bufmgr_create_buffer(struct pb_manager *mgr, - pb_size size, - const struct pb_desc *desc) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - struct pb_buffer *buf; - struct pb_buffer *fenced_buf; - - /* check for free buffers before allocating new ones */ - fenced_buffer_list_check_free(fenced_mgr->fenced_list, 0); - - buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc); - if(!buf) { - /* try harder to get a buffer */ - fenced_buffer_list_check_free(fenced_mgr->fenced_list, 1); - - buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc); - if(!buf) { -#if 0 - fenced_buffer_list_dump(fenced_mgr->fenced_list); -#endif - - /* give up */ - return NULL; - } - } - - fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf); - if(!fenced_buf) { - pb_reference(&buf, NULL); - } - - return fenced_buf; -} - - -static void -fenced_bufmgr_flush(struct pb_manager *mgr) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - - fenced_buffer_list_check_free(fenced_mgr->fenced_list, TRUE); - - assert(fenced_mgr->provider->flush); - if(fenced_mgr->provider->flush) - fenced_mgr->provider->flush(fenced_mgr->provider); -} - - -static void -fenced_bufmgr_destroy(struct pb_manager *mgr) -{ - struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); - - fenced_buffer_list_destroy(fenced_mgr->fenced_list); - - if(fenced_mgr->provider) - fenced_mgr->provider->destroy(fenced_mgr->provider); - - FREE(fenced_mgr); -} - - -struct pb_manager * -fenced_bufmgr_create(struct pb_manager *provider, - struct pb_fence_ops *ops) -{ - struct fenced_pb_manager *fenced_mgr; - - if(!provider) - return NULL; - - fenced_mgr = CALLOC_STRUCT(fenced_pb_manager); - if (!fenced_mgr) - return NULL; - - fenced_mgr->base.destroy = fenced_bufmgr_destroy; - fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; - fenced_mgr->base.flush = fenced_bufmgr_flush; - - fenced_mgr->provider = provider; - fenced_mgr->fenced_list = fenced_buffer_list_create(ops); - if(!fenced_mgr->fenced_list) { - FREE(fenced_mgr); - return NULL; - } - - return &fenced_mgr->base; -} diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index ce40c0cf0e6..903afc749d3 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -39,7 +39,6 @@ #include "util/u_debug.h" #include "pb_buffer.h" -#include "pb_buffer_fenced.h" #include "pb_validate.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index de9cbc86305..e38b0be7ab5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -103,6 +103,7 @@ tgsi_default_declaration( void ) declaration.File = TGSI_FILE_NULL; declaration.UsageMask = TGSI_WRITEMASK_XYZW; declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; + declaration.Dimension = 0; declaration.Semantic = 0; declaration.Centroid = 0; declaration.Invariant = 0; @@ -116,6 +117,7 @@ tgsi_build_declaration( unsigned file, unsigned usage_mask, unsigned interpolate, + unsigned dimension, unsigned semantic, unsigned centroid, unsigned invariant, @@ -130,6 +132,7 @@ tgsi_build_declaration( declaration.File = file; declaration.UsageMask = usage_mask; declaration.Interpolate = interpolate; + declaration.Dimension = dimension; declaration.Semantic = semantic; declaration.Centroid = centroid; declaration.Invariant = invariant; @@ -183,6 +186,7 @@ tgsi_build_full_declaration( full_decl->Declaration.File, full_decl->Declaration.UsageMask, full_decl->Declaration.Interpolate, + full_decl->Declaration.Dimension, full_decl->Declaration.Semantic, full_decl->Declaration.Centroid, full_decl->Declaration.Invariant, @@ -199,6 +203,20 @@ tgsi_build_full_declaration( declaration, header ); + if (full_decl->Declaration.Dimension) { + struct tgsi_declaration_dimension *dd; + + if (maxsize <= size) { + return 0; + } + dd = (struct tgsi_declaration_dimension *)&tokens[size]; + size++; + + *dd = tgsi_build_declaration_dimension(full_decl->Dim.Index2D, + declaration, + header); + } + if( full_decl->Declaration.Semantic ) { struct tgsi_declaration_semantic *ds; @@ -249,6 +267,34 @@ tgsi_build_declaration_range( return declaration_range; } +struct tgsi_declaration_dimension +tgsi_default_declaration_dimension(void) +{ + struct tgsi_declaration_dimension dd; + + dd.Index2D = 0; + dd.Padding = 0; + + return dd; +} + +struct tgsi_declaration_dimension +tgsi_build_declaration_dimension(unsigned index_2d, + struct tgsi_declaration *declaration, + struct tgsi_header *header) +{ + struct tgsi_declaration_dimension dd; + + assert(index_2d <= 0xFFFF); + + dd = tgsi_default_declaration_dimension(); + dd.Index2D = index_2d; + + declaration_grow(declaration, header); + + return dd; +} + struct tgsi_declaration_semantic tgsi_default_declaration_semantic( void ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 9de2757fe40..ebee4ce5f6a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -64,6 +64,7 @@ tgsi_build_declaration( unsigned file, unsigned usage_mask, unsigned interpolate, + unsigned dimension, unsigned semantic, unsigned centroid, unsigned invariant, @@ -89,6 +90,14 @@ tgsi_build_declaration_range( struct tgsi_declaration *declaration, struct tgsi_header *header ); +struct tgsi_declaration_dimension +tgsi_default_declaration_dimension(void); + +struct tgsi_declaration_dimension +tgsi_build_declaration_dimension(unsigned index_2d, + struct tgsi_declaration *declaration, + struct tgsi_header *header); + struct tgsi_declaration_semantic tgsi_default_declaration_semantic( void ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index d7ff262f30a..dd365558755 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -159,7 +159,9 @@ static const char *property_names[] = { "GS_INPUT_PRIMITIVE", "GS_OUTPUT_PRIMITIVE", - "GS_MAX_OUTPUT_VERTICES" + "GS_MAX_OUTPUT_VERTICES", + "FS_COORD_ORIGIN", + "FS_COORD_PIXEL_CENTER" }; static const char *primitive_names[] = @@ -176,29 +178,18 @@ static const char *primitive_names[] = "POLYGON" }; - -static void -_dump_register_decl( - struct dump_ctx *ctx, - uint file, - int first, - int last ) +static const char *fs_coord_origin_names[] = { - ENM( file, file_names ); + "UPPER_LEFT", + "LOWER_LEFT" +}; - /* all geometry shader inputs are two dimensional */ - if (file == TGSI_FILE_INPUT && - ctx->iter.processor.Processor == TGSI_PROCESSOR_GEOMETRY) - TXT("[]"); +static const char *fs_coord_pixel_center_names[] = +{ + "HALF_INTEGER", + "INTEGER" +}; - CHR( '[' ); - SID( first ); - if (first != last) { - TXT( ".." ); - SID( last ); - } - CHR( ']' ); -} static void _dump_register_dst( @@ -219,8 +210,13 @@ _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { + ENM(src->Register.File, file_names); + if (src->Register.Dimension) { + CHR('['); + SID(src->Dimension.Index); + CHR(']'); + } if (src->Register.Indirect) { - ENM( src->Register.File, file_names ); CHR( '[' ); ENM( src->Indirect.File, file_names ); CHR( '[' ); @@ -234,16 +230,10 @@ _dump_register_src( } CHR( ']' ); } else { - ENM( src->Register.File, file_names ); CHR( '[' ); SID( src->Register.Index ); CHR( ']' ); } - if (src->Register.Dimension) { - CHR( '[' ); - SID( src->Dimension.Index ); - CHR( ']' ); - } } static void @@ -300,11 +290,28 @@ iter_declaration( TXT( "DCL " ); - _dump_register_decl( - ctx, - decl->Declaration.File, - decl->Range.First, - decl->Range.Last ); + ENM(decl->Declaration.File, file_names); + + /* all geometry shader inputs are two dimensional */ + if (decl->Declaration.File == TGSI_FILE_INPUT && + iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY) { + TXT("[]"); + } + + if (decl->Declaration.Dimension) { + CHR('['); + SID(decl->Dim.Index2D); + CHR(']'); + } + + CHR('['); + SID(decl->Range.First); + if (decl->Range.First != decl->Range.Last) { + TXT(".."); + SID(decl->Range.Last); + } + CHR(']'); + _dump_writemask( ctx, decl->Declaration.UsageMask ); @@ -373,6 +380,12 @@ iter_property( case TGSI_PROPERTY_GS_OUTPUT_PRIM: ENM(prop->u[i].Data, primitive_names); break; + case TGSI_PROPERTY_FS_COORD_ORIGIN: + ENM(prop->u[i].Data, fs_coord_origin_names); + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + ENM(prop->u[i].Data, fs_coord_pixel_center_names); + break; default: SID( prop->u[i].Data ); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 118a638ab48..fbb9aa0e63a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -953,107 +953,90 @@ micro_sub( } static void -fetch_src_file_channel( - const struct tgsi_exec_machine *mach, - const uint file, - const uint swizzle, - const union tgsi_exec_channel *index, - union tgsi_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: - assert(mach->Consts); - if (index->i[0] < 0) - chan->f[0] = 0.0f; - else - chan->f[0] = mach->Consts[index->i[0]][swizzle]; - if (index->i[1] < 0) - chan->f[1] = 0.0f; - else - chan->f[1] = mach->Consts[index->i[1]][swizzle]; - if (index->i[2] < 0) - chan->f[2] = 0.0f; - else - chan->f[2] = mach->Consts[index->i[2]][swizzle]; - if (index->i[3] < 0) - chan->f[3] = 0.0f; - else - chan->f[3] = mach->Consts[index->i[3]][swizzle]; - break; +fetch_src_file_channel(const struct tgsi_exec_machine *mach, + const uint file, + const uint swizzle, + const union tgsi_exec_channel *index, + const union tgsi_exec_channel *index2D, + union tgsi_exec_channel *chan) +{ + uint i; - case TGSI_FILE_INPUT: - case TGSI_FILE_SYSTEM_VALUE: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; + switch (file) { + case TGSI_FILE_CONSTANT: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); + assert(mach->Consts[index2D->i[i]]); - case TGSI_FILE_TEMPORARY: - assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; + if (index->i[i] < 0) { + chan->u[i] = 0; + } else { + const uint *p = (const uint *)mach->Consts[index2D->i[i]]; - case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - assert( index->i[1] < (int) mach->ImmLimit ); - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - assert( index->i[2] < (int) mach->ImmLimit ); - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - assert( index->i[3] < (int) mach->ImmLimit ); - chan->f[3] = mach->Imms[index->i[3]][swizzle]; - break; + chan->u[i] = p[index->i[i] * 4 + swizzle]; + } + } + break; - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; + case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: + for (i = 0; i < QUAD_SIZE; i++) { + /* XXX: 2D indexing */ + chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; + } + break; - case TGSI_FILE_PREDICATE: - assert(index->i[0] < TGSI_EXEC_NUM_PREDS); - assert(index->i[1] < TGSI_EXEC_NUM_PREDS); - assert(index->i[2] < TGSI_EXEC_NUM_PREDS); - assert(index->i[3] < TGSI_EXEC_NUM_PREDS); - chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; - chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; - chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; - chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; - break; + case TGSI_FILE_TEMPORARY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); + assert(index2D->i[i] == 0); - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; + chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; + } + break; - default: - assert( 0 ); - chan->u[0] = 0; - chan->u[1] = 0; - chan->u[2] = 0; - chan->u[3] = 0; + case TGSI_FILE_IMMEDIATE: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); + assert(index2D->i[i] == 0); + + chan->f[i] = mach->Imms[index->i[i]][swizzle]; + } + break; + + case TGSI_FILE_ADDRESS: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0); + assert(index2D->i[i] == 0); + + chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; + } + break; + + case TGSI_FILE_PREDICATE: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); + assert(index2D->i[i] == 0); + + chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; + } + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0); + assert(index2D->i[i] == 0); + + chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; } break; default: - assert( 0 ); - chan->u[0] = 0; - chan->u[1] = 0; - chan->u[2] = 0; - chan->u[3] = 0; + assert(0); + for (i = 0; i < QUAD_SIZE; i++) { + chan->u[i] = 0; + } } } @@ -1065,6 +1048,7 @@ fetch_source(const struct tgsi_exec_machine *mach, enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; + union tgsi_exec_channel index2D; uint swizzle; /* We start with a direct index into a register file. @@ -1103,12 +1087,12 @@ fetch_source(const struct tgsi_exec_machine *mach, /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); - fetch_src_file_channel( - mach, - reg->Indirect.File, - swizzle, - &index2, - &indir_index ); + fetch_src_file_channel(mach, + reg->Indirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); /* add value of address register to the offset */ index.i[0] += indir_index.i[0]; @@ -1129,44 +1113,22 @@ fetch_source(const struct tgsi_exec_machine *mach, * subscript to a register file. Effectively it means that * the register file is actually a 2D array of registers. * - * file[1][3] == file[1*sizeof(file[1])+3], + * file[3][1], * where: * [3] = Dimension.Index */ if (reg->Register.Dimension) { - /* The size of the first-order array depends on the register file type. - * We need to multiply the index to the first array to get an effective, - * "flat" index that points to the beginning of the second-order array. - */ - switch (reg->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_SYSTEM_VALUE: - index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - break; - case TGSI_FILE_CONSTANT: - index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; - break; - default: - assert( 0 ); - } - - index.i[0] += reg->Dimension.Index; - index.i[1] += reg->Dimension.Index; - index.i[2] += reg->Dimension.Index; - index.i[3] += reg->Dimension.Index; + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = reg->Dimension.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. * Nothing stops us from indirectly addressing the indirect register, * but there is no need for that, so we won't exercise it. * - * file[1][ind[4].y+3], + * file[ind[4].y+3][1], * where: * ind = DimIndirect.File * [4] = DimIndirect.Index @@ -1184,24 +1146,25 @@ fetch_source(const struct tgsi_exec_machine *mach, index2.i[3] = reg->DimIndirect.Index; swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); - fetch_src_file_channel( - mach, - reg->DimIndirect.File, - swizzle, - &index2, - &indir_index ); - - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; + fetch_src_file_channel(mach, + reg->DimIndirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); + + index2D.i[0] += indir_index.i[0]; + index2D.i[1] += indir_index.i[1]; + index2D.i[2] += indir_index.i[2]; + index2D.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. */ for (i = 0; i < QUAD_SIZE; i++) { - if ((execmask & (1 << i)) == 0) - index.i[i] = 0; + if ((execmask & (1 << i)) == 0) { + index2D.i[i] = 0; + } } } @@ -1209,15 +1172,20 @@ fetch_source(const struct tgsi_exec_machine *mach, * files, we would have to check whether Dimension is followed * by a dimension register and continue the saga. */ + } else { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = 0; } swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->Register.File, - swizzle, - &index, - chan ); + fetch_src_file_channel(mach, + reg->Register.File, + swizzle, + &index, + &index2D, + chan); if (reg->Register.Absolute) { if (src_datatype == TGSI_EXEC_DATA_FLOAT) { @@ -1280,12 +1248,12 @@ store_dest(struct tgsi_exec_machine *mach, swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); /* fetch values from the address/indirection register */ - fetch_src_file_channel( - mach, - reg->Indirect.File, - swizzle, - &index, - &indir_index ); + fetch_src_file_channel(mach, + reg->Indirect.File, + swizzle, + &index, + &ZeroVec, + &indir_index); /* save indirection offset */ offset = indir_index.i[0]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 59e3b445cc3..a22873e4c2b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -260,7 +260,7 @@ struct tgsi_exec_machine struct tgsi_sampler **Samplers; unsigned ImmLimit; - const float (*Consts)[4]; + const void *Consts[PIPE_MAX_CONSTANT_BUFFERS]; const struct tgsi_token *Tokens; /**< Declarations, instructions */ unsigned Processor; /**< TGSI_PROCESSOR_x */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 8c7062d850c..fd37fc3079b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -109,6 +109,10 @@ tgsi_parse_token( next_token( ctx, &decl->Range ); + if (decl->Declaration.Dimension) { + next_token(ctx, &decl->Dim); + } + if( decl->Declaration.Semantic ) { next_token( ctx, &decl->Semantic ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 439a57269b7..8150e3cd29d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -58,6 +58,7 @@ struct tgsi_full_declaration { struct tgsi_declaration Declaration; struct tgsi_declaration_range Range; + struct tgsi_declaration_dimension Dim; struct tgsi_declaration_semantic Semantic; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index e1e4f97967d..91e1b27da12 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -335,10 +335,6 @@ iter_instruction( fill_scan_register1d(ind_reg, inst->Src[i].Indirect.File, inst->Src[i].Indirect.Index); - if (!(ind_reg->file == TGSI_FILE_ADDRESS || ind_reg->file == TGSI_FILE_LOOP) || - ind_reg->indices[0] != 0) { - report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); - } check_register_usage( ctx, ind_reg, @@ -412,12 +408,16 @@ iter_declaration( uint vert; for (vert = 0; vert < ctx->implied_array_size; ++vert) { scan_register *reg = MALLOC(sizeof(scan_register)); - fill_scan_register2d(reg, file, vert, i); + fill_scan_register2d(reg, file, i, vert); check_and_declare(ctx, reg); } } else { scan_register *reg = MALLOC(sizeof(scan_register)); - fill_scan_register1d(reg, file, i); + if (decl->Declaration.Dimension) { + fill_scan_register2d(reg, file, i, decl->Dim.Index2D); + } else { + fill_scan_register1d(reg, file, i); + } check_and_declare(ctx, reg); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index a6cc773003a..b9be8dc0a31 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -101,12 +101,10 @@ tgsi_scan_shader(const struct tgsi_token *tokens, src->Register.File == TGSI_FILE_SYSTEM_VALUE) { const int ind = src->Register.Index; if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { - if (src->Register.SwizzleX == TGSI_SWIZZLE_X) { - info->uses_fogcoord = TRUE; - } - else if (src->Register.SwizzleX == TGSI_SWIZZLE_Y) { - info->uses_frontfacing = TRUE; - } + info->uses_fogcoord = TRUE; + } + else if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FACE) { + info->uses_frontfacing = TRUE; } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 9fcffeda368..96be353e264 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -553,7 +553,7 @@ parse_register_dcl_bracket( report_error( ctx, "Expected literal unsigned integer" ); return FALSE; } - bracket->first = (int) uindex; + bracket->first = uindex; eat_opt_white( &ctx->cur ); @@ -617,10 +617,12 @@ parse_register_dcl( * input primitive. so we want to declare just * the index relevant to the semantics which is in * the second bracket */ - if (ctx->processor == TGSI_PROCESSOR_GEOMETRY) { + if (ctx->processor == TGSI_PROCESSOR_GEOMETRY && *file == TGSI_FILE_INPUT) { brackets[0] = brackets[1]; + *num_brackets = 1; + } else { + *num_brackets = 2; } - *num_brackets = 2; } return TRUE; @@ -738,6 +740,13 @@ parse_src_operand( return FALSE; src->Register.File = file; + if (parsed_opt_brackets) { + src->Register.Dimension = 1; + src->Dimension.Indirect = 0; + src->Dimension.Dimension = 0; + src->Dimension.Index = bracket[0].index; + bracket[0] = bracket[1]; + } src->Register.Index = bracket[0].index; if (bracket[0].ind_file != TGSI_FILE_NULL) { src->Register.Indirect = 1; @@ -748,12 +757,6 @@ parse_src_operand( src->Indirect.SwizzleZ = bracket[0].ind_comp; src->Indirect.SwizzleW = bracket[0].ind_comp; } - if (parsed_opt_brackets) { - src->Register.Dimension = 1; - src->Dimension.Indirect = 0; - src->Dimension.Dimension = 0; - src->Dimension.Index = bracket[1].index; - } /* Parse optional swizzle. */ @@ -933,7 +936,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] = "NORMAL", "FACE", "EDGEFLAG", - "PRIM_ID" + "PRIM_ID", + "INSTANCEID" }; static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = @@ -968,8 +972,17 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl = tgsi_default_full_declaration(); decl.Declaration.File = file; decl.Declaration.UsageMask = writemask; - decl.Range.First = brackets[0].first; - decl.Range.Last = brackets[0].last; + + if (num_brackets == 1) { + decl.Range.First = brackets[0].first; + decl.Range.Last = brackets[0].last; + } else { + decl.Range.First = brackets[1].first; + decl.Range.Last = brackets[1].last; + + decl.Declaration.Dimension = 1; + decl.Dim.Index2D = brackets[0].first; + } cur = ctx->cur; eat_opt_white( &cur ); @@ -1116,7 +1129,9 @@ static const char *property_names[] = { "GS_INPUT_PRIMITIVE", "GS_OUTPUT_PRIMITIVE", - "GS_MAX_OUTPUT_VERTICES" + "GS_MAX_OUTPUT_VERTICES", + "FS_COORD_ORIGIN", + "FS_COORD_PIXEL_CENTER" }; static const char *primitive_names[] = @@ -1133,6 +1148,19 @@ static const char *primitive_names[] = "POLYGON" }; +static const char *fs_coord_origin_names[] = +{ + "UPPER_LEFT", + "LOWER_LEFT" +}; + +static const char *fs_coord_pixel_center_names[] = +{ + "HALF_INTEGER", + "INTEGER" +}; + + static boolean parse_primitive( const char **pcur, uint *primitive ) { @@ -1150,6 +1178,40 @@ parse_primitive( const char **pcur, uint *primitive ) return FALSE; } +static boolean +parse_fs_coord_origin( const char **pcur, uint *fs_coord_origin ) +{ + uint i; + + for (i = 0; i < sizeof(fs_coord_origin_names) / sizeof(fs_coord_origin_names[0]); i++) { + const char *cur = *pcur; + + if (str_match_no_case( &cur, fs_coord_origin_names[i])) { + *fs_coord_origin = i; + *pcur = cur; + return TRUE; + } + } + return FALSE; +} + +static boolean +parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center ) +{ + uint i; + + for (i = 0; i < sizeof(fs_coord_pixel_center_names) / sizeof(fs_coord_pixel_center_names[0]); i++) { + const char *cur = *pcur; + + if (str_match_no_case( &cur, fs_coord_pixel_center_names[i])) { + *fs_coord_pixel_center = i; + *pcur = cur; + return TRUE; + } + } + return FALSE; +} + static boolean parse_property( struct translate_ctx *ctx ) { @@ -1191,6 +1253,18 @@ static boolean parse_property( struct translate_ctx *ctx ) ctx->implied_array_size = u_vertices_per_prim(values[0]); } break; + case TGSI_PROPERTY_FS_COORD_ORIGIN: + if (!parse_fs_coord_origin(&ctx->cur, &values[0] )) { + report_error( ctx, "Unknown coord origin as property: must be UPPER_LEFT or LOWER_LEFT!" ); + return FALSE; + } + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + if (!parse_fs_coord_pixel_center(&ctx->cur, &values[0] )) { + report_error( ctx, "Unknown coord pixel center as property: must be HALF_INTEGER or INTEGER!" ); + return FALSE; + } + break; default: if (!parse_uint(&ctx->cur, &values[0] )) { report_error( ctx, "Expected unsigned integer as property!" ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 8bd6f68dcc0..0ae46785bbe 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -44,6 +44,7 @@ union tgsi_any_token { struct tgsi_property_data prop_data; struct tgsi_declaration decl; struct tgsi_declaration_range decl_range; + struct tgsi_declaration_dimension decl_dim; struct tgsi_declaration_semantic decl_semantic; struct tgsi_immediate imm; union tgsi_immediate_data imm_data; @@ -75,6 +76,14 @@ struct ureg_tokens { #define UREG_MAX_LOOP 1 #define UREG_MAX_PRED 1 +struct const_decl { + struct { + unsigned first; + unsigned last; + } constant_range[UREG_MAX_CONSTANT_RANGE]; + unsigned nr_constant_ranges; +}; + #define DOMAIN_DECL 0 #define DOMAIN_INSN 1 @@ -127,13 +136,14 @@ struct ureg_program unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; - struct { - unsigned first; - unsigned last; - } constant_range[UREG_MAX_CONSTANT_RANGE]; - unsigned nr_constant_ranges; + struct const_decl const_decls; + struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS]; unsigned property_gs_input_prim; + unsigned property_gs_output_prim; + unsigned property_gs_max_vertices; + unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */ + unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */ unsigned nr_addrs; unsigned nr_preds; @@ -235,36 +245,40 @@ ureg_dst_register( unsigned file, return dst; } -static INLINE struct ureg_src -ureg_src_register( unsigned file, - unsigned index ) + +void +ureg_property_gs_input_prim(struct ureg_program *ureg, + unsigned input_prim) { - struct ureg_src src; - - src.File = file; - src.SwizzleX = TGSI_SWIZZLE_X; - src.SwizzleY = TGSI_SWIZZLE_Y; - src.SwizzleZ = TGSI_SWIZZLE_Z; - src.SwizzleW = TGSI_SWIZZLE_W; - src.Indirect = 0; - src.IndirectIndex = 0; - src.IndirectSwizzle = 0; - src.Absolute = 0; - src.Index = index; - src.Negate = 0; - src.Dimension = 0; - src.DimensionIndex = 0; - - return src; + ureg->property_gs_input_prim = input_prim; } +void +ureg_property_gs_output_prim(struct ureg_program *ureg, + unsigned output_prim) +{ + ureg->property_gs_output_prim = output_prim; +} +void +ureg_property_gs_max_vertices(struct ureg_program *ureg, + unsigned max_vertices) +{ + ureg->property_gs_max_vertices = max_vertices; +} void -ureg_property_gs_input_prim(struct ureg_program *ureg, - unsigned gs_input_prim) +ureg_property_fs_coord_origin(struct ureg_program *ureg, + unsigned fs_coord_origin) +{ + ureg->property_fs_coord_origin = fs_coord_origin; +} + +void +ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, + unsigned fs_coord_pixel_center) { - ureg->property_gs_input_prim = gs_input_prim; + ureg->property_fs_coord_pixel_center = fs_coord_pixel_center; } @@ -374,62 +388,92 @@ out: /* Returns a new constant register. Keep track of which have been * referred to so that we can emit decls later. * + * Constant operands declared with this function must be addressed + * with a two-dimensional index. + * * There is nothing in this code to bind this constant to any tracked * value or manage any constant_buffer contents -- that's the * resposibility of the calling code. */ -struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, - unsigned index ) +void +ureg_DECL_constant2D(struct ureg_program *ureg, + unsigned first, + unsigned last, + unsigned index2D) +{ + struct const_decl *decl = &ureg->const_decls2D[index2D]; + + assert(index2D < PIPE_MAX_CONSTANT_BUFFERS); + + if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { + uint i = decl->nr_constant_ranges++; + + decl->constant_range[i].first = first; + decl->constant_range[i].last = last; + } +} + + +/* A one-dimensional, depricated version of ureg_DECL_constant2D(). + * + * Constant operands declared with this function must be addressed + * with a one-dimensional index. + */ +struct ureg_src +ureg_DECL_constant(struct ureg_program *ureg, + unsigned index) { + struct const_decl *decl = &ureg->const_decls; unsigned minconst = index, maxconst = index; unsigned i; /* Inside existing range? */ - for (i = 0; i < ureg->nr_constant_ranges; i++) { - if (ureg->constant_range[i].first <= index && - ureg->constant_range[i].last >= index) + for (i = 0; i < decl->nr_constant_ranges; i++) { + if (decl->constant_range[i].first <= index && + decl->constant_range[i].last >= index) { goto out; + } } /* Extend existing range? */ - for (i = 0; i < ureg->nr_constant_ranges; i++) { - if (ureg->constant_range[i].last == index - 1) { - ureg->constant_range[i].last = index; + for (i = 0; i < decl->nr_constant_ranges; i++) { + if (decl->constant_range[i].last == index - 1) { + decl->constant_range[i].last = index; goto out; } - if (ureg->constant_range[i].first == index + 1) { - ureg->constant_range[i].first = index; + if (decl->constant_range[i].first == index + 1) { + decl->constant_range[i].first = index; goto out; } - minconst = MIN2(minconst, ureg->constant_range[i].first); - maxconst = MAX2(maxconst, ureg->constant_range[i].last); + minconst = MIN2(minconst, decl->constant_range[i].first); + maxconst = MAX2(maxconst, decl->constant_range[i].last); } /* Create new range? */ - if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { - i = ureg->nr_constant_ranges++; - ureg->constant_range[i].first = index; - ureg->constant_range[i].last = index; + if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { + i = decl->nr_constant_ranges++; + decl->constant_range[i].first = index; + decl->constant_range[i].last = index; goto out; } /* Collapse all ranges down to one: */ i = 0; - ureg->constant_range[0].first = minconst; - ureg->constant_range[0].last = maxconst; - ureg->nr_constant_ranges = 1; + decl->constant_range[0].first = minconst; + decl->constant_range[0].last = maxconst; + decl->nr_constant_ranges = 1; out: - assert(i < ureg->nr_constant_ranges); - assert(ureg->constant_range[i].first <= index); - assert(ureg->constant_range[i].last >= index); - return ureg_src_register( TGSI_FILE_CONSTANT, index ); + assert(i < decl->nr_constant_ranges); + assert(decl->constant_range[i].first <= index); + assert(decl->constant_range[i].last >= index); + return ureg_src_register(TGSI_FILE_CONSTANT, index); } @@ -578,7 +622,7 @@ decl_immediate( struct ureg_program *ureg, unsigned type ) { unsigned i, j; - unsigned swizzle; + unsigned swizzle = 0; /* Could do a first pass where we examine all existing immediates * without expanding. @@ -656,6 +700,35 @@ ureg_DECL_immediate_uint( struct ureg_program *ureg, struct ureg_src +ureg_DECL_immediate_block_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + uint index; + uint i; + + if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) { + set_bad(ureg); + return ureg_src_register(TGSI_FILE_IMMEDIATE, 0); + } + + index = ureg->nr_immediates; + ureg->nr_immediates += (nr + 3) / 4; + + for (i = index; i < ureg->nr_immediates; i++) { + ureg->immediate[i].type = TGSI_IMM_UINT32; + ureg->immediate[i].nr = nr > 4 ? 4 : nr; + memcpy(ureg->immediate[i].value.u, + &v[(i - index) * 4], + ureg->immediate[i].nr * sizeof(uint)); + nr -= 4; + } + + return ureg_src_register(TGSI_FILE_IMMEDIATE, index); +} + + +struct ureg_src ureg_DECL_immediate_int( struct ureg_program *ureg, const int *v, unsigned nr ) @@ -691,7 +764,7 @@ ureg_emit_src( struct ureg_program *ureg, if (src.Indirect) { out[0].src.Indirect = 1; out[n].value = 0; - out[n].src.File = TGSI_FILE_ADDRESS; + out[n].src.File = src.IndirectFile; out[n].src.SwizzleX = src.IndirectSwizzle; out[n].src.SwizzleY = src.IndirectSwizzle; out[n].src.SwizzleZ = src.IndirectSwizzle; @@ -1058,6 +1131,31 @@ static void emit_decl_range( struct ureg_program *ureg, } static void +emit_decl_range2D(struct ureg_program *ureg, + unsigned file, + unsigned first, + unsigned last, + unsigned index2D) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = file; + out[0].decl.UsageMask = 0xf; + out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; + out[0].decl.Dimension = 1; + + out[1].value = 0; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; + + out[2].value = 0; + out[2].decl_dim.Index2D = index2D; +} + +static void emit_immediate( struct ureg_program *ureg, const unsigned *v, unsigned type ) @@ -1104,6 +1202,38 @@ static void emit_decls( struct ureg_program *ureg ) ureg->property_gs_input_prim); } + if (ureg->property_gs_output_prim != ~0) { + assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); + + emit_property(ureg, + TGSI_PROPERTY_GS_OUTPUT_PRIM, + ureg->property_gs_output_prim); + } + + if (ureg->property_gs_max_vertices != ~0) { + assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); + + emit_property(ureg, + TGSI_PROPERTY_GS_MAX_VERTICES, + ureg->property_gs_max_vertices); + } + + if (ureg->property_fs_coord_origin) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); + + emit_property(ureg, + TGSI_PROPERTY_FS_COORD_ORIGIN, + ureg->property_fs_coord_origin); + } + + if (ureg->property_fs_coord_pixel_center) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); + + emit_property(ureg, + TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, + ureg->property_fs_coord_pixel_center); + } + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < UREG_MAX_INPUT; i++) { if (ureg->vs_inputs[i/32] & (1 << (i%32))) { @@ -1152,13 +1282,29 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler[i].Index, 1 ); } - if (ureg->nr_constant_ranges) { - for (i = 0; i < ureg->nr_constant_ranges; i++) - emit_decl_range( ureg, - TGSI_FILE_CONSTANT, - ureg->constant_range[i].first, - (ureg->constant_range[i].last + 1 - - ureg->constant_range[i].first) ); + if (ureg->const_decls.nr_constant_ranges) { + for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { + emit_decl_range(ureg, + TGSI_FILE_CONSTANT, + ureg->const_decls.constant_range[i].first, + ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1); + } + } + + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + struct const_decl *decl = &ureg->const_decls2D[i]; + + if (decl->nr_constant_ranges) { + uint j; + + for (j = 0; j < decl->nr_constant_ranges; j++) { + emit_decl_range2D(ureg, + TGSI_FILE_CONSTANT, + decl->constant_range[j].first, + decl->constant_range[j].last, + i); + } + } } if (ureg->nr_temps) { @@ -1314,6 +1460,8 @@ struct ureg_program *ureg_create( unsigned processor ) ureg->processor = processor; ureg->property_gs_input_prim = ~0; + ureg->property_gs_output_prim = ~0; + ureg->property_gs_max_vertices = ~0; return ureg; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 03eaf24854c..e25f35c6dc3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -52,9 +52,10 @@ struct ureg_src unsigned Absolute : 1; /* BOOL */ unsigned Negate : 1; /* BOOL */ int Index : 16; /* SINT */ + unsigned IndirectFile : 4; /* TGSI_FILE_ */ int IndirectIndex : 16; /* SINT */ - int IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ - int DimensionIndex : 16; /* SINT */ + unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + int DimensionIndex : 16; /* SINT */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -125,8 +126,23 @@ ureg_create_shader_and_destroy( struct ureg_program *p, void ureg_property_gs_input_prim(struct ureg_program *ureg, - unsigned gs_input_prim); + unsigned input_prim); + +void +ureg_property_gs_output_prim(struct ureg_program *ureg, + unsigned output_prim); +void +ureg_property_gs_max_vertices(struct ureg_program *ureg, + unsigned max_vertices); + +void +ureg_property_fs_coord_origin(struct ureg_program *ureg, + unsigned fs_coord_origin); + +void +ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, + unsigned fs_coord_pixel_center); /*********************************************************************** * Build shader declarations: @@ -168,10 +184,21 @@ ureg_DECL_immediate_uint( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_block_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src ureg_DECL_immediate_int( struct ureg_program *, const int *v, unsigned nr ); +void +ureg_DECL_constant2D(struct ureg_program *ureg, + unsigned first, + unsigned last, + unsigned index2D); + struct ureg_src ureg_DECL_constant( struct ureg_program *, unsigned index ); @@ -768,8 +795,9 @@ static INLINE struct ureg_src ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) { assert(reg.File != TGSI_FILE_NULL); - assert(addr.File == TGSI_FILE_ADDRESS); + assert(addr.File == TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY); reg.Indirect = 1; + reg.IndirectFile = addr.File; reg.IndirectIndex = addr.Index; reg.IndirectSwizzle = addr.SwizzleX; return reg; @@ -789,6 +817,8 @@ ureg_dst( struct ureg_src src ) { struct ureg_dst dst; + assert(!src.Indirect || src.IndirectFile == TGSI_FILE_ADDRESS); + dst.File = src.File; dst.WriteMask = TGSI_WRITEMASK_XYZW; dst.Indirect = src.Indirect; @@ -807,6 +837,30 @@ ureg_dst( struct ureg_src src ) } static INLINE struct ureg_src +ureg_src_register(unsigned file, + unsigned index) +{ + struct ureg_src src; + + src.File = file; + src.SwizzleX = TGSI_SWIZZLE_X; + src.SwizzleY = TGSI_SWIZZLE_Y; + src.SwizzleZ = TGSI_SWIZZLE_Z; + src.SwizzleW = TGSI_SWIZZLE_W; + src.Indirect = 0; + src.IndirectFile = TGSI_FILE_NULL; + src.IndirectIndex = 0; + src.IndirectSwizzle = 0; + src.Absolute = 0; + src.Index = index; + src.Negate = 0; + src.Dimension = 0; + src.DimensionIndex = 0; + + return src; +} + +static INLINE struct ureg_src ureg_src( struct ureg_dst dst ) { struct ureg_src src; @@ -817,6 +871,7 @@ ureg_src( struct ureg_dst dst ) src.SwizzleZ = TGSI_SWIZZLE_Z; src.SwizzleW = TGSI_SWIZZLE_W; src.Indirect = dst.Indirect; + src.IndirectFile = TGSI_FILE_ADDRESS; src.IndirectIndex = dst.IndirectIndex; src.IndirectSwizzle = dst.IndirectSwizzle; src.Absolute = 0; @@ -863,6 +918,7 @@ ureg_src_undef( void ) src.SwizzleZ = 0; src.SwizzleW = 0; src.Indirect = 0; + src.IndirectFile = TGSI_FILE_NULL; src.IndirectIndex = 0; src.IndirectSwizzle = 0; src.Absolute = 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index f4ca9e21ed9..0a7e4105a80 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -28,7 +28,6 @@ #include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" -#include "tgsi_build.h" #include "tgsi_util.h" union pointer_hack diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 3f74e2aa8b8..eb63bec7b50 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -92,7 +92,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) /* disabled blending/masking */ memset(&ctx->blend, 0, sizeof(ctx->blend)); - ctx->blend.colormask = PIPE_MASK_RGBA; + ctx->blend.rt[0].colormask = PIPE_MASK_RGBA; /* no-op depth/stencil/alpha */ memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil)); @@ -226,8 +226,8 @@ setup_vertex_data_tex(struct blit_state *ctx, offset = get_next_slot( ctx ); - pipe_buffer_write(ctx->pipe->screen, ctx->vbuf, - offset, sizeof(ctx->vertices), ctx->vertices); + pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf, + offset, sizeof(ctx->vertices), ctx->vertices); return offset; } @@ -262,6 +262,10 @@ regions_overlap(int srcX0, int srcY0, * Copy pixel block from src surface to dst surface. * Overlapping regions are acceptable. * Flipping and stretching are supported. + * \param filter one of PIPE_TEX_MIPFILTER_NEAREST/LINEAR + * \param writemask controls which channels in the dest surface are sourced + * from the src surface. Disabled channels are sourced + * from (0,0,0,1). * XXX what about clipping??? * XXX need some control over blitting Z and/or stencil. */ diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 249a0375fc5..935e11c5d85 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -125,7 +125,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&blend, 0, sizeof(blend)); ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend); - blend.colormask = PIPE_MASK_RGBA; + blend.rt[0].colormask = PIPE_MASK_RGBA; ctx->blend_write_color = pipe->create_blend_state(pipe, &blend); /* depth stencil alpha state objects */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 76023794dcd..8611231ed70 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1287,7 +1287,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, /* disabled blending/masking */ memset(&ctx->blend, 0, sizeof(ctx->blend)); - ctx->blend.colormask = PIPE_MASK_RGBA; + ctx->blend.rt[0].colormask = PIPE_MASK_RGBA; /* no-op depth/stencil/alpha */ memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil)); @@ -1411,8 +1411,8 @@ set_vertex_data(struct gen_mipmap_state *ctx, offset = get_next_slot( ctx ); - pipe_buffer_write(ctx->pipe->screen, ctx->vbuf, - offset, sizeof(ctx->vertices), ctx->vertices); + pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf, + offset, sizeof(ctx->vertices), ctx->vertices); return offset; } diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index f66376ad750..70de140ec9d 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -37,7 +37,6 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" -#include "util/u_format.h" #include "util/u_surface.h" diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index f9936eb1cb2..c25e1e52e9d 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -390,7 +390,7 @@ a4r4g4b4_put_tile_rgba(ushort *dst, g >>= 4; b >>= 4; a >>= 4; - *dst++ = (a << 12) | (r << 16) | (g << 4) | b; + *dst++ = (a << 12) | (r << 8) | (g << 4) | b; } p += src_stride; } diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 975ee89c455..55a65375c81 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -85,7 +85,9 @@ my_buffer_write(struct pipe_screen *screen, map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | + PIPE_BUFFER_USAGE_DISCARD | + PIPE_BUFFER_USAGE_UNSYNCHRONIZED); if (map == NULL) return PIPE_ERROR_OUT_OF_MEMORY; |