diff options
author | Christian König <[email protected]> | 2011-03-19 01:02:40 +0100 |
---|---|---|
committer | Christian König <[email protected]> | 2011-03-19 01:02:40 +0100 |
commit | 2bf95c519e755146704f4942b1703d47d18bfeaa (patch) | |
tree | 9d29c5d56014377013770615611f903cd5b25292 /src/gallium/drivers | |
parent | f36846c77ee196881c0da560229279fc7ed88170 (diff) | |
parent | 8042d751debb7a8375e8bc587189fea9a5a8371d (diff) |
Merge remote branch 'origin/master' into pipe-video
Conflicts:
src/gallium/drivers/r600/r600_asm.c
src/gallium/tests/unit/SConscript
Diffstat (limited to 'src/gallium/drivers')
146 files changed, 2222 insertions, 1730 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index f9b83c8666c..58e647a39fa 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -104,18 +104,6 @@ static const struct debug_named_value cell_debug_flags[] = { DEBUG_NAMED_VALUE_END }; -static unsigned int -cell_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned level, int layer) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * cell_create_context(struct pipe_screen *screen, @@ -140,8 +128,6 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.clear = cell_clear; cell->pipe.flush = cell_flush; - cell->pipe.is_resource_referenced = cell_is_resource_referenced; - #if 0 cell->pipe.begin_query = cell_begin_query; cell->pipe.end_query = cell_end_query; diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c index e7c9fc46d9f..181fef44f45 100644 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -59,9 +59,10 @@ cell_fence_signalled(const struct cell_context *cell, } -void +boolean cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence) + const struct cell_fence *fence, + uint64_t timeout) { while (!cell_fence_signalled(cell, fence)) { usleep(10); @@ -75,6 +76,7 @@ cell_fence_finish(const struct cell_context *cell, } } #endif + return TRUE; } diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h index 536b4ba411a..3568230b1c0 100644 --- a/src/gallium/drivers/cell/ppu/cell_fence.h +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -36,12 +36,15 @@ cell_fence_init(struct cell_fence *fence); extern boolean cell_fence_signalled(const struct cell_context *cell, - const struct cell_fence *fence); + const struct cell_fence *fence, + unsigned flags); -extern void +extern boolean cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence); + const struct cell_fence *fence, + unsigned flags, + uint64_t timeout); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 8275c9dc9c7..463f4d03eb9 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -38,19 +38,16 @@ * Called via pipe->flush() */ void -cell_flush(struct pipe_context *pipe, unsigned flags, +cell_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence) { struct cell_context *cell = cell_context(pipe); if (fence) { *fence = NULL; - /* XXX: Implement real fencing */ - flags |= CELL_FLUSH_WAIT; } - if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE)) - flags |= CELL_FLUSH_WAIT; + flags |= CELL_FLUSH_WAIT; draw_flush( cell->draw ); cell_flush_int(cell, flags); diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 8d2b4b96438..0ee124a24fc 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -149,8 +149,7 @@ cell_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags ) + unsigned tex_usage) { struct sw_winsys *winsys = cell_screen(screen)->winsys; diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index d60718d9716..0fefec9aaea 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -79,7 +79,7 @@ static void failover_draw_vbo( struct pipe_context *pipe, if (failover->mode == FO_SW) { if (failover->dirty) { - failover->hw->flush( failover->hw, ~0, NULL ); + failover->hw->flush( failover->hw, NULL ); failover_state_emit( failover ); } @@ -89,22 +89,10 @@ static void failover_draw_vbo( struct pipe_context *pipe, * intervening flush. Unlikely to be much performance impact to * this: */ - failover->sw->flush( failover->sw, ~0, NULL ); + failover->sw->flush( failover->sw, NULL ); } } -static unsigned int -failover_is_resource_referenced( struct pipe_context *_pipe, - struct pipe_resource *resource, - unsigned level, int layer) -{ - struct failover_context *failover = failover_context( _pipe ); - struct pipe_context *pipe = (failover->mode == FO_HW) ? - failover->hw : failover->sw; - - return pipe->is_resource_referenced(pipe, resource, level, layer); -} - struct pipe_context *failover_create( struct pipe_context *hw, struct pipe_context *sw ) { @@ -150,7 +138,6 @@ struct pipe_context *failover_create( struct pipe_context *hw, #endif failover->pipe.flush = hw->flush; - failover->pipe.is_resource_referenced = failover_is_resource_referenced; failover->dirty = 0; diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index 75e4c253dd9..813a21e2ee3 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -761,34 +761,15 @@ galahad_clear_depth_stencil(struct pipe_context *_pipe, static void galahad_flush(struct pipe_context *_pipe, - unsigned flags, struct pipe_fence_handle **fence) { struct galahad_context *glhd_pipe = galahad_context(_pipe); struct pipe_context *pipe = glhd_pipe->pipe; pipe->flush(pipe, - flags, fence); } -static unsigned int -galahad_is_resource_referenced(struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned level, - int layer) -{ - struct galahad_context *glhd_pipe = galahad_context(_pipe); - struct galahad_resource *glhd_resource = galahad_resource(_resource); - struct pipe_context *pipe = glhd_pipe->pipe; - struct pipe_resource *resource = glhd_resource->resource; - - return pipe->is_resource_referenced(pipe, - resource, - level, - layer); -} - static struct pipe_sampler_view * galahad_context_create_sampler_view(struct pipe_context *_pipe, struct pipe_resource *_resource, @@ -1038,7 +1019,6 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->base.clear_render_target = galahad_clear_render_target; glhd_pipe->base.clear_depth_stencil = galahad_clear_depth_stencil; glhd_pipe->base.flush = galahad_flush; - glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced; glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view; glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy; glhd_pipe->base.create_surface = galahad_context_create_surface; diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c index b4825bef66d..b4edebe4920 100644 --- a/src/gallium/drivers/galahad/glhd_screen.c +++ b/src/gallium/drivers/galahad/glhd_screen.c @@ -106,8 +106,7 @@ galahad_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { struct galahad_screen *glhd_screen = galahad_screen(_screen); struct pipe_screen *screen = glhd_screen->screen; @@ -120,8 +119,7 @@ galahad_screen_is_format_supported(struct pipe_screen *_screen, format, target, sample_count, - tex_usage, - geom_flags); + tex_usage); } static struct pipe_context * @@ -276,30 +274,28 @@ galahad_screen_fence_reference(struct pipe_screen *_screen, fence); } -static int +static boolean galahad_screen_fence_signalled(struct pipe_screen *_screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct galahad_screen *glhd_screen = galahad_screen(_screen); struct pipe_screen *screen = glhd_screen->screen; return screen->fence_signalled(screen, - fence, - flags); + fence); } -static int +static boolean galahad_screen_fence_finish(struct pipe_screen *_screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct galahad_screen *glhd_screen = galahad_screen(_screen); struct pipe_screen *screen = glhd_screen->screen; return screen->fence_finish(screen, fence, - flags); + timeout); } struct pipe_screen * diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO index f4e1423fa59..fba180064c3 100644 --- a/src/gallium/drivers/i915/TODO +++ b/src/gallium/drivers/i915/TODO @@ -12,22 +12,19 @@ Random list of problems with i915g: unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still broken, it doesn't update the viewport/get new buffers. -- Tends to hang the chip after a few minutes of openarena. Looks tiling related, - at the last frame rendered has tiling corruption over the complete frame. - - Kills the chip in 3D_PRIMITIVE LINELIST with mesa-demos/fbotexture in - wireframe mode. - -- Tiling is funny: If unlucky, it renders/samples all black. No clue yet what's - going on. Seems to depend on tiny details like whethever the sampler - relocation is fenced/unfenced (broken _with_ fenced reloc using tiling bits!). + wireframe mode. Changing the cullmode to cw from none mitigates the crash. As + does emitting only one line segment (2 indices) per 3D_PRIMITIVE command in + the batch. - Y-tiling is even more fun. i915c doesn't use it, maybe there's a reason? Texture sampling from Y-tiled buffers seems to work, though (save above problems). + RESOLVED: Y-tiling works with the render engine, but not with the blitter. + Use u_blitter and hw clears (PRIM3D_CLEAR_RECT). -- Need to validate buffers before usage. Currently do_exec on the batchbuffer - can fail with -ENOSPC. +- src/xvmc/i915_structs.h in xf86-video-intel has a few more bits of various + commands defined. Scavenge them and see what's useful. Other bugs can be found here: https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index 039c8713570..ce2691b2fd7 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -37,6 +37,9 @@ #define OUT_BATCH(dword) \ i915_winsys_batchbuffer_dword(i915->batch, dword) +#define OUT_BATCH_F(f) \ + i915_winsys_batchbuffer_float(i915->batch, f) + #define OUT_RELOC(buf, usage, offset) \ i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, false) diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index 9df82272604..78554034781 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -55,6 +55,16 @@ i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch, } static INLINE void +i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch, + float f) +{ + union { float f; unsigned int ui; } uif; + uif.f = f; + assert (i915_winsys_batchbuffer_space(batch) >= 4); + i915_winsys_batchbuffer_dword_unchecked(batch, uif.ui); +} + +static INLINE void i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch, unsigned dword) { diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index 6d824a507aa..4a97746e981 100644 --- a/src/gallium/drivers/i915/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c @@ -31,17 +31,118 @@ #include "util/u_clear.h" +#include "util/u_format.h" +#include "util/u_pack_color.h" #include "i915_context.h" +#include "i915_screen.h" +#include "i915_reg.h" +#include "i915_batch.h" +#include "i915_resource.h" +#include "i915_state.h" +void +i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil, + unsigned destx, unsigned desty, unsigned width, unsigned height) +{ + struct i915_context *i915 = i915_context(pipe); + uint32_t clear_params, clear_color, clear_depth, clear_stencil, + clear_color8888, packed_z_stencil; + union util_color u_color; + float f_depth = depth; + struct i915_texture *cbuf_tex, *depth_tex; + + cbuf_tex = depth_tex = NULL; + clear_params = 0; + + if (buffers & PIPE_CLEAR_COLOR) { + struct pipe_surface *cbuf = i915->framebuffer.cbufs[0]; + + clear_params |= CLEARPARAM_WRITE_COLOR; + cbuf_tex = i915_texture(cbuf->texture); + util_pack_color(rgba, cbuf->format, &u_color); + if (util_format_get_blocksize(cbuf_tex->b.b.format) == 4) + clear_color = u_color.ui; + else + clear_color = (u_color.ui & 0xffff) | (u_color.ui << 16); + + util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &u_color); + clear_color8888 = u_color.ui; + } else + clear_color = clear_color8888 = 0; + + clear_depth = clear_stencil = 0; + if (buffers & PIPE_CLEAR_DEPTH) { + struct pipe_surface *zbuf = i915->framebuffer.zsbuf; + + clear_params |= CLEARPARAM_WRITE_DEPTH; + depth_tex = i915_texture(zbuf->texture); + packed_z_stencil = util_pack_z_stencil(depth_tex->b.b.format, depth, stencil); + + if (util_format_get_blocksize(depth_tex->b.b.format) == 4) { + /* Avoid read-modify-write if there's no stencil. */ + if (buffers & PIPE_CLEAR_STENCIL + || depth_tex->b.b.format != PIPE_FORMAT_Z24_UNORM_S8_USCALED) { + clear_params |= CLEARPARAM_WRITE_STENCIL; + clear_stencil = packed_z_stencil & 0xff; + clear_depth = packed_z_stencil; + } else + clear_depth = packed_z_stencil & 0xffffff00; + } else { + clear_depth = (clear_depth & 0xffff) | (clear_depth << 16); + } + } + + if (i915->hardware_dirty) + i915_emit_hardware_state(i915); + + if (!BEGIN_BATCH(7 + 7)) { + FLUSH_BATCH(NULL); + + i915_emit_hardware_state(i915); + i915->vbo_flushed = 1; + + assert(BEGIN_BATCH(7 + 7)); + } + + OUT_BATCH(_3DSTATE_CLEAR_PARAMETERS); + OUT_BATCH(clear_params | CLEARPARAM_CLEAR_RECT); + OUT_BATCH(clear_color); + OUT_BATCH(clear_depth); + OUT_BATCH(clear_color8888); + OUT_BATCH_F(f_depth); + OUT_BATCH(clear_stencil); + + OUT_BATCH(_3DPRIMITIVE | PRIM3D_CLEAR_RECT | 5); + OUT_BATCH_F(destx + width); + OUT_BATCH_F(desty + height); + OUT_BATCH_F(destx); + OUT_BATCH_F(desty + height); + OUT_BATCH_F(destx); + OUT_BATCH_F(desty); +} /** * Clear the given buffers to the specified values. * No masking, no scissor (clear entire buffer). */ void -i915_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil) +i915_clear_blitter(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) { util_clear(pipe, &i915_context(pipe)->framebuffer, buffers, rgba, depth, stencil); } + +void +i915_clear_render(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) +{ + struct i915_context *i915 = i915_context(pipe); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, buffers, rgba, depth, stencil, + 0, 0, i915->framebuffer.width, i915->framebuffer.height); +} diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 84c8cb54436..7a98ef73c1f 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -103,6 +103,9 @@ static void i915_destroy(struct pipe_context *pipe) int i; draw_destroy(i915->draw); + + if (i915->blitter) + util_blitter_destroy(i915->blitter); if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); @@ -137,7 +140,10 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->base.destroy = i915_destroy; - i915->base.clear = i915_clear; + if (i915_screen(screen)->debug.use_blitter) + i915->base.clear = i915_clear_blitter; + else + i915->base.clear = i915_clear_render; i915->base.draw_vbo = i915_draw_vbo; @@ -145,6 +151,10 @@ i915_create_context(struct pipe_screen *screen, void *priv) util_slab_create(&i915->transfer_pool, sizeof(struct pipe_transfer), 16, UTIL_SLAB_SINGLETHREADED); + /* Batch stream debugging is a bit hacked up at the moment: + */ + i915->batch = i915->iws->batchbuffer_create(i915->iws); + /* * Create drawing context and plug our rendering stage into it. */ @@ -164,15 +174,19 @@ i915_create_context(struct pipe_screen *screen, void *priv) draw_install_aaline_stage(i915->draw, &i915->base); draw_install_aapoint_stage(i915->draw, &i915->base); + /* augmented draw pipeline clobbers state functions */ + i915_init_fixup_state_functions(i915); + + /* Create blitter last - calls state creation functions. */ + i915->blitter = util_blitter_create(&i915->base); + assert(i915->blitter); + i915->dirty = ~0; i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; i915->dynamic_dirty = ~0; + i915->static_dirty = ~0; i915->flush_dirty = 0; - /* Batch stream debugging is a bit hacked up at the moment: - */ - i915->batch = i915->iws->batchbuffer_create(i915->iws); - return &i915->base; } diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 1da637d068e..dacf50e870d 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -38,6 +38,7 @@ #include "tgsi/tgsi_scan.h" #include "util/u_slab.h" +#include "util/u_blitter.h" struct i915_winsys; @@ -185,7 +186,7 @@ struct i915_rasterizer_state { unsigned LIS7; unsigned sc[1]; - const struct pipe_rasterizer_state *templ; + struct pipe_rasterizer_state templ; union { float f; unsigned u; } ds[2]; }; @@ -244,14 +245,35 @@ struct i915_context { struct i915_state current; unsigned hardware_dirty; - unsigned immediate_dirty; - unsigned dynamic_dirty; - unsigned flush_dirty; + unsigned immediate_dirty : I915_MAX_IMMEDIATE; + unsigned dynamic_dirty : I915_MAX_DYNAMIC; + unsigned static_dirty : 4; + unsigned flush_dirty : 2; struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS]; int num_validation_buffers; struct util_slab_mempool transfer_pool; + + /** blitter/hw-clear */ + struct blitter_context* blitter; + + /** State tracking needed by u_blitter for save/restore. */ + void *saved_fs; + void (*saved_bind_fs_state)(struct pipe_context *pipe, void *shader); + void *saved_vs; + struct pipe_clip_state saved_clip; + struct i915_velems_state *saved_velems; + unsigned saved_nr_vertex_buffers; + struct pipe_vertex_buffer saved_vertex_buffers[PIPE_MAX_ATTRIBS]; + unsigned saved_nr_samplers; + void *saved_samplers[PIPE_MAX_SAMPLERS]; + void (*saved_bind_sampler_states)(struct pipe_context *pipe, + unsigned num, void **sampler); + unsigned saved_nr_sampler_views; + struct pipe_sampler_view *saved_sampler_views[PIPE_MAX_SAMPLERS]; + void (*saved_set_sampler_views)(struct pipe_context *pipe, + unsigned num, struct pipe_sampler_view **views); }; /* A flag for each state_tracker state object: @@ -296,6 +318,12 @@ struct i915_context { #define I915_FLUSH_CACHE 1 #define I915_PIPELINE_FLUSH 2 +/* split up static state */ +#define I915_DST_BUF_COLOR 1 +#define I915_DST_BUF_DEPTH 2 +#define I915_DST_VARS 4 +#define I915_DST_RECT 8 + static INLINE void i915_set_flush_dirty(struct i915_context *i915, unsigned flush) { @@ -326,14 +354,20 @@ void i915_emit_hardware_state(struct i915_context *i915 ); /*********************************************************************** * i915_clear.c: */ -void i915_clear( struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil); +void i915_clear_blitter(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil); +void i915_clear_render(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil); +void i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil, + unsigned destx, unsigned desty, unsigned width, unsigned height); /*********************************************************************** * */ void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_fixup_state_functions( struct i915_context *i915 ); void i915_init_flush_functions( struct i915_context *i915 ); void i915_init_string_functions( struct i915_context *i915 ); diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 1713bf131f2..c4eed473e90 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -46,17 +46,18 @@ static const struct debug_named_value debug_options[] = { }; unsigned i915_debug = 0; -boolean i915_tiling = TRUE; DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0) DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE) DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(i915_use_blitter, "I915_USE_BLITTER", FALSE) void i915_debug_init(struct i915_screen *is) { i915_debug = debug_get_option_i915_debug(); is->debug.tiling = !debug_get_option_i915_no_tiling(); is->debug.lie = debug_get_option_i915_lie(); + is->debug.use_blitter = debug_get_option_i915_use_blitter(); } diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index f2044d661e3..b4e81147c4f 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -39,34 +39,12 @@ static void i915_flush_pipe( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence ) { struct i915_context *i915 = i915_context(pipe); draw_flush(i915->draw); -#if 0 - /* Do we need to emit an MI_FLUSH command to flush the hardware - * caches? - */ - if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { - unsigned flush = MI_FLUSH; - - if (!(flags & PIPE_FLUSH_RENDER_CACHE)) - flush |= INHIBIT_FLUSH_RENDER_CACHE; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) - flush |= FLUSH_MAP_CACHE; - - if (!BEGIN_BATCH(1)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(1)); - } - OUT_BATCH( flush ); - } -#endif - if (i915->batch->map == i915->batch->ptr) { return; } @@ -96,6 +74,7 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; i915->dynamic_dirty = ~0; + i915->static_dirty = ~0; /* kernel emits flushes in between batchbuffers */ i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index 276e33d4b9d..85656cd7846 100644 --- a/src/gallium/drivers/i915/i915_prim_emit.c +++ b/src/gallium/drivers/i915/i915_prim_emit.c @@ -149,7 +149,6 @@ emit_prim( struct draw_stage *stage, /* Make sure state is re-emitted after a flush: */ - i915_update_derived( i915 ); i915_emit_hardware_state( i915 ); if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index fb4c0516dd8..79db3b650eb 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -470,7 +470,6 @@ draw_arrays_fallback(struct vbuf_render *render, /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; @@ -519,7 +518,6 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; @@ -640,7 +638,6 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, /* Make sure state is re-emitted after a flush: */ - i915_update_derived(i915); i915_emit_hardware_state(i915); i915->vbo_flushed = 1; diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h index 5e4e80ddf6b..6fe032cdb6e 100644 --- a/src/gallium/drivers/i915/i915_reg.h +++ b/src/gallium/drivers/i915/i915_reg.h @@ -148,6 +148,7 @@ /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) /* Dword 1 */ +#define CLASSIC_EARLY_DEPTH (1<<31) #define TEX_DEFAULT_COLOR_OGL (0<<30) #define TEX_DEFAULT_COLOR_D3D (1<<30) #define ZR_EARLY_DEPTH (1<<29) diff --git a/src/gallium/drivers/i915/i915_resource.c b/src/gallium/drivers/i915/i915_resource.c index 499233ceb9b..7f52ba11d61 100644 --- a/src/gallium/drivers/i915/i915_resource.c +++ b/src/gallium/drivers/i915/i915_resource.c @@ -31,7 +31,6 @@ i915_resource_from_handle(struct pipe_screen * screen, void i915_init_resource_functions(struct i915_context *i915 ) { - i915->base.is_resource_referenced = u_default_is_resource_referenced; i915->base.get_transfer = u_get_transfer_vtbl; i915->base.transfer_map = u_transfer_map_vtbl; i915->base.transfer_flush_region = u_transfer_flush_region_vtbl; diff --git a/src/gallium/drivers/i915/i915_resource_buffer.c b/src/gallium/drivers/i915/i915_resource_buffer.c index 51482f54fc4..77c03450b3a 100644 --- a/src/gallium/drivers/i915/i915_resource_buffer.c +++ b/src/gallium/drivers/i915/i915_resource_buffer.c @@ -123,7 +123,6 @@ struct u_resource_vtbl i915_buffer_vtbl = { i915_buffer_get_handle, /* get_handle */ i915_buffer_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ i915_get_transfer, /* get_transfer */ i915_transfer_destroy, /* transfer_destroy */ i915_buffer_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index aad5235a6ad..7816925d230 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -184,7 +184,10 @@ i915_texture_tiling(struct i915_screen *is, struct i915_texture *tex) /* XXX X-tiling might make sense */ return I915_TILE_NONE; - return I915_TILE_X; + if (is->debug.use_blitter) + return I915_TILE_X; + else + return I915_TILE_Y; } @@ -756,6 +759,9 @@ i915_texture_transfer_map(struct pipe_context *pipe, assert(box->z == 0); offset = i915_texture_offset(tex, transfer->level, box->z); + /* TODO this is a sledgehammer */ + pipe->flush(pipe, NULL); + map = iws->buffer_map(iws, tex->buffer, (transfer->usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE); if (map == NULL) @@ -781,7 +787,6 @@ struct u_resource_vtbl i915_texture_vtbl = { i915_texture_get_handle, /* get_handle */ i915_texture_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ i915_texture_get_transfer, /* get_transfer */ i915_transfer_destroy, /* transfer_destroy */ i915_texture_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 77febbf5012..e62b609eb5a 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -105,6 +105,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) switch (cap) { /* Supported features (boolean caps). */ case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */ case PIPE_CAP_TEXTURE_MIRROR_REPEAT: @@ -115,17 +116,18 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) /* Features that should be supported (boolean caps). */ /* XXX: Just test the code */ case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 0; + /* XXX: No code but hw supports it */ + case PIPE_CAP_POINT_SPRITE: + /* Also lie about these when asked to (needed for GLSL / GL 2.0) */ + return is->debug.lie ? 1 : 0; /* Unsupported features (boolean caps). */ case PIPE_CAP_ARRAY_TEXTURES: case PIPE_CAP_DEPTH_CLAMP: - case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: /* disable for now */ - case PIPE_CAP_GLSL: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_INSTANCED_DRAWING: /* draw module? */ - case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_SWIZZLE: @@ -133,6 +135,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) return 0; /* Features we can lie about (boolean caps). */ + case PIPE_CAP_GLSL: case PIPE_CAP_OCCLUSION_QUERY: return is->debug.lie ? 1 : 0; @@ -249,8 +252,7 @@ i915_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { static const enum pipe_format tex_supported[] = { PIPE_FORMAT_B8G8R8A8_UNORM, @@ -317,24 +319,23 @@ i915_fence_reference(struct pipe_screen *screen, is->iws->fence_reference(is->iws, ptr, fence); } -static int +static boolean i915_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct i915_screen *is = i915_screen(screen); - return is->iws->fence_signalled(is->iws, fence); + return is->iws->fence_signalled(is->iws, fence) == 0; } -static int +static boolean i915_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct i915_screen *is = i915_screen(screen); - return is->iws->fence_finish(is->iws, fence); + return is->iws->fence_finish(is->iws, fence) == 0; } diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index 60f0e2971e0..cfc585b5350 100644 --- a/src/gallium/drivers/i915/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h @@ -49,6 +49,7 @@ struct i915_screen struct { boolean tiling; boolean lie; + boolean use_blitter; } debug; }; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 58bbbd1de2c..1b57c5776f2 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -287,6 +287,17 @@ i915_create_sampler_state(struct pipe_context *pipe, return cso; } +static void i915_fixup_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->saved_nr_samplers = num; + memcpy(&i915->saved_samplers, sampler, sizeof(void *) * num); + + i915->saved_bind_sampler_states(pipe, num, sampler); +} + static void i915_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { @@ -466,6 +477,17 @@ i915_create_fs_state(struct pipe_context *pipe, } static void +i915_fixup_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->saved_fs = shader; + + i915->saved_bind_fs_state(pipe, shader); +} + +static void i915_bind_fs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); @@ -505,6 +527,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); + i915->saved_vs = shader; + /* just pass-through to draw module */ draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); @@ -571,6 +595,27 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, } +static void +i915_fixup_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) +{ + struct i915_context *i915 = i915_context(pipe); + int i; + + for (i = 0; i < num; i++) + pipe_sampler_view_reference(&i915->saved_sampler_views[i], + views[i]); + + for (i = num; i < i915->saved_nr_sampler_views; i++) + pipe_sampler_view_reference(&i915->saved_sampler_views[i], + NULL); + + i915->saved_nr_sampler_views = num; + + i915->saved_set_sampler_views(pipe, num, views); +} + static void i915_set_fragment_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -642,7 +687,8 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe, i915->framebuffer.height = fb->height; i915->framebuffer.nr_cbufs = fb->nr_cbufs; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]); + pipe_surface_reference(&i915->framebuffer.cbufs[i], + i < fb->nr_cbufs ? fb->cbufs[i] : NULL); } pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf); @@ -657,6 +703,8 @@ static void i915_set_clip_state( struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); draw_flush(i915->draw); + i915->saved_clip = *clip; + draw_set_clip_state(i915->draw, clip); i915->dirty |= I915_NEW_CLIP; @@ -687,7 +735,7 @@ i915_create_rasterizer_state(struct pipe_context *pipe, { struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); - cso->templ = rasterizer; + cso->templ = *rasterizer; cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; cso->light_twoside = rasterizer->light_twoside; cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; @@ -758,7 +806,7 @@ static void i915_bind_rasterizer_state( struct pipe_context *pipe, /* pass-through to draw module */ draw_set_rasterizer_state(i915->draw, - (i915->rasterizer ? i915->rasterizer->templ : NULL), + (i915->rasterizer ? &(i915->rasterizer->templ) : NULL), raster); i915->dirty |= I915_NEW_RASTERIZER; @@ -778,6 +826,9 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, struct draw_context *draw = i915->draw; int i; + util_copy_vertex_buffers(i915->saved_vertex_buffers, + &i915->saved_nr_vertex_buffers, + buffers, count); #if 0 /* XXX doesn't look like this is needed */ /* unmap old */ @@ -818,6 +869,8 @@ i915_bind_vertex_elements_state(struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems; + i915->saved_velems = velems; + /* pass-through to draw module */ if (i915_velems) { draw_set_vertex_elements(i915->draw, @@ -896,3 +949,14 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_index_buffer = i915_set_index_buffer; i915->base.redefine_user_buffer = u_default_redefine_user_buffer; } + +void +i915_init_fixup_state_functions( struct i915_context *i915 ) +{ + i915->saved_bind_fs_state = i915->base.bind_fs_state; + i915->base.bind_fs_state = i915_fixup_bind_fs_state; + i915->saved_bind_sampler_states = i915->base.bind_fragment_sampler_states; + i915->base.bind_fragment_sampler_states = i915_fixup_bind_sampler_states; + i915->saved_set_sampler_views = i915->base.set_fragment_sampler_views; + i915->base.set_fragment_sampler_views = i915_fixup_set_fragment_sampler_views; +} diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h index b4074dc35ba..3f4e40294e6 100644 --- a/src/gallium/drivers/i915/i915_state.h +++ b/src/gallium/drivers/i915/i915_state.h @@ -48,6 +48,7 @@ extern struct i915_tracked_state i915_hw_immediate; extern struct i915_tracked_state i915_hw_dynamic; extern struct i915_tracked_state i915_hw_fs; extern struct i915_tracked_state i915_hw_framebuffer; +extern struct i915_tracked_state i915_hw_dst_buf_vars; extern struct i915_tracked_state i915_hw_constants; void i915_update_derived(struct i915_context *i915); diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 1d4026a214e..59ac2f7292a 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -165,6 +165,7 @@ static struct i915_tracked_state *atoms[] = { &i915_hw_dynamic, &i915_hw_fs, &i915_hw_framebuffer, + &i915_hw_dst_buf_vars, &i915_hw_constants, NULL, }; diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 15350c0a5d7..0155cd83510 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -119,7 +119,7 @@ validate_immediate(struct i915_context *i915, unsigned *batch_space) 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & i915->immediate_dirty; - if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo) i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; *batch_space = 1 + util_bitcount(dirty); @@ -173,25 +173,31 @@ emit_dynamic(struct i915_context *i915) static void validate_static(struct i915_context *i915, unsigned *batch_space) { - *batch_space = 2 + 5; /* including DRAW_RECT */ + *batch_space = 0; - if (i915->current.cbuf_bo) { + if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { i915->validation_buffers[i915->num_validation_buffers++] = i915->current.cbuf_bo; *batch_space += 3; } - if (i915->current.depth_bo) { + if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { i915->validation_buffers[i915->num_validation_buffers++] = i915->current.depth_bo; *batch_space += 3; } + + if (i915->static_dirty & I915_DST_VARS) + *batch_space += 2; + + if (i915->static_dirty & I915_DST_RECT) + *batch_space += 5; } static void emit_static(struct i915_context *i915) { - if (i915->current.cbuf_bo) { + if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(i915->current.cbuf_flags); OUT_RELOC(i915->current.cbuf_bo, @@ -201,7 +207,7 @@ emit_static(struct i915_context *i915) /* What happens if no zbuf?? */ - if (i915->current.depth_bo) { + if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(i915->current.depth_flags); OUT_RELOC(i915->current.depth_bo, @@ -209,7 +215,7 @@ emit_static(struct i915_context *i915) 0); } - { + if (i915->static_dirty & I915_DST_VARS) { OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); OUT_BATCH(i915->current.dst_buf_vars); } @@ -273,7 +279,7 @@ emit_sampler(struct i915_context *i915) if (i915->current.sampler_enable_nr) { int i; - OUT_BATCH( _3DSTATE_SAMPLER_STATE | + OUT_BATCH( _3DSTATE_SAMPLER_STATE | (3 * i915->current.sampler_enable_nr) ); OUT_BATCH( i915->current.sampler_enable_flags ); @@ -355,11 +361,13 @@ emit_program(struct i915_context *i915) static void emit_draw_rect(struct i915_context *i915) { - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); - OUT_BATCH(i915->current.draw_offset); - OUT_BATCH(i915->current.draw_size); - OUT_BATCH(i915->current.draw_offset); + if (i915->static_dirty & I915_DST_RECT) { + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); + } } static boolean @@ -405,6 +413,8 @@ i915_emit_hardware_state(struct i915_context *i915 ) unsigned batch_space; uintptr_t save_ptr; + assert(i915->dirty == 0); + if (I915_DBG_ON(DBG_ATOMS)) i915_dump_hardware_dirty(i915, __FUNCTION__); @@ -444,5 +454,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) i915->hardware_dirty = 0; i915->immediate_dirty = 0; i915->dynamic_dirty = 0; + i915->static_dirty = 0; i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index 20cd23f8f73..2865298318c 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -28,6 +28,7 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_resource.h" +#include "i915_screen.h" @@ -78,38 +79,13 @@ buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) return tiling_bits; } -/** - * Examine framebuffer state to determine width, height. - */ -static boolean -framebuffer_size(const struct pipe_framebuffer_state *fb, - uint *width, uint *height) -{ - if (fb->cbufs[0]) { - *width = fb->cbufs[0]->width; - *height = fb->cbufs[0]->height; - return TRUE; - } - else if (fb->zsbuf) { - *width = fb->zsbuf->width; - *height = fb->zsbuf->height; - return TRUE; - } - else { - *width = *height = 0; - return FALSE; - } -} - static void update_framebuffer(struct i915_context *i915) { struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; - unsigned cformat, zformat; - unsigned x, y, w, h; + unsigned x, y; int layer; - uint32_t draw_offset; - boolean ret; + uint32_t draw_offset, draw_size; if (cbuf_surface) { struct i915_texture *tex = i915_texture(cbuf_surface->texture); @@ -119,7 +95,6 @@ static void update_framebuffer(struct i915_context *i915) i915->current.cbuf_flags = BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ buf_3d_tiling_bits(tex->tiling); - cformat = cbuf_surface->format; layer = cbuf_surface->u.tex.first_layer; @@ -127,10 +102,9 @@ static void update_framebuffer(struct i915_context *i915) y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; } else { i915->current.cbuf_bo = NULL; - cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ x = y = 0; } - cformat = translate_format(cformat); + i915->static_dirty |= I915_DST_BUF_COLOR; /* What happens if no zbuf?? */ @@ -145,28 +119,23 @@ static void update_framebuffer(struct i915_context *i915) i915->current.depth_flags = BUF_3D_ID_DEPTH | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ buf_3d_tiling_bits(tex->tiling); - zformat = translate_depth_format(depth_surface->format); - } else { + } else i915->current.depth_bo = NULL; - zformat = 0; - } - - i915->current.dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - cformat | - zformat; + i915->static_dirty |= I915_DST_BUF_DEPTH; /* drawing rect calculations */ draw_offset = x | (y << 16); - ret = framebuffer_size(&i915->framebuffer, &w, &h); - assert(ret); + draw_size = (i915->framebuffer.width - 1 + x) | + ((i915->framebuffer.height - 1 + y) << 16); if (i915->current.draw_offset != draw_offset) { i915->current.draw_offset = draw_offset; i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); + i915->static_dirty |= I915_DST_RECT; + } + if (i915->current.draw_size != draw_size) { + i915->current.draw_size = draw_size; + i915->static_dirty |= I915_DST_RECT; } - i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16); i915->hardware_dirty |= I915_HW_STATIC; @@ -179,3 +148,52 @@ struct i915_tracked_state i915_hw_framebuffer = { update_framebuffer, I915_NEW_FRAMEBUFFER }; + +static void update_dst_buf_vars(struct i915_context *i915) +{ + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + uint32_t dst_buf_vars, cformat, zformat; + uint32_t early_z = 0; + + if (cbuf_surface) + cformat = cbuf_surface->format; + else + cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ + cformat = translate_format(cformat); + + if (depth_surface) { + struct i915_texture *tex = i915_texture(depth_surface->texture); + struct i915_screen *is = i915_screen(i915->base.screen); + + zformat = translate_depth_format(depth_surface->format); + + if (is->is_i945 && tex->tiling != I915_TILE_NONE + && !i915->fs->info.writes_z) + early_z = CLASSIC_EARLY_DEPTH; + } else + zformat = 0; + + dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat | + early_z; + + if (i915->current.dst_buf_vars != dst_buf_vars) { + if (early_z != (i915->current.dst_buf_vars & CLASSIC_EARLY_DEPTH)) + i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); + + i915->current.dst_buf_vars = dst_buf_vars; + i915->static_dirty |= I915_DST_VARS; + i915->hardware_dirty |= I915_HW_STATIC; + } +} + +struct i915_tracked_state i915_hw_dst_buf_vars = { + "dst buf vars", + update_dst_buf_vars, + I915_NEW_FRAMEBUFFER | I915_NEW_FS +}; diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index becc6e93c2d..d02c420f6c2 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -27,6 +27,7 @@ #include "i915_surface.h" #include "i915_resource.h" +#include "i915_state.h" #include "i915_blit.h" #include "i915_reg.h" #include "i915_screen.h" @@ -37,16 +38,119 @@ #include "util/u_memory.h" #include "util/u_pack_color.h" +/* + * surface functions using the render engine + */ + +static void +i915_surface_copy_render(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + struct i915_context *i915 = i915_context(pipe); + + util_blitter_save_blend(i915->blitter, (void *)i915->blend); + util_blitter_save_depth_stencil_alpha(i915->blitter, (void *)i915->depth_stencil); + util_blitter_save_stencil_ref(i915->blitter, &i915->stencil_ref); + util_blitter_save_rasterizer(i915->blitter, (void *)i915->rasterizer); + util_blitter_save_fragment_shader(i915->blitter, i915->saved_fs); + util_blitter_save_vertex_shader(i915->blitter, i915->saved_vs); + util_blitter_save_viewport(i915->blitter, &i915->viewport); + util_blitter_save_clip(i915->blitter, &i915->saved_clip); + util_blitter_save_vertex_elements(i915->blitter, i915->saved_velems); + util_blitter_save_vertex_buffers(i915->blitter, i915->saved_nr_vertex_buffers, + i915->saved_vertex_buffers); + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + util_blitter_save_fragment_sampler_states(i915->blitter, + i915->saved_nr_samplers, + i915->saved_samplers); + util_blitter_save_fragment_sampler_views(i915->blitter, + i915->saved_nr_sampler_views, + i915->saved_sampler_views); + + util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box, TRUE); +} + +static void +i915_clear_render_target_render(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_framebuffer_state fb_state; + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + fb_state.width = dst->width; + fb_state.height = dst->height; + fb_state.nr_cbufs = 1; + fb_state.cbufs[0] = dst; + fb_state.zsbuf = NULL; + pipe->set_framebuffer_state(pipe, &fb_state); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, PIPE_CLEAR_COLOR, rgba, 0.0, 0x0, + dstx, dsty, width, height); + + pipe->set_framebuffer_state(pipe, &i915->blitter->saved_fb_state); + util_unreference_framebuffer_state(&i915->blitter->saved_fb_state); + i915->blitter->saved_fb_state.nr_cbufs = ~0; +} + +static void +i915_clear_depth_stencil_render(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_framebuffer_state fb_state; + + util_blitter_save_framebuffer(i915->blitter, &i915->framebuffer); + + fb_state.width = dst->width; + fb_state.height = dst->height; + fb_state.nr_cbufs = 0; + fb_state.zsbuf = dst; + pipe->set_framebuffer_state(pipe, &fb_state); + + if (i915->dirty) + i915_update_derived(i915); + + i915_clear_emit(pipe, clear_flags & PIPE_CLEAR_DEPTHSTENCIL, + NULL, depth, stencil, + dstx, dsty, width, height); + + pipe->set_framebuffer_state(pipe, &i915->blitter->saved_fb_state); + util_unreference_framebuffer_state(&i915->blitter->saved_fb_state); + i915->blitter->saved_fb_state.nr_cbufs = ~0; +} + +/* + * surface functions using the blitter + */ /* Assumes all values are within bounds -- no checking at this level - * do it higher up if required. */ static void -i915_surface_copy(struct pipe_context *pipe, - struct pipe_resource *dst, unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, unsigned src_level, - const struct pipe_box *src_box) +i915_surface_copy_blitter(struct pipe_context *pipe, + struct pipe_resource *dst, unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) { struct i915_texture *dst_tex = i915_texture(dst); struct i915_texture *src_tex = i915_texture(src); @@ -66,7 +170,6 @@ i915_surface_copy(struct pipe_context *pipe, assert(src_box->z == 0); src_offset = i915_texture_offset(src_tex, src_level, src_box->z); - assert( dst != src ); assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) ); assert( util_format_get_blockwidth(dpt->format) == util_format_get_blockwidth(spt->format) ); assert( util_format_get_blockheight(dpt->format) == util_format_get_blockheight(spt->format) ); @@ -81,13 +184,12 @@ i915_surface_copy(struct pipe_context *pipe, (short) src_box->width, (short) src_box->height ); } - static void -i915_clear_render_target(struct pipe_context *pipe, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) +i915_clear_render_target_blitter(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { struct i915_texture *tex = i915_texture(dst->texture); struct pipe_resource *pt = &tex->b.b; @@ -109,13 +211,13 @@ i915_clear_render_target(struct pipe_context *pipe, } static void -i915_clear_depth_stencil(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) +i915_clear_depth_stencil_blitter(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) { struct i915_texture *tex = i915_texture(dst->texture); struct pipe_resource *pt = &tex->b.b; @@ -193,9 +295,15 @@ i915_surface_destroy(struct pipe_context *ctx, void i915_init_surface_functions(struct i915_context *i915) { - i915->base.resource_copy_region = i915_surface_copy; - i915->base.clear_render_target = i915_clear_render_target; - i915->base.clear_depth_stencil = i915_clear_depth_stencil; + if (i915_screen(i915->base.screen)->debug.use_blitter) { + i915->base.resource_copy_region = i915_surface_copy_blitter; + i915->base.clear_render_target = i915_clear_render_target_blitter; + i915->base.clear_depth_stencil = i915_clear_depth_stencil_blitter; + } else { + i915->base.resource_copy_region = i915_surface_copy_render; + i915->base.clear_render_target = i915_clear_render_target_render; + i915->base.clear_depth_stencil = i915_clear_depth_stencil_render; + } i915->base.create_surface = i915_create_surface; i915->base.surface_destroy = i915_surface_destroy; } diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index 0ae1a6be9e5..3b4a99beed2 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -38,7 +38,6 @@ void brw_context_flush( struct brw_context *brw ) static void brw_flush( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence ) { brw_context_flush( brw_context( pipe ) ); diff --git a/src/gallium/drivers/i965/brw_resource_buffer.c b/src/gallium/drivers/i965/brw_resource_buffer.c index afb96ee3e7f..32dc54f2b26 100644 --- a/src/gallium/drivers/i965/brw_resource_buffer.c +++ b/src/gallium/drivers/i965/brw_resource_buffer.c @@ -91,30 +91,10 @@ brw_buffer_transfer_unmap( struct pipe_context *pipe, } -static unsigned brw_buffer_is_referenced( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, - int layer) -{ - struct brw_context *brw = brw_context(pipe); - struct brw_winsys_buffer *batch_bo = brw->batch->buf; - struct brw_buffer *buf = brw_buffer(resource); - - if (buf->bo == NULL) - return PIPE_UNREFERENCED; - - if (!brw_screen(pipe->screen)->sws->bo_references( batch_bo, buf->bo )) - return PIPE_UNREFERENCED; - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - - struct u_resource_vtbl brw_buffer_vtbl = { brw_buffer_get_handle, /* get_handle */ brw_buffer_destroy, /* resource_destroy */ - brw_buffer_is_referenced, /* is_resource_referenced */ u_default_get_transfer, /* get_transfer */ u_default_transfer_destroy, /* transfer_destroy */ brw_buffer_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/i965/brw_resource_texture.c b/src/gallium/drivers/i965/brw_resource_texture.c index 0cb895f35de..71a18290248 100644 --- a/src/gallium/drivers/i965/brw_resource_texture.c +++ b/src/gallium/drivers/i965/brw_resource_texture.c @@ -225,48 +225,6 @@ static void brw_texture_destroy(struct pipe_screen *screen, } - - -static unsigned brw_texture_is_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned level, - int layer ) -{ - struct brw_context *brw = brw_context(pipe); - struct brw_screen *bscreen = brw_screen(pipe->screen); - struct brw_winsys_buffer *batch_bo = brw->batch->buf; - struct brw_texture *tex = brw_texture(texture); - struct brw_surface *surf; - int i; - - /* XXX: this is subject to false positives if the underlying - * texture BO is referenced, we can't tell whether the sub-region - * we care about participates in that. - */ - if (bscreen->sws->bo_references( batch_bo, tex->bo )) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; - - /* Find any view on this texture for this level/layer and see if it - * is referenced: - */ - for (i = 0; i < 2; i++) { - foreach (surf, &tex->views[i]) { - if (surf->bo == tex->bo) - continue; - - if (!(layer == -1 || surf->id.bits.layer == layer) || - surf->id.bits.level != level) - continue; - - if (bscreen->sws->bo_references( batch_bo, surf->bo)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; - } - } - - return PIPE_UNREFERENCED; -} - - /* * Transfer functions */ @@ -347,7 +305,6 @@ struct u_resource_vtbl brw_texture_vtbl = { brw_texture_get_handle, /* get_handle */ brw_texture_destroy, /* resource_destroy */ - brw_texture_is_referenced, /* is_resource_referenced */ brw_texture_get_transfer, /* get_transfer */ u_default_transfer_destroy, /* transfer_destroy */ brw_texture_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index bf805fd080c..25204fd088d 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -278,8 +278,7 @@ brw_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { static const enum pipe_format tex_supported[] = { PIPE_FORMAT_L8_UNORM, @@ -365,20 +364,19 @@ brw_fence_reference(struct pipe_screen *screen, { } -static int +static boolean brw_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { - return 0; /* XXX shouldn't this be a boolean? */ + return TRUE; } -static int +static boolean brw_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { - return 0; + return TRUE; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index b533abe24c6..2a9d7360155 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -668,34 +668,15 @@ identity_clear_depth_stencil(struct pipe_context *_pipe, static void identity_flush(struct pipe_context *_pipe, - unsigned flags, struct pipe_fence_handle **fence) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; pipe->flush(pipe, - flags, fence); } -static unsigned int -identity_is_resource_referenced(struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned level, - int layer) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct identity_resource *id_resource = identity_resource(_resource); - struct pipe_context *pipe = id_pipe->pipe; - struct pipe_resource *resource = id_resource->resource; - - return pipe->is_resource_referenced(pipe, - resource, - level, - layer); -} - static struct pipe_sampler_view * identity_context_create_sampler_view(struct pipe_context *_pipe, struct pipe_resource *_resource, @@ -931,7 +912,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.clear_render_target = identity_clear_render_target; id_pipe->base.clear_depth_stencil = identity_clear_depth_stencil; id_pipe->base.flush = identity_flush; - id_pipe->base.is_resource_referenced = identity_is_resource_referenced; id_pipe->base.create_surface = identity_context_create_surface; id_pipe->base.surface_destroy = identity_context_surface_destroy; id_pipe->base.create_sampler_view = identity_context_create_sampler_view; diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 644481bb748..9bf7fd4c0ee 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -103,8 +103,7 @@ identity_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; @@ -113,8 +112,7 @@ identity_screen_is_format_supported(struct pipe_screen *_screen, format, target, sample_count, - tex_usage, - geom_flags); + tex_usage); } static struct pipe_context * @@ -242,30 +240,28 @@ identity_screen_fence_reference(struct pipe_screen *_screen, fence); } -static int +static boolean identity_screen_fence_signalled(struct pipe_screen *_screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; return screen->fence_signalled(screen, - fence, - flags); + fence); } -static int +static boolean identity_screen_fence_finish(struct pipe_screen *_screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; return screen->fence_finish(screen, fence, - flags); + timeout); } struct pipe_screen * diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index e9374cc6efa..1fc7746a834 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -12,7 +12,7 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.6 (or later) + - LLVM. Version 2.8 recommended. 2.6 or later required. For Linux, on a recent Debian based distribution do: @@ -30,21 +30,8 @@ Requirements debug=no. This is necessary as LLVM builds as static library so the chosen MS CRT must match. - The version of LLVM from SVN ("2.7svn") from mid-March 2010 is pretty - stable and has some features not in version 2.6. - - scons (optional) - - udis86, http://udis86.sourceforge.net/ (optional). My personal repository - supports more opcodes which haven't been merged upstream yet: - - git clone git://anongit.freedesktop.org/~jrfonseca/udis86 - cd udis86 - ./autogen.sh - ./configure --with-pic - make - sudo make install - Building ======== @@ -94,13 +81,7 @@ that no tail call optimizations are done by gcc. To better profile JIT code you'll need to build LLVM with oprofile integration. - source_dir=$PWD/llvm-2.6 - build_dir=$source_dir/build/profile - install_dir=$source_dir-profile - - mkdir -p "$build_dir" - cd "$build_dir" && \ - $source_dir/configure \ + ./configure \ --prefix=$install_dir \ --enable-optimized \ --disable-profiling \ diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 644201ddf7c..8a5655d4996 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -136,10 +136,9 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) static void do_flush( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence) { - llvmpipe_flush(pipe, flags, fence, __FUNCTION__); + llvmpipe_flush(pipe, fence, __FUNCTION__); } diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 849db06acdf..42430550ea6 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -40,12 +40,10 @@ /** - * \param flags bitmask of PIPE_FLUSH_x flags * \param fence if non-null, returns pointer to a fence which can be waited on */ void llvmpipe_flush( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence, const char *reason) { @@ -54,7 +52,7 @@ llvmpipe_flush( struct pipe_context *pipe, draw_flush(llvmpipe->draw); /* ask the setup module to flush */ - lp_setup_flush(llvmpipe->setup, flags, fence, reason); + lp_setup_flush(llvmpipe->setup, fence, reason); if (llvmpipe_variant_count > 1000) { @@ -65,23 +63,21 @@ llvmpipe_flush( struct pipe_context *pipe, /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { - if (flags & PIPE_FLUSH_FRAME) { - static unsigned frame_no = 1; - char filename[256]; - unsigned i; - - for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { - util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); - debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]); - } - - if (0) { - util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); - debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); - } - - ++frame_no; + static unsigned frame_no = 1; + char filename[256]; + unsigned i; + + for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]); + } + + if (0) { + util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); } + + ++frame_no; } } @@ -90,9 +86,9 @@ llvmpipe_finish( struct pipe_context *pipe, const char *reason ) { struct pipe_fence_handle *fence = NULL; - llvmpipe_flush(pipe, 0, &fence, reason); + llvmpipe_flush(pipe, &fence, reason); if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_finish(pipe->screen, fence, PIPE_TIMEOUT_INFINITE); pipe->screen->fence_reference(pipe->screen, &fence, NULL); } } @@ -110,7 +106,6 @@ llvmpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, int layer, - unsigned flush_flags, boolean read_only, boolean cpu_access, boolean do_not_block, @@ -118,10 +113,10 @@ llvmpipe_flush_resource(struct pipe_context *pipe, { unsigned referenced; - referenced = pipe->is_resource_referenced(pipe, resource, level, layer); + referenced = llvmpipe_is_resource_referenced(pipe, resource, level, layer); - if ((referenced & PIPE_REFERENCED_FOR_WRITE) || - ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { + if ((referenced & LP_REFERENCED_FOR_WRITE) || + ((referenced & LP_REFERENCED_FOR_READ) && !read_only)) { if (cpu_access) { /* @@ -136,7 +131,7 @@ llvmpipe_flush_resource(struct pipe_context *pipe, * Just flush. */ - llvmpipe_flush(pipe, flush_flags, NULL, reason); + llvmpipe_flush(pipe, NULL, reason); } } diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 579d24c68ad..efff94c8c0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -36,7 +36,6 @@ struct pipe_resource; void llvmpipe_flush(struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence, const char *reason); @@ -49,7 +48,6 @@ llvmpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, int layer, - unsigned flush_flags, boolean read_only, boolean cpu_access, boolean do_not_block, diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 84c66dd36e8..1e2401fa84d 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -69,7 +69,7 @@ llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) */ if (pq->fence) { if (!lp_fence_issued(pq->fence)) - llvmpipe_flush(pipe, 0, NULL, __FUNCTION__); + llvmpipe_flush(pipe, NULL, __FUNCTION__); if (!lp_fence_signalled(pq->fence)) lp_fence_wait(pq->fence); @@ -99,7 +99,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, if (!lp_fence_signalled(pq->fence)) { if (!lp_fence_issued(pq->fence)) - llvmpipe_flush(pipe, 0, NULL, __FUNCTION__); + llvmpipe_flush(pipe, NULL, __FUNCTION__); if (!wait) return FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 2c32aa93cdf..521a52ad3ae 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -164,7 +164,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_DEPTH_CLAMP: return 0; - case PIPE_CAP_INSTANCED_DRAWING: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; default: return 0; @@ -224,8 +225,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bind, - unsigned geom_flags ) + unsigned bind) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct sw_winsys *winsys = screen->winsys; @@ -348,10 +348,9 @@ llvmpipe_fence_reference(struct pipe_screen *screen, /** * Has the fence been executed/finished? */ -static int +static boolean llvmpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) + struct pipe_fence_handle *fence) { struct lp_fence *f = (struct lp_fence *) fence; return lp_fence_signalled(f); @@ -361,15 +360,15 @@ llvmpipe_fence_signalled(struct pipe_screen *screen, /** * Wait for the fence to finish. */ -static int +static boolean llvmpipe_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence_handle, - unsigned flag) + uint64_t timeout) { struct lp_fence *f = (struct lp_fence *) fence_handle; lp_fence_wait(f); - return 0; + return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 7b7b2721ba8..3813e0ed972 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -333,12 +333,8 @@ fail: } -/** - * \param flags bitmask of PIPE_FLUSH_x flags - */ void lp_setup_flush( struct lp_setup_context *setup, - unsigned flags, struct pipe_fence_handle **fence, const char *reason) { @@ -469,7 +465,7 @@ lp_setup_clear( struct lp_setup_context *setup, unsigned flags ) { if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) { - lp_setup_flush(setup, 0, NULL, __FUNCTION__); + lp_setup_flush(setup, NULL, __FUNCTION__); if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) assert(0); @@ -753,20 +749,20 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup, /* check the render targets */ for (i = 0; i < setup->fb.nr_cbufs; i++) { if (setup->fb.cbufs[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE; } if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) { - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE; } /* check textures referenced by the scene */ for (i = 0; i < Elements(setup->scenes); i++) { if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) { - return PIPE_REFERENCED_FOR_READ; + return LP_REFERENCED_FOR_READ; } } - return PIPE_UNREFERENCED; + return LP_UNREFERENCED; } @@ -1114,7 +1110,7 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) if (!lp_scene_bin_everywhere(setup->scene, LP_RAST_OP_END_QUERY, dummy)) { - lp_setup_flush(setup, 0, NULL, __FUNCTION__); + lp_setup_flush(setup, NULL, __FUNCTION__); } } else { diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 0d6e161a218..8655259d27c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -64,7 +64,6 @@ lp_setup_clear(struct lp_setup_context *setup, void lp_setup_flush( struct lp_setup_context *setup, - unsigned flags, struct pipe_fence_handle **fence, const char *reason); diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index ad751b9ef42..9f1ec146e9d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -651,7 +651,7 @@ generate_setup_variant(struct gallivm_state *gallivm, LLVMTypeRef arg_types[7]; LLVMBasicBlockRef block; LLVMBuilderRef builder = gallivm->builder; - int64_t t0, t1; + int64_t t0 = 0, t1; if (0) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index e7e46a628a1..f49638acf08 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -69,7 +69,6 @@ lp_resource_copy(struct pipe_context *pipe, llvmpipe_flush_resource(pipe, dst, dst_level, dstz, - 0, /* flush_flags */ FALSE, /* read_only */ TRUE, /* cpu_access */ FALSE, /* do_not_block */ @@ -77,7 +76,6 @@ lp_resource_copy(struct pipe_context *pipe, llvmpipe_flush_resource(pipe, src, src_level, src_box->z, - 0, /* flush_flags */ TRUE, /* read_only */ TRUE, /* cpu_access */ FALSE, /* do_not_block */ diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 9753da5e57e..fa4ce5bf2ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -575,7 +575,6 @@ llvmpipe_get_transfer(struct pipe_context *pipe, if (!llvmpipe_flush_resource(pipe, resource, level, box->depth > 1 ? -1 : box->z, - 0, /* flush_flags */ read_only, TRUE, /* cpu_access */ do_not_block, @@ -695,7 +694,7 @@ llvmpipe_transfer_unmap(struct pipe_context *pipe, transfer->box.z); } -static unsigned int +unsigned int llvmpipe_is_resource_referenced( struct pipe_context *pipe, struct pipe_resource *presource, unsigned level, int layer) @@ -703,7 +702,7 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); if (presource->target == PIPE_BUFFER) - return PIPE_UNREFERENCED; + return LP_UNREFERENCED; return lp_setup_is_resource_referenced(llvmpipe->setup, presource); } @@ -1401,7 +1400,6 @@ llvmpipe_init_context_resource_funcs(struct pipe_context *pipe) pipe->transfer_destroy = llvmpipe_transfer_destroy; pipe->transfer_map = llvmpipe_transfer_map; pipe->transfer_unmap = llvmpipe_transfer_unmap; - pipe->is_resource_referenced = llvmpipe_is_resource_referenced; pipe->transfer_flush_region = u_default_transfer_flush_region; pipe->transfer_inline_write = u_default_transfer_inline_write; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index b789c0f4090..b4a0dfd1c36 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -243,4 +243,14 @@ llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen); extern void llvmpipe_init_context_texture_funcs(struct pipe_context *pipe); + +#define LP_UNREFERENCED 0 +#define LP_REFERENCED_FOR_READ (1 << 0) +#define LP_REFERENCED_FOR_WRITE (1 << 1) + +unsigned int +llvmpipe_is_resource_referenced( struct pipe_context *pipe, + struct pipe_resource *presource, + unsigned level, int layer); + #endif /* LP_TEXTURE_H */ diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c index 3680f4622da..496b7f5ec6d 100644 --- a/src/gallium/drivers/noop/noop_pipe.c +++ b/src/gallium/drivers/noop/noop_pipe.c @@ -87,13 +87,6 @@ struct noop_resource { struct sw_displaytarget *dt; }; -static unsigned noop_is_resource_referenced(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, int layer) -{ - return PIPE_UNREFERENCED; -} - static struct pipe_resource *noop_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) { @@ -267,7 +260,7 @@ static void noop_resource_copy_region(struct pipe_context *ctx, /* * context */ -static void noop_flush(struct pipe_context *ctx, unsigned flags, +static void noop_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence) { } @@ -303,7 +296,6 @@ static struct pipe_context *noop_create_context(struct pipe_screen *screen, void ctx->transfer_unmap = noop_transfer_unmap; ctx->transfer_destroy = noop_transfer_destroy; ctx->transfer_inline_write = noop_transfer_inline_write; - ctx->is_resource_referenced = noop_is_resource_referenced; noop_init_state_functions(ctx); return ctx; @@ -453,8 +445,7 @@ static boolean noop_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned usage, - unsigned geom_flags) + unsigned usage) { return true; } diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index d0cc29104b8..01d3aa46d0e 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -314,7 +314,6 @@ const struct u_resource_vtbl nouveau_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ nouveau_buffer_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ nouveau_buffer_transfer_get, /* get_transfer */ nouveau_buffer_transfer_destroy, /* transfer_destroy */ nouveau_buffer_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c index 18bdb18ad41..d8f59dce9ed 100644 --- a/src/gallium/drivers/nouveau/nouveau_fence.c +++ b/src/gallium/drivers/nouveau/nouveau_fence.c @@ -90,6 +90,9 @@ nouveau_fence_emit(struct nouveau_fence *fence) assert(fence->state == NOUVEAU_FENCE_STATE_AVAILABLE); + /* set this now, so that if fence.emit triggers a flush we don't recurse */ + fence->state = NOUVEAU_FENCE_STATE_EMITTED; + screen->fence.emit(&screen->base, fence->sequence); ++fence->ref; @@ -100,8 +103,6 @@ nouveau_fence_emit(struct nouveau_fence *fence) screen->fence.head = fence; screen->fence.tail = fence; - - fence->state = NOUVEAU_FENCE_STATE_EMITTED; } void @@ -215,6 +216,8 @@ nouveau_fence_wait(struct nouveau_fence *fence) void nouveau_fence_next(struct nouveau_screen *screen) { - nouveau_fence_emit(screen->fence.current); + if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTED) + nouveau_fence_emit(screen->fence.current); + nouveau_fence_new(screen, &screen->fence.current, FALSE); } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index e6cd3064c9a..401155bba6e 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -12,6 +12,7 @@ #include <errno.h> #include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_mm.h" #include "nouveau_winsys.h" #include "nouveau_screen.h" #include "nouveau_fence.h" @@ -154,20 +155,19 @@ nouveau_screen_fence_ref(struct pipe_screen *pscreen, nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr); } -static int +static boolean nouveau_screen_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *pfence, - unsigned flags) + struct pipe_fence_handle *pfence) { - return !nouveau_fence_signalled(nouveau_fence(pfence)); + return nouveau_fence_signalled(nouveau_fence(pfence)); } -static int +static boolean nouveau_screen_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *pfence, - unsigned flags) + uint64_t timeout) { - return !nouveau_fence_wait(nouveau_fence(pfence)); + return nouveau_fence_wait(nouveau_fence(pfence)); } @@ -210,26 +210,6 @@ nouveau_screen_bo_get_handle(struct pipe_screen *pscreen, } } - -unsigned int -nouveau_reference_flags(struct nouveau_bo *bo) -{ - uint32_t bo_flags; - int flags = 0; - - bo_flags = nouveau_bo_pending(bo); - if (bo_flags & NOUVEAU_BO_RD) - flags |= PIPE_REFERENCED_FOR_READ; - if (bo_flags & NOUVEAU_BO_WR) - flags |= PIPE_REFERENCED_FOR_WRITE; - - return flags; -} - - - - - int nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) { diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index c091abf2786..186ada39677 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -74,10 +74,6 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle, unsigned *out_stride); -unsigned int -nouveau_reference_flags(struct nouveau_bo *bo); - - int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *); void nouveau_screen_fini(struct nouveau_screen *); diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 912367b8391..930cee7c1e7 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -30,25 +30,20 @@ #include "nouveau/nouveau_reloc.h" static void -nv50_flush(struct pipe_context *pipe, unsigned flags, +nv50_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->base.channel; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, RING_3D_(NV50_GRAPH_WAIT_FOR_IDLE), 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); - OUT_RING (chan, 0x20); - } + struct nouveau_screen *screen = &nv50_context(pipe)->screen->base; if (fence) - nouveau_fence_ref(nv50->screen->base.fence.current, - (struct nouveau_fence **)fence); + nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence); + + /* Try to emit before firing to avoid having to flush again right after + * in case we have to wait on this fence. + */ + nouveau_fence_emit(screen->fence.current); - if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) - FIRE_RING(chan); + FIRE_RING(screen->channel); } void @@ -64,10 +59,34 @@ nv50_default_flush_notify(struct nouveau_channel *chan) } static void +nv50_context_unreference_resources(struct nv50_context *nv50) +{ + unsigned s, i; + + for (i = 0; i < NV50_BUFCTX_COUNT; ++i) + nv50_bufctx_reset(nv50, i); + + for (i = 0; i < nv50->num_vtxbufs; ++i) + pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); + + pipe_resource_reference(&nv50->idxbuf.buffer, NULL); + + for (s = 0; s < 3; ++s) { + for (i = 0; i < nv50->num_textures[s]; ++i) + pipe_sampler_view_reference(&nv50->textures[s][i], NULL); + + for (i = 0; i < 16; ++i) + pipe_resource_reference(&nv50->constbuf[s][i], NULL); + } +} + +static void nv50_destroy(struct pipe_context *pipe) { struct nv50_context *nv50 = nv50_context(pipe); + nv50_context_unreference_resources(nv50); + draw_destroy(nv50->draw); if (nv50->screen->cur_ctx == nv50) { diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index ae6b26af1eb..9eeca05ada3 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -106,7 +106,6 @@ const struct u_resource_vtbl nv50_miptree_vtbl = { nv50_miptree_get_handle, /* get_handle */ nv50_miptree_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ nv50_miptree_transfer_new, /* get_transfer */ nv50_miptree_transfer_del, /* transfer_destroy */ nv50_miptree_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/nv50/nv50_resource.c b/src/gallium/drivers/nv50/nv50_resource.c index 2a2fb0e32bc..1ae4d70a845 100644 --- a/src/gallium/drivers/nv50/nv50_resource.c +++ b/src/gallium/drivers/nv50/nv50_resource.c @@ -3,22 +3,6 @@ #include "nv50_resource.h" #include "nouveau/nouveau_screen.h" -static unsigned -nv50_resource_is_referenced(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned face, int layer) -{ - struct nv04_resource *res = nv04_resource(resource); - unsigned flags = 0; - unsigned bo_flags = nouveau_bo_pending(res->bo); - - if (bo_flags & NOUVEAU_BO_RD) - flags = PIPE_REFERENCED_FOR_READ; - if (bo_flags & NOUVEAU_BO_WR) - flags |= PIPE_REFERENCED_FOR_WRITE; - - return flags; -} static struct pipe_resource * nv50_resource_create(struct pipe_screen *screen, @@ -52,7 +36,6 @@ nv50_init_resource_functions(struct pipe_context *pcontext) pcontext->transfer_unmap = u_transfer_unmap_vtbl; pcontext->transfer_destroy = u_transfer_destroy_vtbl; pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; - pcontext->is_resource_referenced = nv50_resource_is_referenced; pcontext->create_surface = nv50_miptree_surface_new; pcontext->surface_destroy = nv50_miptree_surface_del; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ae0365eb5c5..7690c80eef0 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -40,7 +40,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bindings, unsigned geom_flags) + unsigned bindings) { if (sample_count > 1) return FALSE; @@ -127,7 +127,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_INSTANCED_DRAWING: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; default: NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 980bc369293..db257159698 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -796,10 +796,13 @@ nv50_set_index_buffer(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); - if (ib) + if (ib) { + pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer); + memcpy(&nv50->idxbuf, ib, sizeof(nv50->idxbuf)); - else - nv50->idxbuf.buffer = NULL; + } else { + pipe_resource_reference(&nv50->idxbuf.buffer, NULL); + } } static void @@ -866,5 +869,7 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->set_vertex_buffers = nv50_set_vertex_buffers; pipe->set_index_buffer = nv50_set_index_buffer; + + pipe->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index bf46296e7ef..f3d45eb95e0 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -43,8 +43,9 @@ nv50_validate_fb(struct nv50_context *nv50) mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING; + /* only register for writing, otherwise we'd always serialize here */ nv50_bufctx_add_resident(nv50, NV50_BUFCTX_FRAME, &mt->base, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } if (fb->zsbuf) { @@ -74,7 +75,7 @@ nv50_validate_fb(struct nv50_context *nv50) mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING; nv50_bufctx_add_resident(nv50, NV50_BUFCTX_FRAME, &mt->base, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } else { BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 0); @@ -253,6 +254,35 @@ nv50_validate_rasterizer(struct nv50_context *nv50) OUT_RINGp(chan, nv50->rast->state, nv50->rast->size); } +static void +nv50_switch_pipe_context(struct nv50_context *ctx_to) +{ + struct nv50_context *ctx_from = ctx_to->screen->cur_ctx; + + if (ctx_from) + ctx_to->state = ctx_from->state; + + ctx_to->dirty = ~0; + + if (!ctx_to->vertex) + ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS); + + if (!ctx_to->vertprog) + ctx_to->dirty &= ~NV50_NEW_VERTPROG; + if (!ctx_to->fragprog) + ctx_to->dirty &= ~NV50_NEW_FRAGPROG; + + if (!ctx_to->blend) + ctx_to->dirty &= ~NV50_NEW_BLEND; + if (!ctx_to->rast) + ctx_to->dirty &= ~NV50_NEW_RASTERIZER; + if (!ctx_to->zsa) + ctx_to->dirty &= ~NV50_NEW_ZSA; + + ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx = + ctx_to; +} + static struct state_validate { void (*func)(struct nv50_context *); uint32_t states; @@ -292,11 +322,9 @@ boolean nv50_state_validate(struct nv50_context *nv50) { unsigned i; -#if 0 - if (nv50->screen->cur_ctx != nv50) /* FIXME: not everything is valid */ - nv50->dirty = 0xffffffff; -#endif - nv50->screen->cur_ctx = nv50; + + if (nv50->screen->cur_ctx != nv50) + nv50_switch_pipe_context(nv50); if (nv50->dirty) { for (i = 0; i < validate_list_len; ++i) { diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h index cf5b92ef1a8..515e3e78d42 100644 --- a/src/gallium/drivers/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -49,7 +49,7 @@ struct nv50_vertex_stateobj { boolean need_conversion; unsigned vertex_size; unsigned packet_vertex_limit; - struct nv50_vertex_element element[1]; + struct nv50_vertex_element element[0]; }; #endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 4f0a5018459..abdb9ce2f93 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -51,10 +51,8 @@ nv50_vertex_state_create(struct pipe_context *pipe, struct translate_key transkey; unsigned i; - assert(num_elements); - so = MALLOC(sizeof(*so) + - (num_elements - 1) * sizeof(struct nv50_vertex_element)); + num_elements * sizeof(struct nv50_vertex_element)); if (!so) return NULL; so->num_elements = num_elements; diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 5857f7cee9e..94fa081ad7e 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -927,6 +927,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_ZETA_BASE_LAYER 0x0000179c +#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_HIGH 0x000017bc + +#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_LOW 0x000017c0 + +#define NVC0_3D_VERTEX_QUARANTINE_SIZE 0x000017c4 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_16K 0x00000001 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_32K 0x00000002 +#define NVC0_3D_VERTEX_QUARANTINE_SIZE_64K 0x00000003 + #define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0)) #define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004 #define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004 diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index d6de979b132..5d2168e600b 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -30,29 +30,45 @@ #include "nouveau/nouveau_reloc.h" static void -nvc0_flush(struct pipe_context *pipe, unsigned flags, +nvc0_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence) { - struct nvc0_context *nvc0 = nvc0_context(pipe); - struct nouveau_channel *chan = nvc0->screen->base.channel; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, RING_3D(SERIALIZE), 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1); - OUT_RING (chan, 0x00); - } else - if ((flags & PIPE_FLUSH_RENDER_CACHE) && !(flags & PIPE_FLUSH_FRAME)) { - BEGIN_RING(chan, RING_3D(SERIALIZE), 1); - OUT_RING (chan, 0); - } + struct nouveau_screen *screen = &nvc0_context(pipe)->screen->base; if (fence) - nouveau_fence_ref(nvc0->screen->base.fence.current, - (struct nouveau_fence **)fence); + nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence); + + /* Try to emit before firing to avoid having to flush again right after + * in case we have to wait on this fence. + */ + nouveau_fence_emit(screen->fence.current); + + FIRE_RING(screen->channel); +} + +static void +nvc0_context_unreference_resources(struct nvc0_context *nvc0) +{ + unsigned s, i; + + for (i = 0; i < NVC0_BUFCTX_COUNT; ++i) + nvc0_bufctx_reset(nvc0, i); + + for (i = 0; i < nvc0->num_vtxbufs; ++i) + pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); + + pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); - if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) - FIRE_RING(chan); + for (s = 0; s < 5; ++s) { + for (i = 0; i < nvc0->num_textures[s]; ++i) + pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); + + for (i = 0; i < 16; ++i) + pipe_resource_reference(&nvc0->constbuf[s][i], NULL); + } + + for (i = 0; i < nvc0->num_tfbbufs; ++i) + pipe_resource_reference(&nvc0->tfbbuf[i], NULL); } static void @@ -60,6 +76,8 @@ nvc0_destroy(struct pipe_context *pipe) { struct nvc0_context *nvc0 = nvc0_context(pipe); + nvc0_context_unreference_resources(nvc0); + draw_destroy(nvc0->draw); if (nvc0->screen->cur_ctx == nvc0) { diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 67c5a1287b1..102997e4fcb 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -79,6 +79,7 @@ struct nvc0_context { uint32_t instance_base; int32_t index_bias; boolean prim_restart; + boolean early_z; uint8_t num_vtxbufs; uint8_t num_vtxelts; uint8_t num_textures[5]; @@ -135,6 +136,9 @@ struct nvc0_context { struct draw_context *draw; }; +#define NVC0_USING_EDGEFLAG(ctx) \ + ((ctx)->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS) + static INLINE struct nvc0_context * nvc0_context(struct pipe_context *pipe) { diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h index 8da963a4c5b..b7d0d3eafa4 100644 --- a/src/gallium/drivers/nvc0/nvc0_graph_macros.h +++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h @@ -144,7 +144,7 @@ static const uint32_t nvc0_9097_poly_mode_back[] = 0x00180611, /* 0x05: mov $r6 0x60 */ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */ 0x0000f807, /* 0x07: braz $r7 0xa */ - 0x00dac021, /* 0x08: maddr 0x36b */ + 0x00db0021, /* 0x08: maddr 0x36c */ 0x00800611, /* 0x09: mov $r6 0x200 */ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */ diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c index db9117c3ff1..bced3245524 100644 --- a/src/gallium/drivers/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nvc0/nvc0_miptree.c @@ -114,7 +114,6 @@ const struct u_resource_vtbl nvc0_miptree_vtbl = { nvc0_miptree_get_handle, /* get_handle */ nvc0_miptree_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ nvc0_miptree_transfer_new, /* get_transfer */ nvc0_miptree_transfer_del, /* transfer_destroy */ nvc0_miptree_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c index f51d289e8cd..bd85a7f1ffd 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc.c @@ -25,6 +25,14 @@ #include "nvc0_pc.h" #include "nvc0_program.h" +uint8_t +nvc0_ir_reverse_cc(uint8_t cc) +{ + static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + + return cc_swapped[cc & 7] | (cc & ~7); +} + boolean nvc0_insn_can_load(struct nv_instruction *nvi, int s, struct nv_instruction *ld) diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index efa073a9201..3a5612a5fac 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -210,6 +210,8 @@ #define NV_CC_P 0 #define NV_CC_NOT_P 1 +uint8_t nvc0_ir_reverse_cc(uint8_t cc); + #define NV_PC_MAX_INSTRUCTIONS 2048 #define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4) @@ -219,7 +221,7 @@ struct nv_op_info { uint base; /* e.g. ADD_S32 -> ADD */ char name[12]; uint8_t type; - uint8_t mods; + uint16_t mods; unsigned flow : 1; unsigned commutative : 1; unsigned vector : 1; @@ -234,12 +236,6 @@ extern struct nv_op_info nvc0_op_info_table[]; #define NV_BASEOP(op) (nvc0_op_info_table[op].base) #define NV_OPTYPE(op) (nvc0_op_info_table[op].type) -static INLINE uint -nv_op_base(uint opcode) -{ - return nvc0_op_info_table[opcode].base; -} - static INLINE boolean nv_is_texture_op(uint opcode) { @@ -259,9 +255,9 @@ nv_op_commutative(uint opcode) } static INLINE uint8_t -nv_op_supported_src_mods(uint opcode) +nv_op_supported_src_mods(uint opcode, int s) { - return nvc0_op_info_table[opcode].mods; + return (nvc0_op_info_table[opcode].mods >> (s * 4)) & 0xf; } static INLINE uint diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c index c10f920e6f1..76ad40dbcf8 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c @@ -610,6 +610,8 @@ emit_selp(struct nv_pc *pc, struct nv_instruction *i) static void emit_slct(struct nv_pc *pc, struct nv_instruction *i) { + uint8_t cc = i->set_cond; + pc->emit[0] = 0x00000000; switch (i->opcode) { @@ -627,7 +629,10 @@ emit_slct(struct nv_pc *pc, struct nv_instruction *i) emit_form_0(pc, i); - pc->emit[1] |= i->set_cond << 23; + if (i->src[2]->mod & NV_MOD_NEG) + cc = nvc0_ir_reverse_cc(cc); + + pc->emit[1] |= cc << 23; } static void diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index c5a7367a5fd..f7bf1680d09 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -240,8 +240,6 @@ is_immd32_load(struct nv_instruction *nvi) static INLINE void check_swap_src_0_1(struct nv_instruction *nvi) { - static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; - struct nv_ref *src0 = nvi->src[0]; struct nv_ref *src1 = nvi->src[1]; @@ -270,7 +268,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) if (nvi->src[0] != src0) { if (NV_BASEOP(nvi->opcode) == NV_OP_SET) - nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; + nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond); else if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT) nvi->set_cond = NV_CC_INVERSE(nvi->set_cond); @@ -363,7 +361,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) mod = 0; } - if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod) + if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod) continue; nv_reference(ctx->pc, nvi, j, mi->src[0]->value); diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c index 90c669cc4b8..1f37cb802d7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_print.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c @@ -269,7 +269,11 @@ nvc0_print_instruction(struct nv_instruction *i) PRINT(" %s\n", norm); } -#define NV_MOD_SGN NV_MOD_ABS | NV_MOD_NEG +#define NV_MOD_SGN_12 ((NV_MOD_ABS | NV_MOD_NEG) | ((NV_MOD_ABS | NV_MOD_NEG) << 4)) +#define NV_MOD_NEG_123 (NV_MOD_NEG | (NV_MOD_NEG << 4) | (NV_MOD_NEG << 8)) +#define NV_MOD_NEG_3 (NV_MOD_NEG << 8) + +#define NV_MOD_SGN NV_MOD_SGN_12 struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { @@ -292,8 +296,8 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, - { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_NEG_123, 0, 1, 0, 1, 0, 2, 2 }, + { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_NEG_123, 0, 1, 0, 1, 0, 2, 2 }, { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 }, { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 }, @@ -363,9 +367,9 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] = { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 }, - { NV_OP_SLCT, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_SLCT, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 }, - { NV_OP_SLCT, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_F32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_S32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, + { NV_OP_SLCT, "slct", NV_TYPE_U32, NV_MOD_NEG_3, 0, 0, 0, 1, 0, 2, 2 }, { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 }, diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 899fe147c6a..3c59213176e 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -202,6 +202,8 @@ nvc0_varying_location(unsigned sn, unsigned si) return 0x40; case TGSI_SEMANTIC_FACE: return 0x3fc; + case TGSI_SEMANTIC_EDGEFLAG: /* doesn't exist, set value like for an sreg */ + return 0xf00; /* case TGSI_SEMANTIC_CLIP_DISTANCE: return 0x2c0 + (si * 4); @@ -290,6 +292,8 @@ prog_decl(struct nvc0_translation_info *ti, ti->output_loc[i][c] = si * 4 + c; } } else { + if (sn == TGSI_SEMANTIC_EDGEFLAG) + ti->edgeflag_out = i; for (c = 0; c < 4; ++c) ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; /* for TFB_VARYING_LOCS: */ @@ -298,6 +302,7 @@ prog_decl(struct nvc0_translation_info *ti, } break; case TGSI_FILE_SYSTEM_VALUE: + i = first; ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); assert(first == last); break; @@ -426,6 +431,8 @@ nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { a = (ti->output_loc[i][0] - 0x40) / 4; + if (ti->output_loc[i][0] >= 0xf00) + continue; for (c = 0; c < 4; ++c, ++a) { if (!ti->output_access[i][c]) continue; @@ -528,7 +535,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) } for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { - m = ti->interp_mode[i]; + m = ti->interp_mode[i] & 3; for (c = 0; c < 4; ++c) { if (!ti->input_access[i][c]) continue; @@ -669,6 +676,8 @@ nvc0_program_translate(struct nvc0_program *prog) ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; + prog->vp.edgeflag = PIPE_MAX_ATTRIBS; + if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps) ti->append_ucp = TRUE; diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 68544c90d29..2e9f4c10928 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -15,9 +15,6 @@ struct push_context { void *idxbuf; - float edgeflag; - int edgeflag_attr; - uint32_t vertex_words; uint32_t packet_vertex_limit; @@ -27,8 +24,56 @@ struct push_context { uint32_t prim; uint32_t restart_index; uint32_t instance_id; + + struct { + int buffer; + float value; + uint8_t *data; + unsigned offset; + unsigned stride; + } edgeflag; }; +static void +init_push_context(struct nvc0_context *nvc0, struct push_context *ctx) +{ + struct pipe_vertex_element *ve; + + ctx->chan = nvc0->screen->base.channel; + ctx->translate = nvc0->vertex->translate; + + ctx->edgeflag.value = 0.5f; + + if (NVC0_USING_EDGEFLAG(nvc0)) { + ve = &nvc0->vertex->element[nvc0->vertprog->vp.edgeflag].pipe; + + ctx->edgeflag.buffer = ve->vertex_buffer_index; + ctx->edgeflag.offset = ve->src_offset; + + ctx->packet_vertex_limit = 1; + } else { + ctx->edgeflag.buffer = -1; + ctx->edgeflag.offset = 0; + ctx->edgeflag.stride = 0; + ctx->edgeflag.data = NULL; + + ctx->packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; + } + + ctx->vertex_words = nvc0->vertex->vtx_size; +} + +static INLINE void +set_edgeflag(struct push_context *ctx, unsigned vtx_id) +{ + float f = *(float *)(ctx->edgeflag.data + vtx_id * ctx->edgeflag.stride); + + if (ctx->edgeflag.value != f) { + ctx->edgeflag.value = f; + IMMED_RING(ctx->chan, RING_3D(EDGEFLAG_ENABLE), f ? 1 : 0); + } +} + static INLINE unsigned prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index) { @@ -62,7 +107,7 @@ prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index) static void emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) { - uint8_t *elts = (uint8_t *)ctx->idxbuf + start; + uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start; while (count) { unsigned push = MIN2(count, ctx->packet_vertex_limit); @@ -72,6 +117,9 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) if (ctx->primitive_restart) nr = prim_restart_search_i08(elts, push, ctx->restart_index); + if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + set_edgeflag(ctx, elts[0]); + size = ctx->vertex_words * nr; BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); @@ -97,7 +145,7 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count) static void emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) { - uint16_t *elts = (uint16_t *)ctx->idxbuf + start; + uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start; while (count) { unsigned push = MIN2(count, ctx->packet_vertex_limit); @@ -107,6 +155,9 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) if (ctx->primitive_restart) nr = prim_restart_search_i16(elts, push, ctx->restart_index); + if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + set_edgeflag(ctx, elts[0]); + size = ctx->vertex_words * nr; BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); @@ -132,7 +183,7 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count) static void emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) { - uint32_t *elts = (uint32_t *)ctx->idxbuf + start; + uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start; while (count) { unsigned push = MIN2(count, ctx->packet_vertex_limit); @@ -142,6 +193,9 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) if (ctx->primitive_restart) nr = prim_restart_search_i32(elts, push, ctx->restart_index); + if (unlikely(ctx->edgeflag.buffer >= 0) && nr) + set_edgeflag(ctx, elts[0]); + size = ctx->vertex_words * nr; BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); @@ -171,6 +225,9 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) unsigned push = MIN2(count, ctx->packet_vertex_limit); unsigned size = ctx->vertex_words * push; + if (unlikely(ctx->edgeflag.buffer >= 0)) + set_edgeflag(ctx, start); + BEGIN_RING_NI(ctx->chan, RING_3D(VERTEX_DATA), size); ctx->translate->run(ctx->translate, start, push, ctx->instance_id, @@ -219,10 +276,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) unsigned inst = info->instance_count; boolean apply_bias = info->indexed && info->index_bias; - ctx.chan = nvc0->screen->base.channel; - ctx.translate = nvc0->vertex->translate; - ctx.packet_vertex_limit = nvc0->vertex->vtx_per_packet_max; - ctx.vertex_words = nvc0->vertex->vtx_size; + init_push_context(nvc0, &ctx); for (i = 0; i < nvc0->num_vtxbufs; ++i) { uint8_t *data; @@ -236,12 +290,18 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) data += info->index_bias * vb->stride; ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + + if (unlikely(i == ctx.edgeflag.buffer)) { + ctx.edgeflag.data = data + ctx.edgeflag.offset; + ctx.edgeflag.stride = vb->stride; + } } if (info->indexed) { - ctx.idxbuf = nouveau_resource_map_offset(&nvc0->base, - nv04_resource(nvc0->idxbuf.buffer), - nvc0->idxbuf.offset, NOUVEAU_BO_RD); + ctx.idxbuf = + nouveau_resource_map_offset(&nvc0->base, + nv04_resource(nvc0->idxbuf.buffer), + nvc0->idxbuf.offset, NOUVEAU_BO_RD); if (!ctx.idxbuf) return; index_size = nvc0->idxbuf.index_size; @@ -283,6 +343,9 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } + if (unlikely(ctx.edgeflag.value == 0.0f)) + IMMED_RING(ctx.chan, RING_3D(EDGEFLAG_ENABLE), 1); + if (info->indexed) nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c index fb5a496b84b..44e66314e7e 100644 --- a/src/gallium/drivers/nvc0/nvc0_resource.c +++ b/src/gallium/drivers/nvc0/nvc0_resource.c @@ -3,25 +3,6 @@ #include "nvc0_resource.h" #include "nouveau/nouveau_screen.h" -static unsigned -nvc0_resource_is_referenced(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned face, int layer) -{ - struct nv04_resource *res = nv04_resource(resource); - unsigned flags = 0; - -#ifdef NOUVEAU_USERSPACE_MM - flags = res->status; -#else - unsigned bo_flags = nouveau_bo_pending(res->bo); - if (bo_flags & NOUVEAU_BO_RD) - flags = PIPE_REFERENCED_FOR_READ; - if (bo_flags & NOUVEAU_BO_WR) - flags |= PIPE_REFERENCED_FOR_WRITE; -#endif - return flags; -} static struct pipe_resource * nvc0_resource_create(struct pipe_screen *screen, @@ -55,7 +36,6 @@ nvc0_init_resource_functions(struct pipe_context *pcontext) pcontext->transfer_unmap = u_transfer_unmap_vtbl; pcontext->transfer_destroy = u_transfer_destroy_vtbl; pcontext->transfer_inline_write = u_transfer_inline_write_vtbl; - pcontext->is_resource_referenced = nvc0_resource_is_referenced; pcontext->create_surface = nvc0_miptree_surface_new; pcontext->surface_destroy = nvc0_miptree_surface_del; } diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index d430be92c51..1047ba3c337 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -34,7 +34,7 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bindings, unsigned geom_flags) + unsigned bindings) { if (sample_count > 1) return FALSE; @@ -111,7 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_INSTANCED_DRAWING: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; default: NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); @@ -206,7 +207,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) nouveau_bo_ref(NULL, &screen->tls); nouveau_bo_ref(NULL, &screen->txc); nouveau_bo_ref(NULL, &screen->fence.bo); - nouveau_bo_ref(NULL, &screen->mp_stack_bo); + nouveau_bo_ref(NULL, &screen->vfetch_cache); nouveau_resource_destroy(&screen->text_heap); @@ -495,14 +496,14 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, 0); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, - &screen->mp_stack_bo); + &screen->vfetch_cache); if (ret) goto fail; - BEGIN_RING(chan, RING_3D_(0x17bc), 3); - OUT_RELOCh(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); - OUT_RELOCl(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); - OUT_RING (chan, 1); + BEGIN_RING(chan, RING_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); + OUT_RELOCh(chan, screen->vfetch_cache, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RELOCl(chan, screen->vfetch_cache, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + OUT_RING (chan, 3); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc); if (ret) @@ -634,7 +635,7 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen) nouveau_bo_validate(chan, screen->text, flags); nouveau_bo_validate(chan, screen->uniforms, flags); nouveau_bo_validate(chan, screen->txc, flags); - nouveau_bo_validate(chan, screen->mp_stack_bo, flags); + nouveau_bo_validate(chan, screen->vfetch_cache, flags); if (screen->cur_ctx && screen->cur_ctx->state.tls_required) nouveau_bo_validate(chan, screen->tls, flags); diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index 81f404ada83..94bf0cf3481 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -26,7 +26,7 @@ struct nvc0_screen { struct nouveau_bo *uniforms; struct nouveau_bo *tls; struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ - struct nouveau_bo *mp_stack_bo; + struct nouveau_bo *vfetch_cache; uint64_t tls_size; diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 79b5f3d81cc..7294eaa222e 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -111,8 +111,6 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) return; nvc0_program_update_context_state(nvc0, fp, 4); - BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1); - OUT_RING (chan, fp->fp.early_z); BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2); OUT_RING (chan, 0x51); OUT_RING (chan, fp->code_base); diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index f230292316f..ab68abcfb5a 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -166,6 +166,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { struct nvc0_rasterizer_stateobj *so; + uint32_t reg; so = CALLOC_STRUCT(nvc0_rasterizer_stateobj); if (!so) @@ -202,6 +203,13 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_BEGIN_3D(so, POINT_SIZE, 1); SB_DATA (so, fui(cso->point_size)); } + + reg = (cso->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) ? + NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT : + NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; + + SB_BEGIN_3D(so, POINT_COORD_REPLACE, 1); + SB_DATA (so, ((cso->sprite_coord_enable & 0xff) << 3) | reg); SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization); SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth); @@ -268,14 +276,11 @@ nvc0_zsa_state_create(struct pipe_context *pipe, so->pipe = *cso; - SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask); - SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1); + SB_IMMED_3D(so, DEPTH_TEST_ENABLE, cso->depth.enabled); if (cso->depth.enabled) { - SB_DATA (so, 1); + SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask); SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1); SB_DATA (so, nvgl_comparison_op(cso->depth.func)); - } else { - SB_DATA (so, 0); } if (cso->stencil[0].enabled) { @@ -307,15 +312,12 @@ nvc0_zsa_state_create(struct pipe_context *pipe, if (cso->stencil[0].enabled) { SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0); } - - SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1); + + SB_IMMED_3D(so, ALPHA_TEST_ENABLE, cso->alpha.enabled); if (cso->alpha.enabled) { - SB_DATA (so, 1); SB_BEGIN_3D(so, ALPHA_TEST_REF, 2); SB_DATA (so, fui(cso->alpha.ref_value)); SB_DATA (so, nvgl_comparison_op(cso->alpha.func)); - } else { - SB_DATA (so, 0); } assert(so->size < (sizeof(so->state) / sizeof(so->state[0]))); @@ -706,10 +708,13 @@ nvc0_set_index_buffer(struct pipe_context *pipe, { struct nvc0_context *nvc0 = nvc0_context(pipe); - if (ib) - memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf)); - else - nvc0->idxbuf.buffer = NULL; + if (ib) { + pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer); + + memcpy(&nvc0->idxbuf, ib, sizeof(nvc0->idxbuf)); + } else { + pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); + } } static void diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 6fd880829e4..bb81480bab9 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -93,8 +93,9 @@ nvc0_validate_fb(struct nvc0_context *nvc0) mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; + /* only register for writing, otherwise we'd always serialize here */ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } if (fb->zsbuf) { @@ -127,7 +128,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } else { BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1); OUT_RING (chan, 0); @@ -294,32 +295,6 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0) } static void -nvc0_validate_sprite_coords(struct nvc0_context *nvc0) -{ - struct nouveau_channel *chan = nvc0->screen->base.channel; - uint32_t reg; - - if (nvc0->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) - reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT; - else - reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT; - - if (nvc0->rast->pipe.point_quad_rasterization) { - uint32_t en = nvc0->rast->pipe.sprite_coord_enable; - - while (en) { - int i = ffs(en) - 1; - en &= ~(1 << i); - if (i >= 0 && i < 8) - reg |= 8 << i; - } - } - - BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1); - OUT_RING (chan, reg); -} - -static void nvc0_constbufs_validate(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; @@ -413,6 +388,49 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } } +static void +nvc0_validate_derived_1(struct nvc0_context *nvc0) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + boolean early_z; + + early_z = nvc0->fragprog->fp.early_z && !nvc0->zsa->pipe.alpha.enabled; + + if (early_z != nvc0->state.early_z) { + nvc0->state.early_z = early_z; + IMMED_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), early_z); + } +} + +static void +nvc0_switch_pipe_context(struct nvc0_context *ctx_to) +{ + struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx; + + if (ctx_from) + ctx_to->state = ctx_from->state; + + ctx_to->dirty = ~0; + + if (!ctx_to->vertex) + ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS); + + if (!ctx_to->vertprog) + ctx_to->dirty &= ~NVC0_NEW_VERTPROG; + if (!ctx_to->fragprog) + ctx_to->dirty &= ~NVC0_NEW_FRAGPROG; + + if (!ctx_to->blend) + ctx_to->dirty &= ~NVC0_NEW_BLEND; + if (!ctx_to->rast) + ctx_to->dirty &= ~NVC0_NEW_RASTERIZER; + if (!ctx_to->zsa) + ctx_to->dirty &= ~NVC0_NEW_ZSA; + + ctx_to->screen->base.channel->user_private = ctx_to->screen->cur_ctx = + ctx_to; +} + static struct state_validate { void (*func)(struct nvc0_context *); uint32_t states; @@ -432,7 +450,7 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, - { nvc0_validate_sprite_coords, NVC0_NEW_RASTERIZER | NVC0_NEW_FRAGPROG }, + { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA }, { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, @@ -445,11 +463,9 @@ boolean nvc0_state_validate(struct nvc0_context *nvc0) { unsigned i; -#if 0 - if (nvc0->screen->cur_ctx != nvc0) /* FIXME: not everything is valid */ - nvc0->dirty = 0xffffffff; -#endif - nvc0->screen->cur_ctx = nvc0; + + if (nvc0->screen->cur_ctx != nvc0) + nvc0_switch_pipe_context(nvc0); if (nvc0->dirty) { for (i = 0; i < validate_list_len; ++i) { diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index 17fc51b8aac..fc5f45ea25d 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -240,7 +240,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe, struct nvc0_m2mf_rect drect, srect; unsigned i; unsigned nx = util_format_get_nblocksx(src->format, src_box->width); - unsigned ny = util_format_get_nblocksx(src->format, src_box->height); + unsigned ny = util_format_get_nblocksy(src->format, src_box->height); nvc0_setup_m2mf_rect(&drect, dst, dst_level, dstx, dsty, dstz); nvc0_setup_m2mf_rect(&srect, src, src_level, diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index e7e7ce7dc22..6bbcf2447ec 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -51,8 +51,6 @@ nvc0_vertex_state_create(struct pipe_context *pipe, struct translate_key transkey; unsigned i; - assert(num_elements); - so = MALLOC(sizeof(*so) + num_elements * sizeof(struct nvc0_vertex_element)); if (!so) @@ -265,7 +263,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) struct nvc0_vertex_element *ve; unsigned i; - if (unlikely(vertex->need_conversion)) { + if (unlikely(vertex->need_conversion || NVC0_USING_EDGEFLAG(nvc0))) { nvc0->vbo_fifo = ~0; nvc0->vbo_user = 0; } else { diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 6c8934d3a4a..2bcb93d93e3 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -7,7 +7,7 @@ #include "nvfx_resource.h" static void -nvfx_flush(struct pipe_context *pipe, unsigned flags, +nvfx_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence) { struct nvfx_context *nvfx = nvfx_context(pipe); @@ -16,12 +16,13 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags, struct nouveau_grobj *eng3d = screen->eng3d; /* XXX: we need to actually be intelligent here */ - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + /* XXX This flag wasn't set by the state tracker anyway. */ + /*if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(chan, eng3d, 0x1fd8, 1); OUT_RING(chan, 2); BEGIN_RING(chan, eng3d, 0x1fd8, 1); OUT_RING(chan, 1); - } + }*/ FIRE_RING(chan); if (fence) diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 8c043b867ba..1be84b90a90 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -138,11 +138,11 @@ nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_reso // on our current driver (and the driver too), format support does not depend on geometry, so don't bother computing it // TODO: may want to revisit this - if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_RENDER_TARGET, 0)) + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_RENDER_TARGET)) mt->base.base.bind &=~ PIPE_BIND_RENDER_TARGET; - if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_SAMPLER_VIEW, 0)) + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_SAMPLER_VIEW)) mt->base.base.bind &=~ PIPE_BIND_SAMPLER_VIEW; - if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_DEPTH_STENCIL, 0)) + if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, 0, PIPE_BIND_DEPTH_STENCIL)) mt->base.base.bind &=~ PIPE_BIND_DEPTH_STENCIL; return mt; diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c index c60a7bb8b93..42e77c53621 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.c +++ b/src/gallium/drivers/nvfx/nvfx_resource.c @@ -4,13 +4,6 @@ #include "nvfx_resource.h" #include "nouveau/nouveau_screen.h" -static unsigned int -nvfx_resource_is_referenced(struct pipe_context *pipe, - struct pipe_resource *pr, - unsigned level, int layer) -{ - return !!nouveau_reference_flags(nvfx_resource(pr)->bo); -} static struct pipe_resource * nvfx_resource_create(struct pipe_screen *screen, @@ -58,8 +51,6 @@ nvfx_resource_get_handle(struct pipe_screen *pscreen, void nvfx_init_resource_functions(struct pipe_context *pipe) { - pipe->is_resource_referenced = nvfx_resource_is_referenced; - pipe->create_surface = nvfx_miptree_surface_new; pipe->surface_destroy = nvfx_miptree_surface_del; } diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 15db574a1d0..8742f60c163 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -203,7 +203,7 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bind, unsigned geom_flags) + unsigned bind) { struct nvfx_screen *screen = nvfx_screen(pscreen); diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 40ae4f5bd21..ae9c31418c3 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -426,7 +426,7 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); warned = TRUE; } - nvfx->pipe.flush(&nvfx->pipe, 0, NULL); + nvfx->pipe.flush(&nvfx->pipe, NULL); nvfx->dirty |= (NVFX_NEW_VIEWPORT | NVFX_NEW_VERTPROG | NVFX_NEW_ARRAYS); diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 6391ea7f3be..37b635fd120 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -263,7 +263,7 @@ static void r300_clear(struct pipe_context* pipe, /* Reserve CS space. */ if (dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { - r300->context.flush(&r300->context, 0, NULL); + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); } /* Emit clear packets. */ @@ -447,11 +447,11 @@ static void r300_resource_copy_region(struct pipe_context *pipe, !pipe->screen->is_format_supported(pipe->screen, src->format, src->target, src->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0) || + PIPE_BIND_SAMPLER_VIEW) || !pipe->screen->is_format_supported(pipe->screen, dst->format, dst->target, dst->nr_samples, - PIPE_BIND_RENDER_TARGET, 0))) { + PIPE_BIND_RENDER_TARGET))) { switch (util_format_get_blocksize(old_dst.format)) { case 1: new_dst.format = PIPE_FORMAT_I8_UNORM; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index b8db6fb6c12..720d666d98c 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -78,10 +78,8 @@ static void r300_release_referenced_objects(struct r300_context *r300) NULL); } - /* The dummy VBO. */ + /* Manually-created vertex buffers. */ pipe_resource_reference(&r300->dummy_vb, NULL); - - /* The SWTCL VBO. */ pipe_resource_reference(&r300->vbo, NULL); /* If there are any queries pending or not destroyed, remove them now. */ @@ -141,11 +139,11 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300); } -void r300_flush_cb(void *data) +static void r300_flush_callback(void *data, unsigned flags) { struct r300_context* const cs_context_copy = data; - cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); + r300_flush(&cs_context_copy->context, flags, NULL); } #define R300_INIT_ATOM(atomname, atomsize) \ @@ -455,7 +453,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_render_functions(r300); r300_init_states(&r300->context); - rws->cs_set_flush(r300->cs, r300_flush_cb, r300); + rws->cs_set_flush(r300->cs, r300_flush_callback, r300); /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 58e1094e339..e395f41290e 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -65,7 +65,10 @@ struct r300_aa_state { }; struct r300_blend_state { - uint32_t cb[8]; + struct pipe_blend_state state; + + uint32_t cb_clamp[8]; + uint32_t cb_noclamp[8]; uint32_t cb_no_readwrite[8]; }; @@ -610,6 +613,8 @@ struct r300_context { boolean vertex_arrays_dirty; boolean vertex_arrays_indexed; int vertex_arrays_offset; + int vertex_arrays_instance_id; + boolean instancing_enabled; }; #define foreach_atom(r300, atom) \ @@ -663,8 +668,6 @@ static INLINE void r300_mark_atom_dirty(struct r300_context *r300, struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); -void r300_flush_cb(void *data); - /* Context initialization. */ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_blit_functions(struct r300_context *r300); @@ -679,6 +682,11 @@ void r300_decompress_zmask(struct r300_context *r300); void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); void r300_decompress_zmask_locked(struct r300_context *r300); +/* r300_flush.c */ +void r300_flush(struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence); + /* r300_hyperz.c */ void r300_update_hyperz_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e3945b72d7a..e17a907e77e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -45,7 +45,10 @@ void r300_emit_blend_state(struct r300_context* r300, CS_LOCALS(r300); if (fb->nr_cbufs) { - WRITE_CS_TABLE(blend->cb, size); + if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) + WRITE_CS_TABLE(blend->cb_noclamp, size); + else + WRITE_CS_TABLE(blend->cb_clamp, size); } else { WRITE_CS_TABLE(blend->cb_no_readwrite, size); } @@ -794,7 +797,8 @@ void r300_emit_textures_state(struct r300_context *r300, END_CS; } -void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed) +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, + boolean indexed, int instance_id) { struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer; @@ -804,39 +808,92 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde unsigned vertex_array_count = r300->velems->count; unsigned packet_size = (vertex_array_count * 3 + 1) / 2; struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size; - unsigned size1, size2; + unsigned *hw_format_size = r300->velems->format_size; + unsigned size1, size2, offset1, offset2, stride1, stride2; CS_LOCALS(r300); BEGIN_CS(2 + packet_size + vertex_array_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); - hw_format_size = r300->velems->format_size; + if (instance_id == -1) { + /* Non-instanced arrays. This ignores instance_divisor and instance_id. */ + for (i = 0; i < vertex_array_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); + } - for (i = 0; i < vertex_array_count - 1; i += 2) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - vb2 = &vbuf[velem[i+1].vertex_buffer_index]; - size1 = hw_format_size[i]; - size2 = hw_format_size[i+1]; + if (vertex_array_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | - R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); - } + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + } - if (vertex_array_count & 1) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - size1 = hw_format_size[i]; + for (i = 0; i < vertex_array_count; i++) { + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); + } + } else { + /* Instanced arrays. */ + for (i = 0; i < vertex_array_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; + + if (velem[i].instance_divisor) { + stride1 = 0; + offset1 = vb1->buffer_offset + velem[i].src_offset + + (instance_id / velem[i].instance_divisor) * vb1->stride; + } else { + stride1 = vb1->stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + } + if (velem[i+1].instance_divisor) { + stride2 = 0; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + + (instance_id / velem[i+1].instance_divisor) * vb2->stride; + } else { + stride2 = vb2->stride; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride; + } - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - } + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2)); + OUT_CS(offset1); + OUT_CS(offset2); + } + + if (vertex_array_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; + + if (velem[i].instance_divisor) { + stride1 = 0; + offset1 = vb1->buffer_offset + velem[i].src_offset + + (instance_id / velem[i].instance_divisor) * vb1->stride; + } else { + stride1 = vb1->stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + } - for (i = 0; i < vertex_array_count; i++) { - buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); - OUT_CS_RELOC(buf); + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); + OUT_CS(offset1); + } + + for (i = 0; i < vertex_array_count; i++) { + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); + } } END_CS; } @@ -1164,7 +1221,7 @@ validate: if (flushed) return FALSE; - r300->context.flush(&r300->context, 0, NULL); + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); flushed = TRUE; goto validate; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index acea51d942f..6c1c9d2fb13 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,7 +31,8 @@ struct r300_vertex_program_code; uint32_t pack_float24(float f); -void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed); +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, + boolean indexed, int instance_id); void r300_emit_blend_state(struct r300_context* r300, unsigned size, void* state); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 9c41a1383ce..b3d0d344ec4 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -31,9 +31,10 @@ #include "r300_cs.h" #include "r300_emit.h" -static void r300_flush(struct pipe_context* pipe, - unsigned flags, - struct pipe_fence_handle** fence) + +void r300_flush(struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence) { struct r300_context *r300 = r300_context(pipe); struct r300_atom *atom; @@ -61,7 +62,7 @@ static void r300_flush(struct pipe_context* pipe, r500_emit_index_bias(r300, 0); r300->flush_counter++; - r300->rws->cs_flush(r300->cs); + r300->rws->cs_flush(r300->cs, flags); r300->dirty_hw = 0; /* New kitchen sink, baby. */ @@ -83,20 +84,22 @@ static void r300_flush(struct pipe_context* pipe, * and we cannot emit an empty CS. We must write some regs then. */ CS_LOCALS(r300); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); - r300->rws->cs_flush(r300->cs); + r300->rws->cs_flush(r300->cs, flags); } else { /* Even if hw is not dirty, we should at least reset the CS in case * the space checking failed for the first draw operation. */ - r300->rws->cs_flush(r300->cs); + r300->rws->cs_flush(r300->cs, flags); } } +} - if (flags & PIPE_FLUSH_FRAME) { - r300->rws->cs_sync_flush(r300->cs); - } +static void r300_flush_wrapped(struct pipe_context *pipe, + struct pipe_fence_handle **fence) +{ + r300_flush(pipe, 0, fence); } void r300_init_flush_functions(struct r300_context* r300) { - r300->context.flush = r300_flush; + r300->context.flush = r300_flush_wrapped; } diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index cec7473009a..4c502fefb3f 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -149,6 +149,8 @@ static void get_external_state( unsigned i; unsigned char *swizzle; + state->frag_clamp = 0; + for (i = 0; i < texstate->sampler_state_count; i++) { struct r300_sampler_state *s = texstate->sampler_states[i]; struct r300_sampler_view *v = texstate->sampler_views[i]; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 0ec4a225865..26594dabe42 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -177,7 +177,7 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias, } enum r300_prepare_flags { - PREP_FIRST_DRAW = (1 << 0), /* call emit_dirty_state and friends? */ + PREP_EMIT_STATES = (1 << 0), /* call emit_dirty_state and friends? */ PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */ PREP_EMIT_AOS = (1 << 2), /* call emit_vertex_arrays? */ PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_vertex_arrays_swtcl? */ @@ -193,11 +193,11 @@ enum r300_prepare_flags { * \return TRUE if the CS was flushed */ static boolean r300_reserve_cs_dwords(struct r300_context *r300, - enum r300_prepare_flags flags, - unsigned cs_dwords) + enum r300_prepare_flags flags, + unsigned cs_dwords) { boolean flushed = FALSE; - boolean first_draw = flags & PREP_FIRST_DRAW; + boolean first_draw = flags & PREP_EMIT_STATES; boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; @@ -219,7 +219,7 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, /* Reserve requested CS space. */ if (cs_dwords > (R300_MAX_CMDBUF_DWORDS - r300->cs->cdw)) { - r300->context.flush(&r300->context, 0, NULL); + r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); flushed = TRUE; } @@ -233,15 +233,16 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, * \param index_buffer The index buffer to validate. The parameter may be NULL. * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. + * \param instance_id Index of instance to render * \return TRUE if rendering should be skipped */ static boolean r300_emit_states(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, int buffer_offset, - int index_bias) + int index_bias, int instance_id) { - boolean first_draw = flags & PREP_FIRST_DRAW; + boolean first_draw = flags & PREP_EMIT_STATES; boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; boolean emit_vertex_arrays_swtcl = flags & PREP_EMIT_AOS_SWTCL; boolean indexed = flags & PREP_INDEXED; @@ -267,12 +268,14 @@ static boolean r300_emit_states(struct r300_context *r300, if (emit_vertex_arrays && (r300->vertex_arrays_dirty || r300->vertex_arrays_indexed != indexed || - r300->vertex_arrays_offset != buffer_offset)) { - r300_emit_vertex_arrays(r300, buffer_offset, indexed); + r300->vertex_arrays_offset != buffer_offset || + r300->vertex_arrays_instance_id != instance_id)) { + r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); r300->vertex_arrays_dirty = FALSE; r300->vertex_arrays_indexed = indexed; r300->vertex_arrays_offset = buffer_offset; + r300->vertex_arrays_instance_id = instance_id; } if (emit_vertex_arrays_swtcl) @@ -291,6 +294,7 @@ static boolean r300_emit_states(struct r300_context *r300, * \param cs_dwords The number of dwords to reserve in CS. * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. + * \param instance_id The instance to render. * \return TRUE if rendering should be skipped */ static boolean r300_prepare_for_rendering(struct r300_context *r300, @@ -298,13 +302,15 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300, struct pipe_resource *index_buffer, unsigned cs_dwords, int buffer_offset, - int index_bias) + int index_bias, + int instance_id) { + /* Make sure there is enough space in the command stream and emit states. */ if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) - flags |= PREP_FIRST_DRAW; + flags |= PREP_EMIT_STATES; return r300_emit_states(r300, flags, index_buffer, buffer_offset, - index_bias); + index_bias, instance_id); } static boolean immd_is_good_idea(struct r300_context *r300, @@ -352,8 +358,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, ****************************************************************************/ static void r300_draw_arrays_immediate(struct r300_context *r300, - unsigned mode, unsigned start, - unsigned count) + const struct pipe_draw_info *info) { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; @@ -364,7 +369,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, unsigned vertex_size = r300->velems->vertex_size_dwords; /* The number of dwords for this draw operation. */ - unsigned dwords = 4 + count * vertex_size; + unsigned dwords = 4 + info->count * vertex_size; /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; @@ -379,7 +384,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, CS_LOCALS(r300); - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) return; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ @@ -395,21 +400,21 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, map[vbi] = (uint32_t*)r300->rws->buffer_map( r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf, r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED); - map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; + map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start; } mapelem[i] = map[vbi] + (velem->src_offset / 4); } - r300_emit_draw_init(r300, mode, 0, count-1); + r300_emit_draw_init(r300, info->mode, 0, info->count-1); BEGIN_CS(dwords); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | - r300_translate_primitive(mode)); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) | + r300_translate_primitive(info->mode)); /* Emit vertices. */ - for (v = 0; v < count; v++) { + for (v = 0; v < info->count; v++) { for (i = 0; i < vertex_element_count; i++) { OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]); } @@ -456,8 +461,8 @@ static void r300_emit_draw_arrays(struct r300_context *r300, static void r300_emit_draw_elements(struct r300_context *r300, struct pipe_resource* indexBuffer, unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, + unsigned min_index, + unsigned max_index, unsigned mode, unsigned start, unsigned count, @@ -467,16 +472,16 @@ static void r300_emit_draw_elements(struct r300_context *r300, boolean alt_num_verts = count > 65535; CS_LOCALS(r300); - if (count >= (1 << 24) || maxIndex >= (1 << 24)) { + if (count >= (1 << 24) || max_index >= (1 << 24)) { fprintf(stderr, "r300: Got a huge number of vertices: %i, " - "refusing to render (maxIndex: %i).\n", count, maxIndex); + "refusing to render (max_index: %i).\n", count, max_index); return; } DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", - count, minIndex, maxIndex); + count, min_index, max_index); - r300_emit_draw_init(r300, mode, minIndex, maxIndex); + r300_emit_draw_init(r300, mode, min_index, max_index); /* If start is odd, render the first triangle with indices embedded * in the command stream. This will increase start by 3 and make it @@ -527,24 +532,23 @@ static void r300_emit_draw_elements(struct r300_context *r300, } static void r300_draw_elements_immediate(struct r300_context *r300, - int indexBias, unsigned minIndex, - unsigned maxIndex, unsigned mode, - unsigned start, unsigned count) + const struct pipe_draw_info *info) { uint8_t *ptr1; uint16_t *ptr2; uint32_t *ptr4; unsigned index_size = r300->index_buffer.index_size; - unsigned i, count_dwords = index_size == 4 ? count : (count + 1) / 2; + unsigned i, count_dwords = index_size == 4 ? info->count : + (info->count + 1) / 2; CS_LOCALS(r300); /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, NULL, 2+count_dwords, 0, indexBias)) + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1)) return; - r300_emit_draw_init(r300, mode, minIndex, maxIndex); + r300_emit_draw_init(r300, info->mode, info->min_index, info->max_index); BEGIN_CS(2 + count_dwords); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords); @@ -552,42 +556,42 @@ static void r300_draw_elements_immediate(struct r300_context *r300, switch (index_size) { case 1: ptr1 = r300_resource(r300->index_buffer.buffer)->b.user_ptr; - ptr1 += start; + ptr1 += info->start; - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - r300_translate_primitive(mode)); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | + r300_translate_primitive(info->mode)); - if (indexBias && !r300->screen->caps.is_r500) { - for (i = 0; i < count-1; i += 2) - OUT_CS(((ptr1[i+1] + indexBias) << 16) | - (ptr1[i] + indexBias)); + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count-1; i += 2) + OUT_CS(((ptr1[i+1] + info->index_bias) << 16) | + (ptr1[i] + info->index_bias)); - if (count & 1) - OUT_CS(ptr1[i] + indexBias); + if (info->count & 1) + OUT_CS(ptr1[i] + info->index_bias); } else { - for (i = 0; i < count-1; i += 2) + for (i = 0; i < info->count-1; i += 2) OUT_CS(((ptr1[i+1]) << 16) | (ptr1[i] )); - if (count & 1) + if (info->count & 1) OUT_CS(ptr1[i]); } break; case 2: ptr2 = (uint16_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; - ptr2 += start; + ptr2 += info->start; - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - r300_translate_primitive(mode)); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | + r300_translate_primitive(info->mode)); - if (indexBias && !r300->screen->caps.is_r500) { - for (i = 0; i < count-1; i += 2) - OUT_CS(((ptr2[i+1] + indexBias) << 16) | - (ptr2[i] + indexBias)); + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count-1; i += 2) + OUT_CS(((ptr2[i+1] + info->index_bias) << 16) | + (ptr2[i] + info->index_bias)); - if (count & 1) - OUT_CS(ptr2[i] + indexBias); + if (info->count & 1) + OUT_CS(ptr2[i] + info->index_bias); } else { OUT_CS_TABLE(ptr2, count_dwords); } @@ -595,15 +599,15 @@ static void r300_draw_elements_immediate(struct r300_context *r300, case 4: ptr4 = (uint32_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr; - ptr4 += start; + ptr4 += info->start; - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | R300_VAP_VF_CNTL__INDEX_SIZE_32bit | - r300_translate_primitive(mode)); + r300_translate_primitive(info->mode)); - if (indexBias && !r300->screen->caps.is_r500) { - for (i = 0; i < count; i++) - OUT_CS(ptr4[i] + indexBias); + if (info->index_bias && !r300->screen->caps.is_r500) { + for (i = 0; i < info->count; i++) + OUT_CS(ptr4[i] + info->index_bias); } else { OUT_CS_TABLE(ptr4, count_dwords); } @@ -612,21 +616,23 @@ static void r300_draw_elements_immediate(struct r300_context *r300, END_CS; } -static void r300_draw_elements(struct r300_context *r300, int indexBias, - unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count) +static void r300_draw_elements(struct r300_context *r300, + const struct pipe_draw_info *info, + int instance_id) { struct pipe_resource *indexBuffer = r300->index_buffer.buffer; unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; + unsigned start = info->start; + unsigned count = info->count; boolean alt_num_verts = r300->screen->caps.is_r500 && count > 65536; unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ uint16_t indices3[3]; - if (indexBias && !r300->screen->caps.is_r500) { - r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); + if (info->index_bias && !r300->screen->caps.is_r500) { + r300_split_index_bias(r300, info->index_bias, &buffer_offset, &index_offset); } r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset, @@ -641,7 +647,7 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED); - if (mode == PIPE_PRIM_TRIANGLES) { + if (info->mode == PIPE_PRIM_TRIANGLES) { memcpy(indices3, ptr + start, 6); } else { /* Copy the mapped index buffer directly to the upload buffer. @@ -660,13 +666,15 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, /* 19 dwords for emit_draw_elements. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, indexBuffer, 19, buffer_offset, indexBias)) + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | + PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, + instance_id)) goto done; if (alt_num_verts || count <= 65535) { - r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count, indices3); + r300_emit_draw_elements(r300, indexBuffer, indexSize, info->min_index, + info->max_index, info->mode, start, count, + indices3); } else { do { if (indexSize == 2 && (start & 1)) @@ -675,8 +683,8 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, short_count = MIN2(count, 65534); r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, - mode, start, short_count, indices3); + info->min_index, info->max_index, + info->mode, start, short_count, indices3); start += short_count; count -= short_count; @@ -685,7 +693,8 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 19, buffer_offset, indexBias)) + indexBuffer, 19, buffer_offset, info->index_bias, + instance_id)) goto done; } } while (count); @@ -697,25 +706,28 @@ done: } } -static void r300_draw_arrays(struct r300_context *r300, unsigned mode, - unsigned start, unsigned count) +static void r300_draw_arrays(struct r300_context *r300, + const struct pipe_draw_info *info, + int instance_id) { boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536; + info->count > 65536; + unsigned start = info->start; + unsigned count = info->count; unsigned short_count; /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0)) + PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, + NULL, 9, start, 0, instance_id)) return; if (alt_num_verts || count <= 65535) { - r300_emit_draw_arrays(r300, mode, count); + r300_emit_draw_arrays(r300, info->mode, count); } else { do { short_count = MIN2(count, 65535); - r300_emit_draw_arrays(r300, mode, short_count); + r300_emit_draw_arrays(r300, info->mode, short_count); start += short_count; count -= short_count; @@ -724,53 +736,78 @@ static void r300_draw_arrays(struct r300_context *r300, unsigned mode, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0)) + start, 0, instance_id)) return; } } while (count); } } +static void r300_draw_arrays_instanced(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + int i; + + for (i = 0; i < info->instance_count; i++) + r300_draw_arrays(r300, info, i); +} + +static void r300_draw_elements_instanced(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + int i; + + for (i = 0; i < info->instance_count; i++) + r300_draw_elements(r300, info, i); +} + static void r300_draw_vbo(struct pipe_context* pipe, - const struct pipe_draw_info *info) + const struct pipe_draw_info *dinfo) { struct r300_context* r300 = r300_context(pipe); - unsigned count = info->count; + struct pipe_draw_info info = *dinfo; boolean buffers_updated, uploader_flushed; - boolean indexed = info->indexed && r300->index_buffer.buffer; - unsigned start_indexed = info->start + r300->index_buffer.offset; - int max_index = MIN2(r300->vbuf_mgr->max_index, info->max_index); + + info.indexed = info.indexed && r300->index_buffer.buffer; if (r300->skip_rendering || - !u_trim_pipe_prim(info->mode, &count)) { + !u_trim_pipe_prim(info.mode, &info.count)) { return; } r300_update_derived_state(r300); /* Start the vbuf manager and update buffers if needed. */ - u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info, + u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info, &buffers_updated, &uploader_flushed); if (buffers_updated) { r300->vertex_arrays_dirty = TRUE; } /* Draw. */ - if (indexed) { - if (count <= 8 && - r300_resource(r300->index_buffer.buffer)->b.user_ptr) { - r300_draw_elements_immediate(r300, info->index_bias, - info->min_index, max_index, - info->mode, start_indexed, count); + if (info.indexed) { + info.start += r300->index_buffer.offset; + info.max_index = MIN2(r300->vbuf_mgr->max_index, info.max_index); + + if (info.instance_count <= 1) { + if (info.count <= 8 && + r300_resource(r300->index_buffer.buffer)->b.user_ptr) { + r300_draw_elements_immediate(r300, &info); + } else { + r300_draw_elements(r300, &info, -1); + } } else { - r300_draw_elements(r300, info->index_bias, info->min_index, - max_index, info->mode, start_indexed, count); + r300_draw_elements_instanced(r300, &info); } } else { - if (immd_is_good_idea(r300, count)) { - r300_draw_arrays_immediate(r300, info->mode, info->start, count); + if (info.instance_count <= 1) { + if (immd_is_good_idea(r300, info.count)) { + r300_draw_arrays_immediate(r300, &info); + } else { + r300_draw_arrays(r300, &info, -1); + } } else { - r300_draw_arrays(r300, info->mode, info->start, count); + r300_draw_arrays_instanced(r300, &info); } } @@ -805,7 +842,7 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe, r300_update_derived_state(r300); r300_reserve_cs_dwords(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | (indexed ? PREP_INDEXED : 0), indexed ? 256 : 6); @@ -987,13 +1024,13 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, dwords, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + NULL, dwords, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, - NULL, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, + NULL, 0, 0, -1)) return; } @@ -1027,13 +1064,13 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + NULL, 256, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, - PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 0, 0)) + PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, + NULL, 0, 0, -1)) return; } @@ -1070,7 +1107,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + NULL, 256, 0, 0, -1)) return; end_cs_dwords = r300_get_num_cs_end_dwords(r300); @@ -1174,7 +1211,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) goto done; DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c index f3d8c5b889f..6593474b1f0 100644 --- a/src/gallium/drivers/r300/r300_resource.c +++ b/src/gallium/drivers/r300/r300_resource.c @@ -38,13 +38,6 @@ r300_resource_create(struct pipe_screen *screen, } -static unsigned r300_resource_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) -{ - return r300_buffer_is_referenced(context, buf); -} - void r300_init_resource_functions(struct r300_context *r300) { r300->context.get_transfer = u_get_transfer_vtbl; @@ -53,7 +46,6 @@ void r300_init_resource_functions(struct r300_context *r300) r300->context.transfer_unmap = u_transfer_unmap_vtbl; r300->context.transfer_destroy = u_transfer_destroy_vtbl; r300->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r300->context.is_resource_referenced = r300_resource_is_referenced_by_cs; r300->context.create_surface = r300_create_surface; r300->context.surface_destroy = r300_surface_destroy; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 52d0247fbfd..8a69628c53e 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -24,6 +24,7 @@ #include "util/u_format.h" #include "util/u_format_s3tc.h" #include "util/u_memory.h" +#include "os/os_time.h" #include "r300_context.h" #include "r300_texture.h" @@ -112,6 +113,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; case PIPE_CAP_TEXTURE_SWIZZLE: return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; @@ -124,12 +126,15 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_STREAM_OUTPUT: - case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_INSTANCED_DRAWING: case PIPE_CAP_ARRAY_TEXTURES: return 0; + /* SWTCL-only features. */ + case PIPE_CAP_STREAM_OUTPUT: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_TGSI_INSTANCEID: + return !r300screen->caps.has_tcl; + /* Texturing. */ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: case PIPE_CAP_MAX_COMBINED_SAMPLERS: @@ -301,8 +306,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned usage, - unsigned geom_flags) + unsigned usage) { struct r300_winsys_screen *rws = r300_screen(screen)->rws; uint32_t retval = 0; @@ -314,9 +318,13 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_B10G10R10A2_UNORM || format == PIPE_FORMAT_R10SG10SB10SA2U_NORM; boolean is_ati1n = format == PIPE_FORMAT_RGTC1_UNORM || - format == PIPE_FORMAT_RGTC1_SNORM; + format == PIPE_FORMAT_RGTC1_SNORM || + format == PIPE_FORMAT_LATC1_UNORM || + format == PIPE_FORMAT_LATC1_SNORM; boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM || - format == PIPE_FORMAT_RGTC2_SNORM; + format == PIPE_FORMAT_RGTC2_SNORM || + format == PIPE_FORMAT_LATC2_UNORM || + format == PIPE_FORMAT_LATC2_SNORM; boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT || format == PIPE_FORMAT_R16G16_FLOAT || format == PIPE_FORMAT_R16G16B16_FLOAT || @@ -414,25 +422,40 @@ static void r300_fence_reference(struct pipe_screen *screen, (struct r300_winsys_bo*)fence); } -static int r300_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) +static boolean r300_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence) { struct r300_winsys_screen *rws = r300_screen(screen)->rws; struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; - return !rws->buffer_is_busy(rfence) ? 0 : 1; /* 0 == success */ + return !rws->buffer_is_busy(rfence); } -static int r300_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) +static boolean r300_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + uint64_t timeout) { struct r300_winsys_screen *rws = r300_screen(screen)->rws; struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence; + if (timeout != PIPE_TIMEOUT_INFINITE) { + int64_t start_time = os_time_get(); + + /* Convert to microseconds. */ + timeout /= 1000; + + /* Wait in a loop. */ + while (rws->buffer_is_busy(rfence)) { + if (os_time_get() - start_time >= timeout) { + return FALSE; + } + os_time_sleep(10); + } + return TRUE; + } + rws->buffer_wait(rfence); - return 0; /* 0 == success */ + return TRUE; } struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 1045911f3ae..986ae384fbf 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -33,21 +33,6 @@ #include "r300_screen_buffer.h" #include "r300_winsys.h" -unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf) -{ - struct r300_context *r300 = r300_context(context); - struct r300_resource *rbuf = r300_resource(buf); - - if (rbuf->b.user_ptr || rbuf->constant_buffer) - return PIPE_UNREFERENCED; - - if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf)) - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; - - return PIPE_UNREFERENCED; -} - void r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned *start, @@ -181,7 +166,6 @@ static const struct u_resource_vtbl r300_buffer_vtbl = { NULL, /* get_handle */ r300_buffer_destroy, /* resource_destroy */ - NULL, /* is_buffer_referenced */ r300_buffer_get_transfer, /* get_transfer */ r300_buffer_transfer_destroy, /* transfer_destroy */ r300_buffer_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 14bee460d5b..cdbc4425fcb 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -49,9 +49,6 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, void *ptr, unsigned size, unsigned bind); -unsigned r300_buffer_is_referenced(struct pipe_context *context, - struct pipe_resource *buf); - /* Inline functions. */ static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 2b0c8750554..ecb4fc691cc 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -188,13 +188,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe, struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */ + uint32_t blend_control_noclamp = 0; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */ + uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */ uint32_t color_channel_mask = 0; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */ uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */ - boolean clamp = TRUE; CB_LOCALS; + blend->state = *state; + if (state->rt[0].blend_enable) { unsigned eqRGB = state->rt[0].rgb_func; @@ -207,10 +210,14 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, * this is just the crappy D3D naming */ - blend_control = R300_ALPHA_BLEND_ENABLE | - r300_translate_blend_function(eqRGB, clamp) | + blend_control = blend_control_noclamp = + R300_ALPHA_BLEND_ENABLE | ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); + blend_control |= + r300_translate_blend_function(eqRGB, TRUE); + blend_control_noclamp |= + r300_translate_blend_function(eqRGB, FALSE); /* Optimization: some operations do not require the destination color. * @@ -232,6 +239,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { /* Enable reading from the colorbuffer. */ blend_control |= R300_READ_ENABLE; + blend_control_noclamp |= R300_READ_ENABLE; if (r300screen->caps.is_r500) { /* Optimization: Depending on incoming pixels, we can @@ -269,8 +277,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, * * Equations other than ADD are rarely used and therefore won't be * optimized. */ - if (clamp && - (eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) && + if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) && (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) { /* ADD: X+Y * REVERSE_SUBTRACT: Y-X @@ -308,10 +315,14 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { blend_control |= R300_SEPARATE_ALPHA_ENABLE; - alpha_blend_control = - r300_translate_blend_function(eqA, clamp) | + blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE; + alpha_blend_control = alpha_blend_control_noclamp = (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); + alpha_blend_control |= + r300_translate_blend_function(eqA, TRUE); + alpha_blend_control_noclamp |= + r300_translate_blend_function(eqA, FALSE); } } @@ -348,7 +359,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, */ /* Build a command buffer. */ - BEGIN_CB(blend->cb, 8); + BEGIN_CB(blend->cb_clamp, 8); OUT_CB_REG(R300_RB3D_ROPCNTL, rop); OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); OUT_CB(blend_control); @@ -357,6 +368,16 @@ static void* r300_create_blend_state(struct pipe_context* pipe, OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); END_CB; + /* Build a command buffer. */ + BEGIN_CB(blend->cb_noclamp, 8); + OUT_CB_REG(R300_RB3D_ROPCNTL, rop); + OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); + OUT_CB(blend_control_noclamp); + OUT_CB(alpha_blend_control_noclamp); + OUT_CB(color_channel_mask); + OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); + END_CB; + /* The same as above, but with no colorbuffer reads and writes. */ BEGIN_CB(blend->cb_no_readwrite, 8); OUT_CB_REG(R300_RB3D_ROPCNTL, rop); @@ -377,6 +398,10 @@ static void r300_bind_blend_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); UPDATE_STATE(state, r300->blend_state); + + if (r300->fs.state && r300_pick_fragment_shader(r300)) { + r300_mark_fs_code_dirty(r300); + } } /* Free blend state. */ @@ -836,10 +861,9 @@ r300_set_framebuffer_state(struct pipe_context* pipe, } } - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ - if (!!old_state->nr_cbufs != !!state->nr_cbufs) { - r300_mark_atom_dirty(r300, &r300->blend_state); - } + /* Need to reset clamping or colormask. */ + r300_mark_atom_dirty(r300, &r300->blend_state); + /* If zsbuf is set from NULL to non-NULL or vice versa.. */ if (!!old_state->zsbuf != !!state->zsbuf) { r300_mark_atom_dirty(r300, &r300->dsa_state); @@ -1593,12 +1617,6 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) enum pipe_format format; unsigned i; - if (velems->count > 16) { - fprintf(stderr, "r300: More than 16 vertex elements are not supported," - " requested %i, using 16.\n", velems->count); - velems->count = 16; - } - /* Vertex shaders have no semantics on their inputs, * so PSC should just route stuff based on the vertex elements, * and not on attrib information. */ @@ -1649,9 +1667,12 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM; attribs = &dummy_attrib; count = 1; + } else if (count > 16) { + fprintf(stderr, "r300: More than 16 vertex elements are not supported," + " requested %i, using 16.\n", count); + count = 16; } - assert(count <= PIPE_MAX_ATTRIBS); velems = CALLOC_STRUCT(r300_vertex_element_state); if (!velems) return NULL; @@ -1849,6 +1870,14 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, } } +static void r300_texture_barrier(struct pipe_context *pipe) +{ + struct r300_context *r300 = r300_context(pipe); + + r300_mark_atom_dirty(r300, &r300->gpu_flush); + r300_mark_atom_dirty(r300, &r300->texture_cache_inval); +} + void r300_init_state_functions(struct r300_context* r300) { r300->context.create_blend_state = r300_create_blend_state; @@ -1904,4 +1933,6 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.create_vs_state = r300_create_vs_state; r300->context.bind_vs_state = r300_bind_vs_state; r300->context.delete_vs_state = r300_delete_vs_state; + + r300->context.texture_barrier = r300_texture_barrier; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index a1e116f4b61..ec00e2552ca 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -644,16 +644,20 @@ static uint32_t r300_get_border_color(enum pipe_format format, switch (format) { case PIPE_FORMAT_RGTC1_SNORM: case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: /* Add 1/32 to round the border color instead of truncating. */ /* The Y component is used for the border color. */ border_swizzled[1] = border_swizzled[2] + 1.0f/32; util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); return uc.ui; case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_SNORM: border_swizzled[0] = border_swizzled[2]; util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); return uc.ui; case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_LATC2_UNORM: util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); return uc.ui; default: diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 86ad0b8b8e0..c650fb7ed37 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -174,9 +174,11 @@ uint32_t r300_translate_texformat(enum pipe_format format, if (util_format_is_compressed(format) && dxtc_swizzle && format != PIPE_FORMAT_RGTC2_UNORM && - format != PIPE_FORMAT_RGTC2_SNORM) { + format != PIPE_FORMAT_RGTC2_SNORM && + format != PIPE_FORMAT_LATC2_UNORM && + format != PIPE_FORMAT_LATC2_SNORM) { result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, - dxtc_swizzle); + TRUE); } else { result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE); @@ -209,13 +211,19 @@ uint32_t r300_translate_texformat(enum pipe_format format, if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (format) { case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_SNORM: result |= sign_bit[1]; + case PIPE_FORMAT_LATC1_UNORM: case PIPE_FORMAT_RGTC1_UNORM: return R500_TX_FORMAT_ATI1N | result; + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_SNORM: result |= sign_bit[2] | sign_bit[3]; case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_LATC2_UNORM: return R400_TX_FORMAT_ATI2N | result; + default: return ~0; /* Unsupported/unknown. */ } @@ -363,6 +371,8 @@ uint32_t r500_tx_format_msb_bit(enum pipe_format format) switch (format) { case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: return R500_TXFORMAT_MSB; @@ -742,7 +752,6 @@ static const struct u_resource_vtbl r300_texture_vtbl = { NULL, /* get_handle */ r300_texture_destroy, /* resource_destroy */ - NULL, /* is_resource_referenced */ r300_texture_get_transfer, /* get_transfer */ r300_texture_transfer_destroy, /* transfer_destroy */ r300_texture_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 3846fb8b6b3..2910666dd51 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -315,7 +315,7 @@ static unsigned r300_pixels_to_dwords(unsigned stride, unsigned height, unsigned xblock, unsigned yblock) { - return (align(stride, xblock) * align(height, yblock)) / (xblock * yblock); + return (util_align_npot(stride, xblock) * align(height, yblock)) / (xblock * yblock); } static void r300_setup_hyperz_properties(struct r300_screen *screen, @@ -390,7 +390,7 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen, tex->tex.zcomp8x8[i] = zcompsize == 8; tex->tex.zmask_stride_in_pixels[i] = - align(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); + util_align_npot(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); } else { tex->tex.zmask_dwords[i] = 0; tex->tex.zcomp8x8[i] = FALSE; @@ -398,7 +398,7 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen, } /* Now setup HIZ. */ - stride = align(stride, hiz_align_x[pipes-1]); + stride = util_align_npot(stride, hiz_align_x[pipes-1]); height = align(height, hiz_align_y[pipes-1]); /* Get the HIZ buffer size in dwords. */ diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 30de9ec1e32..65c5095be6a 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -72,7 +72,8 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, transfer->box.x, transfer->box.y, transfer->box.z, &r300transfer->linear_texture->b.b.b, 0, &src_box); - ctx->flush(ctx, 0, NULL); + /* XXX remove this. */ + r300_flush(ctx, 0, NULL); } struct pipe_transfer* @@ -100,7 +101,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, } blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || - desc->layout == UTIL_FORMAT_LAYOUT_S3TC; + desc->layout == UTIL_FORMAT_LAYOUT_S3TC || + desc->layout == UTIL_FORMAT_LAYOUT_RGTC; trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -151,7 +153,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, if (!trans->linear_texture) { /* Oh crap, the thing can't create the texture. * Let's flush and try again. */ - ctx->flush(ctx, 0, NULL); + r300_flush(ctx, 0, NULL); trans->linear_texture = r300_resource( ctx->screen->resource_create(ctx->screen, @@ -175,13 +177,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, assert(!trans->linear_texture->tex.microtile && !trans->linear_texture->tex.macrotile[0]); - /* Set the stride. - * - * Even though we are using an internal texture for this, - * the transfer level, box and usage parameters still reflect - * the arguments received to get_transfer. We just do the - * right thing internally. - */ + /* Set the stride. */ trans->transfer.stride = trans->linear_texture->tex.stride_in_bytes[0]; @@ -191,7 +187,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, r300_copy_from_tiled_texture(ctx, trans); /* Always referenced in the blit. */ - ctx->flush(ctx, 0, NULL); + r300_flush(ctx, 0, NULL); } return &trans->transfer; } @@ -201,8 +197,9 @@ r300_texture_get_transfer(struct pipe_context *ctx, trans->transfer.stride = tex->tex.stride_in_bytes[level]; trans->offset = r300_texture_get_offset(tex, level, box->z); - if (referenced_cs) - ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + if (referenced_cs && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + r300_flush(ctx, 0, NULL); return &trans->transfer; } return NULL; diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index c0b66899f8b..3a6798a5423 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -35,6 +35,7 @@ #include "pipe/p_state.h" #define R300_MAX_CMDBUF_DWORDS (16 * 1024) +#define R300_FLUSH_ASYNC (1 << 0) struct winsys_handle; struct r300_winsys_screen; @@ -265,15 +266,9 @@ struct r300_winsys_screen { * Flush a command stream. * * \param cs A command stream to flush. + * \param flags, R300_FLUSH_ASYNC or 0. */ - void (*cs_flush)(struct r300_winsys_cs *cs); - - /** - * Wait until the last flush is completed. - * - * \param cs A command stream. - */ - void (*cs_sync_flush)(struct r300_winsys_cs *cs); + void (*cs_flush)(struct r300_winsys_cs *cs, unsigned flags); /** * Set a flush callback which is called from winsys when flush is @@ -284,7 +279,7 @@ struct r300_winsys_screen { * \param user A user pointer that will be passed to the flush callback. */ void (*cs_set_flush)(struct r300_winsys_cs *cs, - void (*flush)(void *), + void (*flush)(void *ctx, unsigned flags), void *user); /** diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 8cb417f9731..20a319a255d 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -97,15 +97,3 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) } return 0; } - -void eg_cf_vtx(struct r600_vertex_element *ve) -{ - struct r600_pipe_state *rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - (r600_bo_offset(ve->fetch_shader)) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4206b4a201d..77432661b64 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -150,10 +150,6 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_DSA; /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); stencil_ref_mask = 0; stencil_ref_mask_bf = 0; @@ -210,7 +206,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, + * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by + * evergreen_pipe_shader_ps().*/ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL); @@ -1329,7 +1328,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader { struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; int pos_index = -1, face_index = -1; int ninterp = 0; boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; @@ -1337,6 +1336,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader rstate->nregs = 0; + db_shader_control = 0; for (i = 0; i < rshader->ninput; i++) { /* evergreen NUM_INTERP only contains values interpolated into the LDS, POSITION goes via GPRs from the SC so isn't counted */ @@ -1358,16 +1358,12 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader } for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(1), - S_02880C_Z_EXPORT_ENABLE(1), NULL); + db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_STENCIL_EXPORT_ENABLE(1), - S_02880C_STENCIL_EXPORT_ENABLE(1), NULL); + db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(1); } + if (rshader->uses_kill) + db_shader_control |= S_02880C_KILL_ENABLE(1); exports_ps = 0; num_cout = 0; @@ -1442,15 +1438,15 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps, 0xFFFFFFFF, NULL); - - if (rshader->uses_kill) { - /* only set some bits here, the other bits are set in the dsa state */ - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_KILL_ENABLE(1), - S_02880C_KILL_ENABLE(1), NULL); - } - + /* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */ + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + db_shader_control, + S_02880C_Z_EXPORT_ENABLE(1) | + S_02880C_STENCIL_EXPORT_ENABLE(1) | + S_02880C_KILL_ENABLE(1), + NULL); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF, 0xFFFFFFFF, NULL); @@ -1501,6 +1497,18 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 0xFFFFFFFF, NULL); } +void evergreen_fetch_shader(struct r600_vertex_element *ve) +{ + struct r600_pipe_state *rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, + (r600_bo_offset(ve->fetch_shader)) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) { struct pipe_depth_stencil_alpha_state dsa; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 996d9f74a60..45e614977e2 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -483,7 +483,9 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) { return is_alu_reduction_inst(bc, alu) || - is_alu_mova_inst(bc, alu); + is_alu_mova_inst(bc, alu) || + (bc->chiprev == CHIPREV_EVERGREEN && + alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR); } /* alu instructions that can only execute on the trans unit */ @@ -525,8 +527,9 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) case CHIPREV_EVERGREEN: default: if (!alu->is_op3) - /* Note that FLT_TO_INT* instructions are vector instructions - * on Evergreen, despite what the documentation says. */ + /* Note that FLT_TO_INT_* instructions are vector-only instructions + * on Evergreen, despite what the documentation says. FLT_TO_INT + * can do both vector and scalar. */ return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT || alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT || @@ -1314,6 +1317,24 @@ static void r600_bc_remove_alu(struct r600_bc_cf *cf, struct r600_bc_alu *alu) cf->ndw -= 2; } +static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) +{ + switch (bc->chiprev) { + case CHIPREV_R600: + return 8; + + case CHIPREV_R700: + return 16; + + case CHIPREV_EVERGREEN: + return 64; + + default: + R600_ERR("Unknown chiprev %d.\n", bc->chiprev); + return 8; + } +} + int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) { struct r600_bc_vtx *nvtx = r600_bc_vtx(); @@ -1339,7 +1360,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* each fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } @@ -1386,7 +1407,7 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) /* each texture fetch use 4 dwords */ bc->cf_last->ndw += 4; bc->ndw += 4; - if ((bc->cf_last->ndw / 4) > 7) + if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc)) bc->force_add_cf = 1; return 0; } @@ -1406,31 +1427,7 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) /* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { - unsigned fetch_resource_start = 0; - - /* check if we are fetch shader */ - /* fetch shader can also access vertex resource, - * first fetch shader resource is at 160 - */ - if (bc->type == -1) { - switch (bc->chiprev) { - /* r600 */ - case CHIPREV_R600: - /* r700 */ - case CHIPREV_R700: - fetch_resource_start = 160; - break; - /* evergreen */ - case CHIPREV_EVERGREEN: - fetch_resource_start = 0; - break; - default: - fprintf(stderr, "%s:%s:%d unknown chiprev %d\n", - __FILE__, __func__, __LINE__, bc->chiprev); - break; - } - } - bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) | + bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | @@ -1523,6 +1520,14 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign return 0; } +static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +{ + *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); +} + enum cf_class { CF_CLASS_ALU, @@ -1596,11 +1601,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) break; case CF_CLASS_TEXTURE: case CF_CLASS_VERTEX: - bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); - bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | - S_SQ_CF_WORD1_BARRIER(cf->barrier) | - S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) | - S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program); + if (bc->chiprev == CHIPREV_R700) + r700_bc_cf_vtx_build(&bc->bytecode[id], cf); + else + r600_bc_cf_vtx_build(&bc->bytecode[id], cf); break; case CF_CLASS_EXPORT: bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | @@ -2801,22 +2805,6 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "--------------------------------------\n"); } -static void r600_cf_vtx(struct r600_vertex_element *ve) -{ - struct r600_pipe_state *rstate; - - rstate = &ve->rstate; - rstate->id = R600_PIPE_STATE_FETCH_SHADER; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, - r600_bo_offset(ve->fetch_shader) >> 8, - 0xFFFFFFFF, ve->fetch_shader); -} - static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format, unsigned *format_comp) { @@ -2987,7 +2975,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru alu.src[0].chan = 3; alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[1].value = (1l << 32) / elements[i].instance_divisor + 1; + alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; alu.dst.sel = i + 1; alu.dst.chan = 3; @@ -3075,9 +3063,9 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru r600_bc_clear(&bc); if (rctx->family >= CHIP_CEDAR) - eg_cf_vtx(ve); + evergreen_fetch_shader(ve); else - r600_cf_vtx(ve); + r600_fetch_shader(ve); return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index dbd1e204b49..db8d0a4ed10 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -188,7 +188,6 @@ struct r600_bc { /* eg_asm.c */ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); -void eg_cf_vtx(struct r600_vertex_element *ve); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); @@ -206,6 +205,7 @@ void r600_bc_dump(struct r600_bc *bc); int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ +void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf); int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); #endif diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 2363cd1ebc5..6ced719c8f0 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -52,14 +52,6 @@ static void r600_buffer_destroy(struct pipe_screen *screen, util_slab_free(&rscreen->pool_buffers, rbuffer); } -static unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, - struct pipe_resource *buf, - unsigned level, int layer) -{ - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - static struct pipe_transfer *r600_get_transfer(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, @@ -160,7 +152,6 @@ static const struct u_resource_vtbl r600_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ r600_buffer_destroy, /* resource_destroy */ - r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */ r600_get_transfer, /* get_transfer */ r600_transfer_destroy, /* transfer_destroy */ r600_buffer_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 651f994a502..4a30eddb621 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -49,7 +49,7 @@ /* * pipe_context */ -static void r600_flush(struct pipe_context *ctx, unsigned flags, +static void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -281,18 +281,28 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_SM3: case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_INSTANCED_DRAWING: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + /* R600 doesn't support per-MRT blends */ + if (family == CHIP_R600) + return 0; + else + return 1; /* Unsupported features (boolean caps). */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - return 0; + /* R600 doesn't support per-MRT blends */ + if (family == CHIP_R600) + return 0; + else + return 0; case PIPE_CAP_ARRAY_TEXTURES: /* fix once the CS checker upstream is fixed */ @@ -394,7 +404,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_ADDRS: return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */ case PIPE_SHADER_CAP_MAX_CONSTS: - return 256; //max native parameters + return R600_MAX_CONST_BUFFER_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return R600_MAX_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_PREDS: @@ -417,8 +427,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned usage, - unsigned geom_flags) + unsigned usage) { unsigned retval = 0; if (target >= PIPE_MAX_TEXTURE_TYPES) { diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 5f701d87e8f..396801e4a41 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -38,6 +38,7 @@ #include "r600_resource.h" #define R600_MAX_CONST_BUFFERS 1 +#define R600_MAX_CONST_BUFFER_SIZE 4096 enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, @@ -175,6 +176,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx); void evergreen_init_config(struct r600_pipe_context *rctx); void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void evergreen_fetch_shader(struct r600_vertex_element *ve); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, @@ -214,6 +216,9 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, void r600_init_state_functions(struct r600_pipe_context *rctx); void r600_spi_update(struct r600_pipe_context *rctx); void r600_init_config(struct r600_pipe_context *rctx); +void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void r600_fetch_shader(struct r600_vertex_element *ve); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 343403f92f3..181ea3f9e49 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -62,7 +62,7 @@ static boolean r600_get_query_result(struct pipe_context *ctx, struct r600_query *rquery = (struct r600_query *)query; if (rquery->num_results) { - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index 207642ccfa9..f3ab3613c84 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -61,5 +61,4 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600) r600->context.transfer_unmap = u_transfer_unmap_vtbl; r600->context.transfer_destroy = u_transfer_destroy_vtbl; r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; - r600->context.is_resource_referenced = u_is_resource_referenced_vtbl; } diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 0b4dc75e584..12786daa464 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -34,55 +34,6 @@ #include <stdio.h> #include <errno.h> -static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned spi_vs_out_id[10]; - unsigned i, tmp; - - /* clear previous register */ - rstate->nregs = 0; - - /* so far never got proper semantic id from tgsi */ - /* FIXME better to move this in config things so they get emited - * only one time per cs - */ - for (i = 0; i < 10; i++) { - spi_vs_out_id[i] = 0; - } - for (i = 0; i < 32; i++) { - tmp = i << ((i & 3) * 8); - spi_vs_out_id[i / 4] |= tmp; - } - for (i = 0; i < 10; i++) { - r600_pipe_state_add_reg(rstate, - R_028614_SPI_VS_OUT_ID_0 + i * 4, - spi_vs_out_id[i], 0xFFFFFFFF, NULL); - } - - r600_pipe_state_add_reg(rstate, - R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028868_SQ_PGM_RESOURCES_VS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0288D0_SQ_PGM_CF_OFFSET_VS, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028858_SQ_PGM_START_VS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); - - r600_pipe_state_add_reg(rstate, - R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, - 0xFFFFFFFF, NULL); - -} - int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id) { @@ -97,104 +48,6 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, return 0; } -static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - struct r600_pipe_state *rstate = &shader->rstate; - struct r600_shader *rshader = &shader->shader; - unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; - int pos_index = -1, face_index = -1; - - rstate->nregs = 0; - - for (i = 0; i < rshader->ninput; i++) { - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - pos_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - face_index = i; - } - - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(1), - S_02880C_Z_EXPORT_ENABLE(1), NULL); - if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_STENCIL_REF_EXPORT_ENABLE(1), - S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); - } - - exports_ps = 0; - num_cout = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) - exports_ps |= 1; - else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; - } - } - exports_ps |= S_028854_EXPORT_COLORS(num_cout); - if (!exports_ps) { - /* always at least export 1 component per pixel */ - exports_ps = 2; - } - - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); - spi_input_z = 0; - if (pos_index != -1) { - spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | - S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | - S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | - S_0286CC_BARYC_SAMPLE_CNTL(1)); - spi_input_z |= 1; - } - - spi_ps_in_control_1 = 0; - if (face_index != -1) { - spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | - S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); - } - - r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028840_SQ_PGM_START_PS, - r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); - r600_pipe_state_add_reg(rstate, - R_028850_SQ_PGM_RESOURCES_PS, - S_028868_NUM_GPRS(rshader->bc.ngpr) | - S_028868_STACK_SIZE(rshader->bc.nstack), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028854_SQ_PGM_EXPORTS_PS, - exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0288CC_SQ_PGM_CF_OFFSET_PS, - 0x00000000, 0xFFFFFFFF, NULL); - - if (rshader->fs_write_all) { - r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - S_028808_MULTIWRITE_ENABLE(1), - S_028808_MULTIWRITE_ENABLE(1), - NULL); - } - - if (rshader->uses_kill) { - /* only set some bits here, the other bits are set in the dsa state */ - r600_pipe_state_add_reg(rstate, - R_02880C_DB_SHADER_CONTROL, - S_02880C_KILL_ENABLE(1), - S_02880C_KILL_ENABLE(1), NULL); - } - r600_pipe_state_add_reg(rstate, - R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, - 0xFFFFFFFF, NULL); -} - static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -1706,37 +1559,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { - int src_chan, src2_chan; + static const unsigned src0_swizzle[] = {2, 2, 0, 1}; + static const unsigned src1_swizzle[] = {1, 0, 2, 2}; /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); - switch (i) { - case 0: - src_chan = 2; - src2_chan = 1; - break; - case 1: - src_chan = 2; - src2_chan = 0; - break; - case 2: - src_chan = 0; - src2_chan = 2; - break; - case 3: - src_chan = 1; - src2_chan = 2; - break; - default: - assert(0); - src_chan = 0; - src2_chan = 0; - break; - } - r600_bc_src(&alu.src[0], &ctx->src[0], src_chan); - r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan); + r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -2011,6 +1842,8 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) static int tgsi_xpd(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + static const unsigned int src0_swizzle[] = {2, 0, 1}; + static const unsigned int src1_swizzle[] = {1, 2, 0}; struct r600_bc_alu alu; uint32_t use_temp = 0; int i, r; @@ -2021,33 +1854,12 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - - switch (i) { - case 0: - r600_bc_src(&alu.src[0], &ctx->src[0], 2); - break; - case 1: - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - break; - case 2: - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - break; - case 3: + if (i < 3) { + r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); + } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; - } - - switch (i) { - case 0: - r600_bc_src(&alu.src[1], &ctx->src[1], 1); - break; - case 1: - r600_bc_src(&alu.src[1], &ctx->src[1], 2); - break; - case 2: - r600_bc_src(&alu.src[1], &ctx->src[1], 0); - break; - case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = i; } @@ -2067,32 +1879,12 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); - switch (i) { - case 0: - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - break; - case 1: - r600_bc_src(&alu.src[0], &ctx->src[0], 2); - break; - case 2: - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - break; - case 3: + if (i < 3) { + r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); + r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); + } else { alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; - } - - switch (i) { - case 0: - r600_bc_src(&alu.src[1], &ctx->src[1], 2); - break; - case 1: - r600_bc_src(&alu.src[1], &ctx->src[1], 0); - break; - case 2: - r600_bc_src(&alu.src[1], &ctx->src[1], 1); - break; - case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = i; } @@ -2960,7 +2752,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 1be641798f7..efb68cbd139 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -117,9 +117,10 @@ static void r600_set_blend_color(struct pipe_context *ctx, static void *r600_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); struct r600_pipe_state *rstate; - u32 color_control, target_mask; + u32 color_control = 0, target_mask; if (blend == NULL) { return NULL; @@ -129,7 +130,10 @@ static void *r600_create_blend_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_BLEND; target_mask = 0; - color_control = S_028808_PER_MRT_BLEND(1); + + /* R600 does not support per-MRT blends */ + if (rctx->family > CHIP_R600) + color_control |= S_028808_PER_MRT_BLEND(1); if (state->logicop_enable) { color_control |= (state->logicop_func << 16) | (state->logicop_func << 20); } else { @@ -152,8 +156,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx, } } blend->cb_target_mask = target_mask; + /* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, - color_control, 0xFFFFFFFF, NULL); + color_control, 0xFFFFFFFD, NULL); for (int i = 0; i < 8; i++) { unsigned eqRGB = state->rt[i].rgb_func; @@ -179,10 +184,11 @@ static void *r600_create_blend_state(struct pipe_context *ctx, bc |= S_028804_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA)); } - r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); - if (i == 0) { + /* R600 does not support per-MRT blends */ + if (rctx->family > CHIP_R600) + r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL); + if (i == 0) r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL); - } } return rstate; } @@ -200,10 +206,6 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_DSA; /* depth TODO some of those db_shader_control field depend on shader adjust mask & add it to shader */ - /* db_shader_control is 0xFFFFFFBE as Z_EXPORT_ENABLE (bit 0) will be - * set by fragment shader if it export Z and KILL_ENABLE (bit 6) will - * be set if shader use texkill instruction - */ db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); stencil_ref_mask = 0; stencil_ref_mask_bf = 0; @@ -262,7 +264,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBE, NULL); + /* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE, + * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by + * r600_pipe_shader_ps().*/ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL); r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL); @@ -501,7 +506,8 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, for (int i = 0; i < count; i++) { if (resource[i]) { - r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, + i + R600_MAX_CONST_BUFFERS); } } } @@ -1232,6 +1238,163 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_context_pipe_state_set(&rctx->ctx, rstate); } +void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; + int pos_index = -1, face_index = -1; + + rstate->nregs = 0; + + for (i = 0; i < rshader->ninput; i++) { + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + pos_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + } + + db_shader_control = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(1); + } + if (rshader->uses_kill) + db_shader_control |= S_02880C_KILL_ENABLE(1); + + exports_ps = 0; + num_cout = 0; + for (i = 0; i < rshader->noutput; i++) { + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { + num_cout++; + } + } + exports_ps |= S_028854_EXPORT_COLORS(num_cout); + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | + S_0286CC_PERSP_GRADIENT_ENA(1); + spi_input_z = 0; + if (pos_index != -1) { + spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | + S_0286CC_BARYC_SAMPLE_CNTL(1)); + spi_input_z |= 1; + } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028840_SQ_PGM_START_PS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + r600_pipe_state_add_reg(rstate, + R_028850_SQ_PGM_RESOURCES_PS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028854_SQ_PGM_EXPORTS_PS, + exports_ps, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288CC_SQ_PGM_CF_OFFSET_PS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all), + S_028808_MULTIWRITE_ENABLE(1), + NULL); + /* only set some bits here, the other bits are set in the dsa state */ + r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, + db_shader_control, + S_02880C_Z_EXPORT_ENABLE(1) | + S_02880C_STENCIL_REF_EXPORT_ENABLE(1) | + S_02880C_KILL_ENABLE(1), + NULL); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_state *rstate = &shader->rstate; + struct r600_shader *rshader = &shader->shader; + unsigned spi_vs_out_id[10]; + unsigned i, tmp; + + /* clear previous register */ + rstate->nregs = 0; + + /* so far never got proper semantic id from tgsi */ + /* FIXME better to move this in config things so they get emited + * only one time per cs + */ + for (i = 0; i < 10; i++) { + spi_vs_out_id[i] = 0; + } + for (i = 0; i < 32; i++) { + tmp = i << ((i & 3) * 8); + spi_vs_out_id[i / 4] |= tmp; + } + for (i = 0; i < 10; i++) { + r600_pipe_state_add_reg(rstate, + R_028614_SPI_VS_OUT_ID_0 + i * 4, + spi_vs_out_id[i], 0xFFFFFFFF, NULL); + } + + r600_pipe_state_add_reg(rstate, + R_0286C4_SPI_VS_OUT_CONFIG, + S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028868_SQ_PGM_RESOURCES_VS, + S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0288D0_SQ_PGM_CF_OFFSET_VS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028858_SQ_PGM_START_VS, + r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); + + r600_pipe_state_add_reg(rstate, + R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, + 0xFFFFFFFF, NULL); +} + +void r600_fetch_shader(struct r600_vertex_element *ve) +{ + struct r600_pipe_state *rstate; + + rstate = &ve->rstate; + rstate->id = R600_PIPE_STATE_FETCH_SHADER; + rstate->nregs = 0; + r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS, + r600_bo_offset(ve->fetch_shader) >> 8, + 0xFFFFFFFF, ve->fetch_shader); +} + void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) { struct pipe_depth_stencil_alpha_state dsa; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 16b53a0b2a5..d079b571156 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -68,7 +68,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 rtransfer->staging_texture, 0, &sbox); - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, @@ -299,14 +299,14 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, res->format, res->target, res->nr_samples, - bind, 0)) + bind)) return FALSE; if (!screen->is_format_supported(screen, res->format, res->target, res->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0)) + PIPE_BIND_SAMPLER_VIEW)) return FALSE; switch (res->usage) { @@ -349,19 +349,10 @@ static void r600_texture_destroy(struct pipe_screen *screen, FREE(rtex); } -static unsigned int r600_texture_is_referenced(struct pipe_context *context, - struct pipe_resource *texture, - unsigned level, int layer) -{ - /* FIXME */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - static const struct u_resource_vtbl r600_texture_vtbl = { r600_texture_get_handle, /* get_handle */ r600_texture_destroy, /* resource_destroy */ - r600_texture_is_referenced, /* is_resource_referenced */ r600_texture_get_transfer, /* get_transfer */ r600_texture_transfer_destroy, /* transfer_destroy */ r600_texture_transfer_map, /* transfer_map */ @@ -423,10 +414,12 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, /* Would like some magic "get_bool_option_once" routine. */ if (force_tiling == -1) { - struct r600_screen *rscreen = (struct r600_screen *)screen; +#if 0 /* reenable when 2D tiling is fixed better */ - /*if (r600_get_minor_version(rscreen->radeon) >= 9) - force_tiling = debug_get_bool_option("R600_TILING", TRUE);*/ + struct r600_screen *rscreen = (struct r600_screen *)screen; + if (r600_get_minor_version(rscreen->radeon) >= 9) + force_tiling = debug_get_bool_option("R600_TILING", TRUE); +#endif force_tiling = debug_get_bool_option("R600_TILING", FALSE); } @@ -653,7 +646,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); /* Always referenced in the blit. */ - ctx->flush(ctx, 0, NULL); + ctx->flush(ctx, NULL); } return &trans->transfer; } diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index a7f2f54736e..b3c7d1494fc 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -26,6 +26,15 @@ #include "r600_asm.h" #include "r700_sq.h" +void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf) +{ + unsigned count = (cf->ndw / 4) - 1; + *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); + *bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COUNT(count) | + S_SQ_CF_WORD1_COUNT_3(count >> 3); +} int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 4a5d5413d8c..6e2d6ba1efa 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -44,7 +44,10 @@ rbug_destroy(struct pipe_context *_pipe) struct pipe_context *pipe = rb_pipe->pipe; remove_from_list(&rb_pipe->list); + pipe_mutex_lock(rb_pipe->call_mutex); pipe->destroy(pipe); + rb_pipe->pipe = NULL; + pipe_mutex_unlock(rb_pipe->call_mutex); FREE(rb_pipe); } @@ -112,7 +115,12 @@ rbug_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) pipe_mutex_lock(rb_pipe->draw_mutex); rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_BEFORE); - pipe->draw_vbo(pipe, info); + pipe_mutex_lock(rb_pipe->call_mutex); + if (!(rb_pipe->curr.fs && rb_pipe->curr.fs->disabled) && + !(rb_pipe->curr.gs && rb_pipe->curr.gs->disabled) && + !(rb_pipe->curr.vs && rb_pipe->curr.vs->disabled)) + pipe->draw_vbo(pipe, info); + pipe_mutex_unlock(rb_pipe->call_mutex); rbug_draw_block_locked(rb_pipe, RBUG_BLOCK_AFTER); pipe_mutex_unlock(rb_pipe->draw_mutex); @@ -125,8 +133,10 @@ rbug_create_query(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); return pipe->create_query(pipe, query_type); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -136,8 +146,10 @@ rbug_destroy_query(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->destroy_query(pipe, query); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -147,8 +159,10 @@ rbug_begin_query(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->begin_query(pipe, query); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -158,8 +172,10 @@ rbug_end_query(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->end_query(pipe, query); + pipe_mutex_unlock(rb_pipe->call_mutex); } static boolean @@ -170,11 +186,16 @@ rbug_get_query_result(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + boolean ret; - return pipe->get_query_result(pipe, - query, - wait, - result); + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->get_query_result(pipe, + query, + wait, + result); + pipe_mutex_unlock(rb_pipe->call_mutex); + + return ret; } static void * @@ -183,9 +204,14 @@ rbug_create_blend_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; + + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_blend_state(pipe, + blend); + pipe_mutex_unlock(rb_pipe->call_mutex); - return pipe->create_blend_state(pipe, - blend); + return ret; } static void @@ -195,8 +221,10 @@ rbug_bind_blend_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_blend_state(pipe, - blend); + blend); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -206,8 +234,10 @@ rbug_delete_blend_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_blend_state(pipe, blend); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -216,9 +246,14 @@ rbug_create_sampler_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; + + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_sampler_state(pipe, + sampler); + pipe_mutex_unlock(rb_pipe->call_mutex); - return pipe->create_sampler_state(pipe, - sampler); + return ret; } static void @@ -229,9 +264,11 @@ rbug_bind_fragment_sampler_states(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_fragment_sampler_states(pipe, num_samplers, samplers); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -242,9 +279,11 @@ rbug_bind_vertex_sampler_states(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_vertex_sampler_states(pipe, num_samplers, samplers); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -254,8 +293,10 @@ rbug_delete_sampler_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_sampler_state(pipe, sampler); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -264,9 +305,14 @@ rbug_create_rasterizer_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; - return pipe->create_rasterizer_state(pipe, - rasterizer); + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_rasterizer_state(pipe, + rasterizer); + pipe_mutex_unlock(rb_pipe->call_mutex); + + return ret; } static void @@ -276,8 +322,10 @@ rbug_bind_rasterizer_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_rasterizer_state(pipe, rasterizer); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -287,8 +335,10 @@ rbug_delete_rasterizer_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_rasterizer_state(pipe, rasterizer); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -297,9 +347,14 @@ rbug_create_depth_stencil_alpha_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; + + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_depth_stencil_alpha_state(pipe, + depth_stencil_alpha); + pipe_mutex_unlock(rb_pipe->call_mutex); - return pipe->create_depth_stencil_alpha_state(pipe, - depth_stencil_alpha); + return ret; } static void @@ -309,8 +364,10 @@ rbug_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_alpha); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -320,8 +377,10 @@ rbug_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_depth_stencil_alpha_state(pipe, depth_stencil_alpha); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -332,7 +391,10 @@ rbug_create_fs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_fs_state(pipe, state); + pipe_mutex_unlock(rb_pipe->call_mutex); + if (!result) return NULL; @@ -347,10 +409,14 @@ rbug_bind_fs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *fs; + pipe_mutex_lock(rb_pipe->call_mutex); + fs = rbug_shader_unwrap(_fs); rb_pipe->curr.fs = rbug_shader(_fs); pipe->bind_fs_state(pipe, fs); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -360,7 +426,9 @@ rbug_delete_fs_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct rbug_shader *rb_shader = rbug_shader(_fs); + pipe_mutex_lock(rb_pipe->call_mutex); rbug_shader_destroy(rb_pipe, rb_shader); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -371,7 +439,10 @@ rbug_create_vs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_vs_state(pipe, state); + pipe_mutex_unlock(rb_pipe->call_mutex); + if (!result) return NULL; @@ -386,10 +457,14 @@ rbug_bind_vs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *vs; + pipe_mutex_lock(rb_pipe->call_mutex); + vs = rbug_shader_unwrap(_vs); rb_pipe->curr.vs = rbug_shader(_vs); pipe->bind_vs_state(pipe, vs); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -399,7 +474,9 @@ rbug_delete_vs_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct rbug_shader *rb_shader = rbug_shader(_vs); + pipe_mutex_unlock(rb_pipe->call_mutex); rbug_shader_destroy(rb_pipe, rb_shader); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -410,7 +487,10 @@ rbug_create_gs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_gs_state(pipe, state); + pipe_mutex_unlock(rb_pipe->call_mutex); + if (!result) return NULL; @@ -425,10 +505,14 @@ rbug_bind_gs_state(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; void *gs; + pipe_mutex_lock(rb_pipe->call_mutex); + gs = rbug_shader_unwrap(_gs); rb_pipe->curr.gs = rbug_shader(_gs); pipe->bind_gs_state(pipe, gs); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -438,7 +522,9 @@ rbug_delete_gs_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct rbug_shader *rb_shader = rbug_shader(_gs); + pipe_mutex_lock(rb_pipe->call_mutex); rbug_shader_destroy(rb_pipe, rb_shader); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -448,10 +534,15 @@ rbug_create_vertex_elements_state(struct pipe_context *_pipe, { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + void *ret; - return pipe->create_vertex_elements_state(pipe, + pipe_mutex_lock(rb_pipe->call_mutex); + ret = pipe->create_vertex_elements_state(pipe, num_elements, vertex_elements); + pipe_mutex_unlock(rb_pipe->call_mutex); + + return ret; } static void @@ -461,8 +552,10 @@ rbug_bind_vertex_elements_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->bind_vertex_elements_state(pipe, velems); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -472,8 +565,10 @@ rbug_delete_vertex_elements_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->delete_vertex_elements_state(pipe, velems); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -483,8 +578,10 @@ rbug_set_blend_color(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_blend_color(pipe, blend_color); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -494,8 +591,10 @@ rbug_set_stencil_ref(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_stencil_ref(pipe, stencil_ref); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -505,8 +604,10 @@ rbug_set_clip_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_clip_state(pipe, clip); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -526,10 +627,12 @@ rbug_set_constant_buffer(struct pipe_context *_pipe, resource = unwrapped_resource; } + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_constant_buffer(pipe, shader, index, resource); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -542,6 +645,9 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe, struct pipe_framebuffer_state *state = NULL; unsigned i; + /* must protect curr status */ + pipe_mutex_lock(rb_pipe->call_mutex); + rb_pipe->curr.nr_cbufs = 0; memset(rb_pipe->curr.cbufs, 0, sizeof(rb_pipe->curr.cbufs)); rb_pipe->curr.zsbuf = NULL; @@ -564,6 +670,8 @@ rbug_set_framebuffer_state(struct pipe_context *_pipe, pipe->set_framebuffer_state(pipe, state); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -573,8 +681,10 @@ rbug_set_polygon_stipple(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_polygon_stipple(pipe, poly_stipple); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -584,8 +694,10 @@ rbug_set_scissor_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_scissor_state(pipe, scissor); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -595,8 +707,10 @@ rbug_set_viewport_state(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_viewport_state(pipe, viewport); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -610,6 +724,9 @@ rbug_set_fragment_sampler_views(struct pipe_context *_pipe, struct pipe_sampler_view **views = NULL; unsigned i; + /* must protect curr status */ + pipe_mutex_lock(rb_pipe->call_mutex); + rb_pipe->curr.num_fs_views = 0; memset(rb_pipe->curr.fs_views, 0, sizeof(rb_pipe->curr.fs_views)); memset(rb_pipe->curr.fs_texs, 0, sizeof(rb_pipe->curr.fs_texs)); @@ -626,6 +743,8 @@ rbug_set_fragment_sampler_views(struct pipe_context *_pipe, } pipe->set_fragment_sampler_views(pipe, num, views); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -639,6 +758,9 @@ rbug_set_vertex_sampler_views(struct pipe_context *_pipe, struct pipe_sampler_view **views = NULL; unsigned i; + /* must protect curr status */ + pipe_mutex_lock(rb_pipe->call_mutex); + rb_pipe->curr.num_vs_views = 0; memset(rb_pipe->curr.vs_views, 0, sizeof(rb_pipe->curr.vs_views)); memset(rb_pipe->curr.vs_texs, 0, sizeof(rb_pipe->curr.vs_texs)); @@ -655,6 +777,8 @@ rbug_set_vertex_sampler_views(struct pipe_context *_pipe, } pipe->set_vertex_sampler_views(pipe, num, views); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -668,6 +792,8 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe, struct pipe_vertex_buffer *buffers = NULL; unsigned i; + pipe_mutex_lock(rb_pipe->call_mutex); + if (num_buffers) { memcpy(unwrapped_buffers, _buffers, num_buffers * sizeof(*_buffers)); for (i = 0; i < num_buffers; i++) @@ -678,6 +804,8 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe, pipe->set_vertex_buffers(pipe, num_buffers, buffers); + + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -694,7 +822,9 @@ rbug_set_index_buffer(struct pipe_context *_pipe, ib = &unwrapped_ib; } + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_index_buffer(pipe, ib); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -704,7 +834,9 @@ rbug_set_sample_mask(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->set_sample_mask(pipe, sample_mask); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -725,6 +857,7 @@ rbug_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst = rb_resource_dst->resource; struct pipe_resource *src = rb_resource_src->resource; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->resource_copy_region(pipe, dst, dst_level, @@ -734,6 +867,7 @@ rbug_resource_copy_region(struct pipe_context *_pipe, src, src_level, src_box); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -746,11 +880,13 @@ rbug_clear(struct pipe_context *_pipe, struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->clear(pipe, buffers, rgba, depth, stencil); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -765,6 +901,7 @@ rbug_clear_render_target(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; struct pipe_surface *dst = rb_surface_dst->surface; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->clear_render_target(pipe, dst, rgba, @@ -772,6 +909,7 @@ rbug_clear_render_target(struct pipe_context *_pipe, dsty, width, height); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void @@ -788,6 +926,7 @@ rbug_clear_depth_stencil(struct pipe_context *_pipe, struct pipe_context *pipe = rb_pipe->pipe; struct pipe_surface *dst = rb_surface_dst->surface; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->clear_depth_stencil(pipe, dst, clear_flags, @@ -797,36 +936,20 @@ rbug_clear_depth_stencil(struct pipe_context *_pipe, dsty, width, height); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void rbug_flush(struct pipe_context *_pipe, - unsigned flags, struct pipe_fence_handle **fence) { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; + pipe_mutex_lock(rb_pipe->call_mutex); pipe->flush(pipe, - flags, fence); -} - -static unsigned int -rbug_is_resource_referenced(struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned level, - int layer) -{ - struct rbug_context *rb_pipe = rbug_context(_pipe); - struct rbug_resource *rb_resource = rbug_resource(_resource); - struct pipe_context *pipe = rb_pipe->pipe; - struct pipe_resource *resource = rb_resource->resource; - - return pipe->is_resource_referenced(pipe, - resource, - level, - layer); + pipe_mutex_unlock(rb_pipe->call_mutex); } static struct pipe_sampler_view * @@ -840,9 +963,11 @@ rbug_context_create_sampler_view(struct pipe_context *_pipe, struct pipe_resource *resource = rb_resource->resource; struct pipe_sampler_view *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_sampler_view(pipe, resource, templ); + pipe_mutex_unlock(rb_pipe->call_mutex); if (result) return rbug_sampler_view_create(rb_pipe, rb_resource, result); @@ -868,9 +993,11 @@ rbug_context_create_surface(struct pipe_context *_pipe, struct pipe_resource *resource = rb_resource->resource; struct pipe_surface *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = pipe->create_surface(pipe, resource, surf_tmpl); + pipe_mutex_unlock(rb_pipe->call_mutex); if (result) return rbug_surface_create(rb_pipe, rb_resource, result); @@ -881,8 +1008,13 @@ static void rbug_context_surface_destroy(struct pipe_context *_pipe, struct pipe_surface *_surface) { - rbug_surface_destroy(rbug_context(_pipe), - rbug_surface(_surface)); + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct rbug_surface *rb_surface = rbug_surface(_surface); + + pipe_mutex_lock(rb_pipe->call_mutex); + rbug_surface_destroy(rb_pipe, + rb_surface); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -900,11 +1032,13 @@ rbug_context_get_transfer(struct pipe_context *_context, struct pipe_resource *resource = rb_resource->resource; struct pipe_transfer *result; + pipe_mutex_lock(rb_pipe->call_mutex); result = context->get_transfer(context, resource, level, usage, box); + pipe_mutex_unlock(rb_pipe->call_mutex); if (result) return rbug_transfer_create(rb_pipe, rb_resource, result); @@ -915,8 +1049,13 @@ static void rbug_context_transfer_destroy(struct pipe_context *_pipe, struct pipe_transfer *_transfer) { - rbug_transfer_destroy(rbug_context(_pipe), - rbug_transfer(_transfer)); + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct rbug_transfer *rb_transfer =rbug_transfer(_transfer); + + pipe_mutex_lock(rb_pipe->call_mutex); + rbug_transfer_destroy(rb_pipe, + rb_transfer); + pipe_mutex_unlock(rb_pipe->call_mutex); } static void * @@ -927,9 +1066,14 @@ rbug_context_transfer_map(struct pipe_context *_context, struct rbug_transfer *rb_transfer = rbug_transfer(_transfer); struct pipe_context *context = rb_pipe->pipe; struct pipe_transfer *transfer = rb_transfer->transfer; + void *ret; - return context->transfer_map(context, + pipe_mutex_lock(rb_pipe->call_mutex); + ret = context->transfer_map(context, transfer); + pipe_mutex_unlock(rb_pipe->call_mutex); + + return ret; } @@ -944,9 +1088,11 @@ rbug_context_transfer_flush_region(struct pipe_context *_context, struct pipe_context *context = rb_pipe->pipe; struct pipe_transfer *transfer = rb_transfer->transfer; + pipe_mutex_lock(rb_pipe->call_mutex); context->transfer_flush_region(context, transfer, box); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -959,8 +1105,10 @@ rbug_context_transfer_unmap(struct pipe_context *_context, struct pipe_context *context = rb_pipe->pipe; struct pipe_transfer *transfer = rb_transfer->transfer; + pipe_mutex_lock(rb_pipe->call_mutex); context->transfer_unmap(context, transfer); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -979,6 +1127,7 @@ rbug_context_transfer_inline_write(struct pipe_context *_context, struct pipe_context *context = rb_pipe->pipe; struct pipe_resource *resource = rb_resource->resource; + pipe_mutex_lock(rb_pipe->call_mutex); context->transfer_inline_write(context, resource, level, @@ -987,6 +1136,7 @@ rbug_context_transfer_inline_write(struct pipe_context *_context, data, stride, layer_stride); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -999,7 +1149,9 @@ static void rbug_redefine_user_buffer(struct pipe_context *_context, struct pipe_context *context = rb_pipe->pipe; struct pipe_resource *resource = rb_resource->resource; + pipe_mutex_lock(rb_pipe->call_mutex); context->redefine_user_buffer(context, resource, offset, size); + pipe_mutex_unlock(rb_pipe->call_mutex); } @@ -1077,7 +1229,6 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) rb_pipe->base.clear_render_target = rbug_clear_render_target; rb_pipe->base.clear_depth_stencil = rbug_clear_depth_stencil; rb_pipe->base.flush = rbug_flush; - rb_pipe->base.is_resource_referenced = rbug_is_resource_referenced; rb_pipe->base.create_sampler_view = rbug_context_create_sampler_view; rb_pipe->base.sampler_view_destroy = rbug_context_sampler_view_destroy; rb_pipe->base.create_surface = rbug_context_create_surface; diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c index eb772d19d05..b80bcd4228f 100644 --- a/src/gallium/drivers/rbug/rbug_core.c +++ b/src/gallium/drivers/rbug/rbug_core.c @@ -498,7 +498,7 @@ rbug_context_flush(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32 /* protect the pipe context */ pipe_mutex_lock(rb_context->call_mutex); - rb_context->pipe->flush(rb_context->pipe, flush->flags, NULL); + rb_context->pipe->flush(rb_context->pipe, NULL); pipe_mutex_unlock(rb_context->call_mutex); pipe_mutex_unlock(rb_screen->list_mutex); diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c index d635ce575c0..7c8dfdcc12a 100644 --- a/src/gallium/drivers/rbug/rbug_screen.c +++ b/src/gallium/drivers/rbug/rbug_screen.c @@ -106,8 +106,7 @@ rbug_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { struct rbug_screen *rb_screen = rbug_screen(_screen); struct pipe_screen *screen = rb_screen->screen; @@ -116,8 +115,7 @@ rbug_screen_is_format_supported(struct pipe_screen *_screen, format, target, sample_count, - tex_usage, - geom_flags); + tex_usage); } static struct pipe_context * @@ -240,30 +238,28 @@ rbug_screen_fence_reference(struct pipe_screen *_screen, fence); } -static int +static boolean rbug_screen_fence_signalled(struct pipe_screen *_screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct rbug_screen *rb_screen = rbug_screen(_screen); struct pipe_screen *screen = rb_screen->screen; return screen->fence_signalled(screen, - fence, - flags); + fence); } -static int +static boolean rbug_screen_fence_finish(struct pipe_screen *_screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct rbug_screen *rb_screen = rbug_screen(_screen); struct pipe_screen *screen = rb_screen->screen; return screen->fence_finish(screen, fence, - flags); + timeout); } boolean diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 70fdfb7ddf3..ce22f646228 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -148,13 +148,13 @@ softpipe_destroy( struct pipe_context *pipe ) /** * if (the texture is being used as a framebuffer surface) - * return PIPE_REFERENCED_FOR_WRITE + * return SP_REFERENCED_FOR_WRITE * else if (the texture is a bound texture source) - * return PIPE_REFERENCED_FOR_READ + * return SP_REFERENCED_FOR_READ * else - * return PIPE_UNREFERENCED + * return SP_UNREFERENCED */ -static unsigned int +unsigned int softpipe_is_resource_referenced( struct pipe_context *pipe, struct pipe_resource *texture, unsigned level, int layer) @@ -163,19 +163,19 @@ softpipe_is_resource_referenced( struct pipe_context *pipe, unsigned i; if (texture->target == PIPE_BUFFER) - return PIPE_UNREFERENCED; + return SP_UNREFERENCED; /* check if any of the bound drawing surfaces are this texture */ if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { if (softpipe->framebuffer.cbufs[i] && softpipe->framebuffer.cbufs[i]->texture == texture) { - return PIPE_REFERENCED_FOR_WRITE; + return SP_REFERENCED_FOR_WRITE; } } if (softpipe->framebuffer.zsbuf && softpipe->framebuffer.zsbuf->texture == texture) { - return PIPE_REFERENCED_FOR_WRITE; + return SP_REFERENCED_FOR_WRITE; } } @@ -183,20 +183,20 @@ softpipe_is_resource_referenced( struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { if (softpipe->fragment_tex_cache[i] && softpipe->fragment_tex_cache[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ; + return SP_REFERENCED_FOR_READ; } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { if (softpipe->vertex_tex_cache[i] && softpipe->vertex_tex_cache[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ; + return SP_REFERENCED_FOR_READ; } for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { if (softpipe->geometry_tex_cache[i] && softpipe->geometry_tex_cache[i]->texture == texture) - return PIPE_REFERENCED_FOR_READ; + return SP_REFERENCED_FOR_READ; } - return PIPE_UNREFERENCED; + return SP_UNREFERENCED; } @@ -255,9 +255,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; softpipe->pipe.clear = softpipe_clear; - softpipe->pipe.flush = softpipe_flush; - - softpipe->pipe.is_resource_referenced = softpipe_is_resource_referenced; + softpipe->pipe.flush = softpipe_flush_wrapped; softpipe->pipe.render_condition = softpipe_render_condition; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index c91709aef06..a572ee8cf00 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -198,4 +198,13 @@ struct pipe_context * softpipe_create_context( struct pipe_screen *, void *priv ); +#define SP_UNREFERENCED 0 +#define SP_REFERENCED_FOR_READ (1 << 0) +#define SP_REFERENCED_FOR_WRITE (1 << 1) + +unsigned int +softpipe_is_resource_referenced( struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned level, int layer); + #endif /* SP_CONTEXT_H */ diff --git a/src/gallium/drivers/softpipe/sp_fence.c b/src/gallium/drivers/softpipe/sp_fence.c index 66c52141132..7b79a0df4ea 100644 --- a/src/gallium/drivers/softpipe/sp_fence.c +++ b/src/gallium/drivers/softpipe/sp_fence.c @@ -41,23 +41,22 @@ softpipe_fence_reference(struct pipe_screen *screen, } -static int +static boolean softpipe_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { assert(!fence); - return 0; + return TRUE; } -static int +static boolean softpipe_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { assert(!fence); - return 0; + return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 6f7addd441a..720fea83cb2 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -42,7 +42,7 @@ void softpipe_flush( struct pipe_context *pipe, - unsigned flags, + unsigned flags, struct pipe_fence_handle **fence ) { struct softpipe_context *softpipe = softpipe_context(pipe); @@ -50,7 +50,7 @@ softpipe_flush( struct pipe_context *pipe, draw_flush(softpipe->draw); - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + if (flags & SP_FLUSH_TEXTURE_CACHE) { for (i = 0; i < softpipe->num_fragment_sampler_views; i++) { sp_flush_tex_tile_cache(softpipe->fragment_tex_cache[i]); } @@ -62,34 +62,27 @@ softpipe_flush( struct pipe_context *pipe, } } - if (flags & PIPE_FLUSH_SWAPBUFFERS) { - /* If this is a swapbuffers, just flush color buffers. - * - * The zbuffer changes are not discarded, but held in the cache - * in the hope that a later clear will wipe them out. - */ - for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) - if (softpipe->cbuf_cache[i]) - sp_flush_tile_cache(softpipe->cbuf_cache[i]); - - /* Need this call for hardware buffers before swapbuffers. - * - * there should probably be another/different flush-type function - * that's called before swapbuffers because we don't always want - * to unmap surfaces when flushing. - */ - softpipe_unmap_transfers(softpipe); - } - else if (flags & PIPE_FLUSH_RENDER_CACHE) { - for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) - if (softpipe->cbuf_cache[i]) - sp_flush_tile_cache(softpipe->cbuf_cache[i]); - - if (softpipe->zsbuf_cache) - sp_flush_tile_cache(softpipe->zsbuf_cache); - - softpipe->dirty_render_cache = FALSE; - } + /* If this is a swapbuffers, just flush color buffers. + * + * The zbuffer changes are not discarded, but held in the cache + * in the hope that a later clear will wipe them out. + */ + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe->cbuf_cache[i]); + + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe->zsbuf_cache); + + softpipe->dirty_render_cache = FALSE; + + /* Need this call for hardware buffers before swapbuffers. + * + * there should probably be another/different flush-type function + * that's called before swapbuffers because we don't always want + * to unmap surfaces when flushing. + */ + softpipe_unmap_transfers(softpipe); /* Enable to dump BMPs of the color/depth buffers each frame */ #if 0 @@ -108,6 +101,13 @@ softpipe_flush( struct pipe_context *pipe, *fence = NULL; } +void +softpipe_flush_wrapped( struct pipe_context *pipe, + struct pipe_fence_handle **fence ) +{ + softpipe_flush(pipe, SP_FLUSH_TEXTURE_CACHE, fence); +} + /** * Flush context if necessary. @@ -129,21 +129,18 @@ softpipe_flush_resource(struct pipe_context *pipe, { unsigned referenced; - referenced = pipe->is_resource_referenced(pipe, texture, level, layer); + referenced = softpipe_is_resource_referenced(pipe, texture, level, layer); - if ((referenced & PIPE_REFERENCED_FOR_WRITE) || - ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { + if ((referenced & SP_REFERENCED_FOR_WRITE) || + ((referenced & SP_REFERENCED_FOR_READ) && !read_only)) { /* * TODO: The semantics of these flush flags are too obtuse. They should * disappear and the pipe driver should just ensure that all visible * side-effects happen when they need to happen. */ - if (referenced & PIPE_REFERENCED_FOR_WRITE) - flush_flags |= PIPE_FLUSH_RENDER_CACHE; - - if (referenced & PIPE_REFERENCED_FOR_READ) - flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + if (referenced & SP_REFERENCED_FOR_READ) + flush_flags |= SP_FLUSH_TEXTURE_CACHE; if (cpu_access) { /* @@ -155,14 +152,15 @@ softpipe_flush_resource(struct pipe_context *pipe, if (do_not_block) return FALSE; - pipe->flush(pipe, flush_flags, &fence); + softpipe_flush(pipe, flush_flags, &fence); if (fence) { /* * This is for illustrative purposes only, as softpipe does not * have fences. */ - pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_finish(pipe->screen, fence, + PIPE_TIMEOUT_INFINITE); pipe->screen->fence_reference(pipe->screen, &fence, NULL); } } else { @@ -170,7 +168,7 @@ softpipe_flush_resource(struct pipe_context *pipe, * Just flush. */ - pipe->flush(pipe, flush_flags, NULL); + softpipe_flush(pipe, flush_flags, NULL); } } diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h index 22a5ceeb9ec..ab01c249abe 100644 --- a/src/gallium/drivers/softpipe/sp_flush.h +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -33,10 +33,17 @@ struct pipe_context; struct pipe_fence_handle; +#define SP_FLUSH_TEXTURE_CACHE 0x2 + void -softpipe_flush(struct pipe_context *pipe, unsigned flags, +softpipe_flush(struct pipe_context *pipe, + unsigned flags, struct pipe_fence_handle **fence); +void +softpipe_flush_wrapped( struct pipe_context *pipe, + struct pipe_fence_handle **fence ); + boolean softpipe_flush_resource(struct pipe_context *pipe, struct pipe_resource *texture, diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 401e3177b48..26f5e1b5740 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -125,7 +125,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 0; case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; - case PIPE_CAP_INSTANCED_DRAWING: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; case PIPE_CAP_ARRAY_TEXTURES: return 1; @@ -181,8 +182,7 @@ softpipe_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned bind, - unsigned geom_flags ) + unsigned bind) { struct sw_winsys *winsys = softpipe_screen(screen)->winsys; const struct util_format_description *format_desc; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 8a4ef934348..c09ce19559c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1795,6 +1795,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, const struct pipe_sampler_state *sampler = samp->sampler; int j, k0, k1, k2, k3; float val; + float pc0, pc1, pc2, pc3; samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba); @@ -1804,43 +1805,48 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, * RGBA channels. We look at the red channel here. */ + pc0 = CLAMP(p[0], 0.0F, 1.0F); + pc1 = CLAMP(p[1], 0.0F, 1.0F); + pc2 = CLAMP(p[2], 0.0F, 1.0F); + pc3 = CLAMP(p[3], 0.0F, 1.0F); + /* compare four texcoords vs. four texture samples */ switch (sampler->compare_func) { case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; + k0 = pc0 < rgba[0][0]; + k1 = pc1 < rgba[0][1]; + k2 = pc2 < rgba[0][2]; + k3 = pc3 < rgba[0][3]; break; case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; + k0 = pc0 <= rgba[0][0]; + k1 = pc1 <= rgba[0][1]; + k2 = pc2 <= rgba[0][2]; + k3 = pc3 <= rgba[0][3]; break; case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; + k0 = pc0 > rgba[0][0]; + k1 = pc1 > rgba[0][1]; + k2 = pc2 > rgba[0][2]; + k3 = pc3 > rgba[0][3]; break; case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; + k0 = pc0 >= rgba[0][0]; + k1 = pc1 >= rgba[0][1]; + k2 = pc2 >= rgba[0][2]; + k3 = pc3 >= rgba[0][3]; break; case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; + k0 = pc0 == rgba[0][0]; + k1 = pc1 == rgba[0][1]; + k2 = pc2 == rgba[0][2]; + k3 = pc3 == rgba[0][3]; break; case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; + k0 = pc0 != rgba[0][0]; + k1 = pc1 != rgba[0][1]; + k2 = pc2 != rgba[0][2]; + k3 = pc3 != rgba[0][3]; break; case PIPE_FUNC_ALWAYS: k0 = k1 = k2 = k3 = 1; diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c index 05eab8a517d..1ed1d5d25bb 100644 --- a/src/gallium/drivers/svga/svga_cmd.c +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -422,7 +422,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, struct svga_transfer *st, // IN SVGA3dTransferType transfer, // IN const SVGA3dCopyBox *boxes, // IN - uint32 numBoxes) // IN + uint32 numBoxes, // IN + SVGA3dSurfaceDMAFlags flags) // IN { struct svga_texture *texture = svga_texture(st->base.resource); SVGA3dCmdSurfaceDMA *cmd; @@ -465,7 +466,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + boxesSize); pSuffix->suffixSize = sizeof *pSuffix; pSuffix->maximumOffset = st->hw_nblocksy*st->base.stride; - memset(&pSuffix->flags, 0, sizeof pSuffix->flags); + pSuffix->flags = flags; swc->commit(swc); diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h index 0e568d78e65..223ab17df81 100644 --- a/src/gallium/drivers/svga/svga_cmd.h +++ b/src/gallium/drivers/svga/svga_cmd.h @@ -102,7 +102,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, struct svga_transfer *st, SVGA3dTransferType transfer, const SVGA3dCopyBox *boxes, - uint32 numBoxes); + uint32 numBoxes, + SVGA3dSurfaceDMAFlags flags); enum pipe_error SVGA3D_BufferDMA(struct svga_winsys_context *swc, diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index f0f875b2b23..4782b4bf70e 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -241,7 +241,8 @@ void svga_context_flush( struct svga_context *svga, if (SVGA_DEBUG & DEBUG_SYNC) { if (fence) - svga->pipe.screen->fence_finish( svga->pipe.screen, fence, 0); + svga->pipe.screen->fence_finish( svga->pipe.screen, fence, + PIPE_TIMEOUT_INFINITE); } if(pfence) diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index d98b9b0e000..2093bcae101 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -73,12 +73,6 @@ retry_draw_range_elements( struct svga_context *svga, if (ret) goto retry; - if (svga->curr.any_user_vertex_buffers) { - ret = svga_hwtnl_flush( svga->hwtnl ); - if (ret) - goto retry; - } - return PIPE_OK; retry: @@ -122,12 +116,6 @@ retry_draw_arrays( struct svga_context *svga, if (ret) goto retry; - if (svga->curr.any_user_vertex_buffers) { - ret = svga_hwtnl_flush( svga->hwtnl ); - if (ret) - goto retry; - } - return 0; retry: @@ -161,7 +149,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) /* We're switching between SW and HW drawing. Do a flush to avoid * mixing HW and SW rendering with the same vertex buffer. */ - pipe->flush(pipe, ~0, NULL); + pipe->flush(pipe, NULL); svga->prev_draw_swtnl = svga->state.sw.need_swtnl; } diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index 9357d827f28..4578c136cb8 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -32,7 +32,6 @@ static void svga_flush( struct pipe_context *pipe, - unsigned flags, struct pipe_fence_handle **fence ) { struct svga_context *svga = svga_context(pipe); @@ -45,29 +44,27 @@ static void svga_flush( struct pipe_context *pipe, */ svga_context_flush(svga, fence); - SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n", - __FUNCTION__, flags, fence ? *fence : 0x0); + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n", + __FUNCTION__, fence ? *fence : 0x0); /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { - if (flags & PIPE_FLUSH_FRAME) { - struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; - static unsigned frame_no = 1; - char filename[256]; - unsigned i; + struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; + static unsigned frame_no = 1; + char filename[256]; + unsigned i; - for (i = 0; i < fb->nr_cbufs; i++) { - util_snprintf(filename, sizeof(filename), "cbuf%u_%04u", i, frame_no); - debug_dump_surface_bmp(&svga->pipe, filename, fb->cbufs[i]); - } - - if (0 && fb->zsbuf) { - util_snprintf(filename, sizeof(filename), "zsbuf_%04u", frame_no); - debug_dump_surface_bmp(&svga->pipe, filename, fb->zsbuf); - } + for (i = 0; i < fb->nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%04u", i, frame_no); + debug_dump_surface_bmp(&svga->pipe, filename, fb->cbufs[i]); + } - ++frame_no; + if (0 && fb->zsbuf) { + util_snprintf(filename, sizeof(filename), "zsbuf_%04u", frame_no); + debug_dump_surface_bmp(&svga->pipe, filename, fb->zsbuf); } + + ++frame_no; } } diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c index bed15ec02e5..6e0622a312b 100644 --- a/src/gallium/drivers/svga/svga_resource.c +++ b/src/gallium/drivers/svga/svga_resource.c @@ -33,7 +33,6 @@ svga_resource_from_handle(struct pipe_screen * screen, void svga_init_resource_functions(struct svga_context *svga) { - svga->pipe.is_resource_referenced = u_is_resource_referenced_vtbl; svga->pipe.get_transfer = u_get_transfer_vtbl; svga->pipe.transfer_map = u_transfer_map_vtbl; svga->pipe.transfer_flush_region = u_transfer_flush_region_vtbl; diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index e1f07d655b9..34ab9e143e6 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -51,53 +51,114 @@ svga_buffer_needs_hw_storage(unsigned usage) } -static unsigned int -svga_buffer_is_referenced( struct pipe_context *pipe, - struct pipe_resource *buf, - unsigned level, int layer) +/** + * Create a buffer transfer. + * + * Unlike texture DMAs (which are written immediately to the command buffer and + * therefore inherently serialized with other context operations), for buffers + * we try to coalesce multiple range mappings (i.e, multiple calls to this + * function) into a single DMA command, for better efficiency in command + * processing. This means we need to exercise extra care here to ensure that + * the end result is exactly the same as if one DMA was used for every mapped + * range. + */ +static struct pipe_transfer * +svga_buffer_get_transfer(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box) { + struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); - struct svga_buffer *sbuf = svga_buffer(buf); + struct svga_buffer *sbuf = svga_buffer(resource); + struct pipe_transfer *transfer; - /** - * XXX: Check this. - * The screen may cache buffer writes, but when we map, we map out - * of those cached writes, so we don't need to set a - * PIPE_REFERENCED_FOR_WRITE flag for cached buffers. - */ + transfer = CALLOC_STRUCT(pipe_transfer); + if (transfer == NULL) { + return NULL; + } - if (!sbuf->handle || ss->sws->surface_is_flushed(ss->sws, sbuf->handle)) - return PIPE_UNREFERENCED; + transfer->resource = resource; + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; - /** - * sws->surface_is_flushed() does not distinguish between read references - * and write references. So assume a reference is both, - * however, we make an exception for index- and vertex buffers, to avoid - * a flush in st_bufferobj_get_subdata, during display list replay. - */ + if (usage & PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + /* + * Finish writing any pending DMA commands, and tell the host to discard + * the buffer contents on the next DMA operation. + */ - if (sbuf->b.b.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - return PIPE_REFERENCED_FOR_READ; + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} + /* + * Instead of flushing the context command buffer, simply discard + * the current hwbuf, and start a new one. + */ + svga_buffer_destroy_hw_storage(ss, sbuf); + } + sbuf->map.num_ranges = 0; + sbuf->dma.flags.discard = TRUE; + } + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { + if (!sbuf->map.num_ranges) { + /* + * No pending ranges to upload so far, so we can tell the host to + * not synchronize on the next DMA command. + */ + sbuf->dma.flags.unsynchronized = TRUE; + } + } else { + /* + * Synchronizing, so finish writing any pending DMA command, and + * ensure the next DMA will be done in order. + */ + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); + + if (sbuf->hwbuf) { + /* + * We have a pending DMA upload from a hardware buffer, therefore + * we need to ensure that the host finishes processing that DMA + * command before the state tracker can start overwriting the + * hardware buffer. + * + * XXX: This could be avoided by tying the hardware buffer to + * the transfer (just as done with textures), which would allow + * overlapping DMAs commands to be queued on the same context + * buffer. However, due to the likelihood of software vertex + * processing, it is more convenient to hold on to the hardware + * buffer, allowing to quickly access the contents from the CPU + * without having to do a DMA download from the host. + */ + + if (usage & PIPE_TRANSFER_DONTBLOCK) { + /* + * Flushing the command buffer here will most likely cause + * the map of the hwbuf below to block, so preemptively + * return NULL here if DONTBLOCK is set to prevent unnecessary + * command buffer flushes. + */ + + FREE(transfer); + return NULL; + } + + svga_context_flush(svga, NULL); + } + } -static void * -svga_buffer_map_range( struct pipe_screen *screen, - struct pipe_resource *buf, - unsigned offset, - unsigned length, - unsigned usage ) -{ - struct svga_screen *ss = svga_screen(screen); - struct svga_winsys_screen *sws = ss->sws; - struct svga_buffer *sbuf = svga_buffer( buf ); - void *map; + sbuf->dma.flags.unsynchronized = FALSE; + } + } if (!sbuf->swbuf && !sbuf->hwbuf) { if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) { @@ -113,86 +174,120 @@ svga_buffer_map_range( struct pipe_screen *screen, } sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16); + if (!sbuf->swbuf) { + FREE(transfer); + return NULL; + } } } + return transfer; +} + + +/** + * Map a range of a buffer. + */ +static void * +svga_buffer_transfer_map( struct pipe_context *pipe, + struct pipe_transfer *transfer ) +{ + struct svga_buffer *sbuf = svga_buffer(transfer->resource); + + uint8_t *map; + if (sbuf->swbuf) { /* User/malloc buffer */ map = sbuf->swbuf; } else if (sbuf->hwbuf) { - map = sws->buffer_map(sws, sbuf->hwbuf, usage); + struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_winsys_screen *sws = ss->sws; + + map = sws->buffer_map(sws, sbuf->hwbuf, transfer->usage); } else { map = NULL; } - if(map) { + if (map) { ++sbuf->map.count; - - if (usage & PIPE_TRANSFER_WRITE) { - assert(sbuf->map.count <= 1); - sbuf->map.writing = TRUE; - if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) - sbuf->map.flush_explicit = TRUE; - } + map += transfer->box.x; } return map; } - -static void -svga_buffer_flush_mapped_range( struct pipe_screen *screen, - struct pipe_resource *buf, - unsigned offset, unsigned length) +static void +svga_buffer_transfer_flush_region( struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) { - struct svga_buffer *sbuf = svga_buffer( buf ); - struct svga_screen *ss = svga_screen(screen); - + struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_buffer *sbuf = svga_buffer(transfer->resource); + + unsigned offset = transfer->box.x + box->x; + unsigned length = box->width; + + assert(transfer->usage & PIPE_TRANSFER_WRITE); + assert(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT); + pipe_mutex_lock(ss->swc_mutex); - assert(sbuf->map.writing); - if(sbuf->map.writing) { - assert(sbuf->map.flush_explicit); - svga_buffer_add_range(sbuf, offset, offset + length); - } + svga_buffer_add_range(sbuf, offset, offset + length); pipe_mutex_unlock(ss->swc_mutex); } -static void -svga_buffer_unmap( struct pipe_screen *screen, - struct pipe_resource *buf) + +static void +svga_buffer_transfer_unmap( struct pipe_context *pipe, + struct pipe_transfer *transfer ) { - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; - struct svga_buffer *sbuf = svga_buffer( buf ); + struct svga_buffer *sbuf = svga_buffer(transfer->resource); pipe_mutex_lock(ss->swc_mutex); assert(sbuf->map.count); - if(sbuf->map.count) + if (sbuf->map.count) { --sbuf->map.count; + } - if(sbuf->hwbuf) + if (sbuf->hwbuf) { sws->buffer_unmap(sws, sbuf->hwbuf); + } + + if (transfer->usage & PIPE_TRANSFER_WRITE) { + if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { + /* + * Mapped range not flushed explicitly, so flush the whole buffer, + * and tell the host to discard the contents when processing the DMA + * command. + */ - if(sbuf->map.writing) { - if(!sbuf->map.flush_explicit) { - /* No mapped range was flushed -- flush the whole buffer */ SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); + sbuf->dma.flags.discard = TRUE; + svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0); } - - sbuf->map.writing = FALSE; - sbuf->map.flush_explicit = FALSE; } pipe_mutex_unlock(ss->swc_mutex); } +/** + * Destroy transfer + */ +static void +svga_buffer_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + FREE(transfer); +} + static void svga_buffer_destroy( struct pipe_screen *screen, @@ -221,62 +316,12 @@ svga_buffer_destroy( struct pipe_screen *screen, } -/* Keep the original code more or less intact, implement transfers in - * terms of the old functions. - */ -static void * -svga_buffer_transfer_map( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - uint8_t *map = svga_buffer_map_range( pipe->screen, - transfer->resource, - transfer->box.x, - transfer->box.width, - transfer->usage ); - if (map == NULL) - return NULL; - - /* map_buffer() returned a pointer to the beginning of the buffer, - * but transfers are expected to return a pointer to just the - * region specified in the box. - */ - return map + transfer->box.x; -} - - - -static void svga_buffer_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - assert(box->x + box->width <= transfer->box.width); - - svga_buffer_flush_mapped_range(pipe->screen, - transfer->resource, - transfer->box.x + box->x, - box->width); -} - -static void svga_buffer_transfer_unmap( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - svga_buffer_unmap(pipe->screen, - transfer->resource); -} - - - - - - - struct u_resource_vtbl svga_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ svga_buffer_destroy, /* resource_destroy */ - svga_buffer_is_referenced, /* is_resource_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ + svga_buffer_get_transfer, /* get_transfer */ + svga_buffer_transfer_destroy, /* transfer_destroy */ svga_buffer_transfer_map, /* transfer_map */ svga_buffer_transfer_flush_region, /* transfer_flush_region */ svga_buffer_transfer_unmap, /* transfer_unmap */ diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index c559f70ec12..95032213fa5 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -90,7 +90,9 @@ struct svga_buffer * Host surface handle. * * This is a platform independent abstraction for host SID. We create when - * trying to bind + * trying to bind. + * + * Only set for non-user buffers. */ struct svga_winsys_surface *handle; @@ -100,24 +102,10 @@ struct svga_buffer struct { /** * Number of concurrent mappings. - * - * XXX: It is impossible to guarantee concurrent maps work in all - * circumstances -- pipe_buffers really need transfer objects too. */ unsigned count; /** - * Whether this buffer is currently mapped for writing. - */ - boolean writing; - - /** - * Whether the application will tell us explicity which ranges it touched - * or not. - */ - boolean flush_explicit; - - /** * Dirty ranges. * * Ranges that were touched by the application and need to be uploaded to @@ -149,6 +137,8 @@ struct svga_buffer * A piece of GMR memory, with the same size of the buffer. It is created * when mapping the buffer, and will be used to upload vertex data to the * host. + * + * Only set for non-user buffers. */ struct svga_winsys_buffer *hwbuf; diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index b7d54605e66..e5fcec08d6e 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -131,6 +131,8 @@ enum pipe_error svga_buffer_create_host_surface(struct svga_screen *ss, struct svga_buffer *sbuf) { + assert(!sbuf->user); + if(!sbuf->handle) { sbuf->key.flags = 0; @@ -252,7 +254,7 @@ svga_buffer_upload_command(struct svga_context *svga, * Patch up the upload DMA command reserved by svga_buffer_upload_command * with the final ranges. */ -static void +void svga_buffer_upload_flush(struct svga_context *svga, struct svga_buffer *sbuf) { @@ -260,6 +262,10 @@ svga_buffer_upload_flush(struct svga_context *svga, unsigned i; struct pipe_resource *dummy; + if (!sbuf->dma.pending) { + return; + } + assert(sbuf->handle); assert(sbuf->hwbuf); assert(sbuf->map.num_ranges); @@ -296,6 +302,8 @@ svga_buffer_upload_flush(struct svga_context *svga, sbuf->head.next = sbuf->head.prev = NULL; #endif sbuf->dma.pending = FALSE; + sbuf->dma.flags.discard = FALSE; + sbuf->dma.flags.unsynchronized = FALSE; sbuf->dma.svga = NULL; sbuf->dma.boxes = NULL; @@ -306,7 +314,6 @@ svga_buffer_upload_flush(struct svga_context *svga, } - /** * Note a dirty range. * @@ -337,12 +344,6 @@ svga_buffer_add_range(struct svga_buffer *sbuf, /* * Try to grow one of the ranges. - * - * Note that it is not this function task to care about overlapping ranges, - * as the GMR was already given so it is too late to do anything. Situations - * where overlapping ranges may pose a problem should be detected via - * pipe_context::is_resource_referenced and the context that refers to the - * buffer should be flushed. */ for(i = 0; i < sbuf->map.num_ranges; ++i) { @@ -357,6 +358,11 @@ svga_buffer_add_range(struct svga_buffer *sbuf, if (dist <= 0) { /* * Ranges are contiguous or overlapping -- extend this one and return. + * + * Note that it is not this function's task to prevent overlapping + * ranges, as the GMR was already given so it is too late to do + * anything. If the ranges overlap here it must surely be because + * PIPE_TRANSFER_UNSYNCHRONIZED was set. */ sbuf->map.ranges[i].start = MIN2(sbuf->map.ranges[i].start, start); @@ -380,8 +386,7 @@ svga_buffer_add_range(struct svga_buffer *sbuf, * pending DMA upload and start clean. */ - if(sbuf->dma.pending) - svga_buffer_upload_flush(sbuf->dma.svga, sbuf); + svga_buffer_upload_flush(sbuf->dma.svga, sbuf); assert(!sbuf->dma.pending); assert(!sbuf->dma.svga); @@ -662,6 +667,9 @@ svga_redefine_user_buffer(struct pipe_context *pipe, struct svga_buffer *sbuf = svga_buffer(resource); assert(sbuf->user); + assert(!sbuf->dma.pending); + assert(!sbuf->handle); + assert(!sbuf->hwbuf); /* * Release any uploaded user buffer. @@ -674,29 +682,9 @@ svga_redefine_user_buffer(struct pipe_context *pipe, pipe_mutex_lock(ss->swc_mutex); - if (offset + size > resource->width0) { - /* - * User buffers shouldn't have DMA directly, unless - * SVGA_COMBINE_USERBUFFERS is not set. - */ - - if (sbuf->dma.pending) { - svga_buffer_upload_flush(svga, sbuf); - } - - if (sbuf->handle) { - svga_buffer_destroy_host_surface(ss, sbuf); - } - - if (sbuf->hwbuf) { - svga_buffer_destroy_hw_storage(ss, sbuf); - } - - sbuf->key.size.width = sbuf->b.b.width0 = offset + size; - } + sbuf->key.size.width = sbuf->b.b.width0 = offset + size; pipe_mutex_unlock(ss->swc_mutex); - svga->curr.any_user_vertex_buffers = TRUE; svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT; } diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.h b/src/gallium/drivers/svga/svga_resource_buffer_upload.h index 11df3065263..13d8f3e299b 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.h +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.h @@ -28,6 +28,10 @@ void +svga_buffer_upload_flush(struct svga_context *svga, + struct svga_buffer *sbuf); + +void svga_buffer_add_range(struct svga_buffer *sbuf, unsigned start, unsigned end); diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 994f30719ae..b61f85955a2 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -48,31 +48,6 @@ #define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) -static unsigned int -svga_texture_is_referenced( struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned level, int layer) -{ - struct svga_texture *tex = svga_texture(texture); - struct svga_screen *ss = svga_screen(pipe->screen); - - /** - * The screen does not cache texture writes. - */ - - if (!tex->handle || ss->sws->surface_is_flushed(ss->sws, tex->handle)) - return PIPE_UNREFERENCED; - - /** - * sws->surface_is_flushed() does not distinguish between read references - * and write references. So assume a reference is both. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - - - /* * Helper function and arrays */ @@ -156,7 +131,8 @@ static INLINE void svga_transfer_dma_band(struct svga_context *svga, struct svga_transfer *st, SVGA3dTransferType transfer, - unsigned y, unsigned h, unsigned srcy) + unsigned y, unsigned h, unsigned srcy, + SVGA3dSurfaceDMAFlags flags) { struct svga_texture *texture = svga_texture(st->base.resource); SVGA3dCopyBox box; @@ -192,10 +168,10 @@ svga_transfer_dma_band(struct svga_context *svga, util_format_get_blocksize(texture->b.b.format) * 8 / (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format))); - ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); + ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); if(ret != PIPE_OK) { svga_context_flush(svga, NULL); - ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); + ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); assert(ret == PIPE_OK); } } @@ -204,7 +180,8 @@ svga_transfer_dma_band(struct svga_context *svga, static INLINE void svga_transfer_dma(struct svga_context *svga, struct svga_transfer *st, - SVGA3dTransferType transfer) + SVGA3dTransferType transfer, + SVGA3dSurfaceDMAFlags flags) { struct svga_texture *texture = svga_texture(st->base.resource); struct svga_screen *screen = svga_screen(texture->b.b.screen); @@ -223,7 +200,9 @@ svga_transfer_dma(struct svga_context *svga, if(!st->swbuf) { /* Do the DMA transfer in a single go */ - svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0); + svga_transfer_dma_band(svga, st, transfer, + st->base.box.y, st->base.box.height, 0, + flags); if(transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); @@ -269,7 +248,14 @@ svga_transfer_dma(struct svga_context *svga, } } - svga_transfer_dma_band(svga, st, transfer, y, h, srcy); + svga_transfer_dma_band(svga, st, transfer, y, h, srcy, flags); + + /* + * Prevent the texture contents to be discarded on the next band + * upload. + */ + + flags.discard = FALSE; if(transfer == SVGA3D_READ_HOST_VRAM) { svga_context_flush(svga, &fence); @@ -398,8 +384,11 @@ svga_texture_get_transfer(struct pipe_context *pipe, goto no_swbuf; } - if (usage & PIPE_TRANSFER_READ) - svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM); + if (usage & PIPE_TRANSFER_READ) { + SVGA3dSurfaceDMAFlags flags; + memset(&flags, 0, sizeof flags); + svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM, flags); + } return &st->base; @@ -458,7 +447,17 @@ svga_texture_transfer_destroy(struct pipe_context *pipe, struct svga_transfer *st = svga_transfer(transfer); if (st->base.usage & PIPE_TRANSFER_WRITE) { - svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM); + SVGA3dSurfaceDMAFlags flags; + + memset(&flags, 0, sizeof flags); + if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + flags.discard = TRUE; + } + if (transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) { + flags.unsynchronized = TRUE; + } + + svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags); ss->texture_timestamp++; tex->view_age[transfer->level] = ++(tex->age); if (transfer->resource->target == PIPE_TEXTURE_CUBE) @@ -481,7 +480,6 @@ struct u_resource_vtbl svga_texture_vtbl = { svga_texture_get_handle, /* get_handle */ svga_texture_destroy, /* resource_destroy */ - svga_texture_is_referenced, /* is_resource_referenced */ svga_texture_get_transfer, /* get_transfer */ svga_texture_transfer_destroy, /* transfer_destroy */ svga_texture_transfer_map, /* transfer_map */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index ef1d3098d51..6c987abe056 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -342,8 +342,7 @@ svga_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags ) + unsigned tex_usage) { struct svga_winsys_screen *sws = svga_screen(screen)->sws; SVGA3dDevCapIndex index; @@ -412,27 +411,26 @@ svga_fence_reference(struct pipe_screen *screen, } -static int +static boolean svga_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) + struct pipe_fence_handle *fence) { struct svga_winsys_screen *sws = svga_screen(screen)->sws; - return sws->fence_signalled(sws, fence, flag); + return sws->fence_signalled(sws, fence, 0) == 0; } -static int +static boolean svga_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, - unsigned flag) + uint64_t timeout) { struct svga_winsys_screen *sws = svga_screen(screen)->sws; SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n", __FUNCTION__, fence); - return sws->fence_finish(sws, fence, flag); + return sws->fence_finish(sws, fence, 0) == 0; } @@ -498,6 +496,12 @@ svga_screen_create(struct svga_winsys_screen *sws) svga_init_screen_resource_functions(svgascreen); + if (sws->get_hw_version) { + svgascreen->hw_version = sws->get_hw_version(sws); + } else { + svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1; + } + svgascreen->use_ps30 = sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) && result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE; diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h index 86ec89d88c1..6d8d287ce95 100644 --- a/src/gallium/drivers/svga/svga_screen.h +++ b/src/gallium/drivers/svga/svga_screen.h @@ -39,8 +39,6 @@ struct svga_winsys_screen; struct svga_winsys_context; struct SVGACmdMemory; -#define SVGA_COMBINE_USERBUFFERS 1 - /** * Subclass of pipe_screen */ @@ -49,6 +47,8 @@ struct svga_screen struct pipe_screen screen; struct svga_winsys_screen *sws; + SVGA3dHardwareVersion hw_version; + unsigned use_ps30; unsigned use_vs30; diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index 958d00393f2..7c393a1da8d 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -78,7 +78,6 @@ upload_user_buffers( struct svga_context *svga ) buffer->b.b.width0); } - pipe_resource_reference( &svga->curr.vb[i].buffer, buffer->uploaded.buffer ); svga->curr.vb[i].buffer_offset = buffer->uploaded.offset; } } @@ -110,6 +109,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, for (i = 0; i < svga->curr.velems->count; i++) { const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; + struct svga_buffer *buffer = svga_buffer(vb->buffer); svga_generate_vdecl_semantics( i, &usage, &index ); @@ -127,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, svga_hwtnl_vdecl( svga->hwtnl, i, &decl, + buffer->uploaded.buffer ? buffer->uploaded.buffer : vb->buffer ); } @@ -148,14 +149,10 @@ static int emit_hw_vdecl( struct svga_context *svga, * userbuffers now and try to combine multiple userbuffers from * multiple draw calls into a single host buffer for performance. */ - if (svga->curr.any_user_vertex_buffers && - SVGA_COMBINE_USERBUFFERS) - { + if (svga->curr.any_user_vertex_buffers) { ret = upload_user_buffers( svga ); if (ret) return ret; - - svga->curr.any_user_vertex_buffers = FALSE; } return emit_hw_vs_vdecl( svga, dirty ); diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index 5e4bdeff2ee..ae61cea083f 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -136,6 +136,9 @@ struct svga_winsys_screen void (*destroy)(struct svga_winsys_screen *sws); + SVGA3dHardwareVersion + (*get_hw_version)(struct svga_winsys_screen *sws); + boolean (*get_cap)(struct svga_winsys_screen *sws, SVGA3dDevCapIndex index, @@ -243,12 +246,12 @@ struct svga_winsys_screen /** * Map the entire data store of a buffer object into the client's address. - * flags is a bitmaks of PIPE_TRANSFER_* + * usage is a bitmask of PIPE_TRANSFER_* */ void * (*buffer_map)( struct svga_winsys_screen *sws, struct svga_winsys_buffer *buf, - unsigned flags ); + unsigned usage ); void (*buffer_unmap)( struct svga_winsys_screen *sws, diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index d24cc623c2e..4db7619c424 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1184,7 +1184,6 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe, static INLINE void trace_context_flush(struct pipe_context *_pipe, - unsigned flags, struct pipe_fence_handle **fence) { struct trace_context *tr_ctx = trace_context(_pipe); @@ -1193,9 +1192,8 @@ trace_context_flush(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "flush"); trace_dump_arg(ptr, pipe); - trace_dump_arg(uint, flags); - pipe->flush(pipe, flags, fence); + pipe->flush(pipe, fence); if(fence) trace_dump_ret(ptr, *fence); @@ -1219,31 +1217,6 @@ trace_context_destroy(struct pipe_context *_pipe) FREE(tr_ctx); } -static unsigned int -trace_is_resource_referenced( struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned level, int layer) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_resource *tr_tex = trace_resource(_resource); - struct pipe_context *pipe = tr_ctx->pipe; - struct pipe_resource *texture = tr_tex->resource; - unsigned int referenced; - - trace_dump_call_begin("pipe_context", "is_resource_referenced"); - trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, texture); - trace_dump_arg(uint, level); - trace_dump_arg(int, layer); - - referenced = pipe->is_resource_referenced(pipe, texture, level, layer); - - trace_dump_ret(uint, referenced); - trace_dump_call_end(); - - return referenced; -} - /******************************************************************** * transfer @@ -1520,7 +1493,6 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.clear_render_target = trace_context_clear_render_target; tr_ctx->base.clear_depth_stencil = trace_context_clear_depth_stencil; tr_ctx->base.flush = trace_context_flush; - tr_ctx->base.is_resource_referenced = trace_is_resource_referenced; tr_ctx->base.get_transfer = trace_context_get_transfer; tr_ctx->base.transfer_destroy = trace_context_transfer_destroy; diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 51a4ea96335..8a4ec20fb84 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -60,10 +60,9 @@ static struct os_stream *stream = NULL; static unsigned refcount = 0; -static pipe_mutex call_mutex; +pipe_static_mutex(call_mutex); static long unsigned call_no = 0; static boolean dumping = FALSE; -static boolean initialized = FALSE; static INLINE void @@ -225,26 +224,13 @@ trace_dump_trace_close(void) stream = NULL; refcount = 0; call_no = 0; - pipe_mutex_destroy(call_mutex); } } -void trace_dump_init() -{ - if (initialized) - return; - - pipe_mutex_init(call_mutex); - dumping = FALSE; - initialized = TRUE; -} - boolean trace_dump_trace_begin() { const char *filename; - assert(initialized); - filename = debug_get_option("GALLIUM_TRACE", NULL); if(!filename) return FALSE; diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 74c5e83e9e1..62b4fe429b1 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -43,11 +43,6 @@ struct pipe_transfer; struct pipe_box; /* - * Call before use. - */ -void trace_dump_init(void); - -/* * Low level dumping controls. * * Opening the trace file and checking if that is opened. diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index c2de2daa883..42180c4f19e 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -158,8 +158,7 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage, - unsigned geom_flags) + unsigned tex_usage) { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; @@ -172,10 +171,9 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, trace_dump_arg(int, target); trace_dump_arg(uint, sample_count); trace_dump_arg(uint, tex_usage); - trace_dump_arg(uint, geom_flags); result = screen->is_format_supported(screen, format, target, sample_count, - tex_usage, geom_flags); + tex_usage); trace_dump_ret(bool, result); @@ -393,10 +391,9 @@ trace_screen_fence_reference(struct pipe_screen *_screen, } -static int +static boolean trace_screen_fence_signalled(struct pipe_screen *_screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence) { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; @@ -406,11 +403,10 @@ trace_screen_fence_signalled(struct pipe_screen *_screen, trace_dump_arg(ptr, screen); trace_dump_arg(ptr, fence); - trace_dump_arg(uint, flags); - result = screen->fence_signalled(screen, fence, flags); + result = screen->fence_signalled(screen, fence); - trace_dump_ret(int, result); + trace_dump_ret(bool, result); trace_dump_call_end(); @@ -418,10 +414,10 @@ trace_screen_fence_signalled(struct pipe_screen *_screen, } -static int +static boolean trace_screen_fence_finish(struct pipe_screen *_screen, struct pipe_fence_handle *fence, - unsigned flags) + uint64_t timeout) { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; @@ -431,11 +427,11 @@ trace_screen_fence_finish(struct pipe_screen *_screen, trace_dump_arg(ptr, screen); trace_dump_arg(ptr, fence); - trace_dump_arg(uint, flags); + trace_dump_arg(uint, timeout); - result = screen->fence_finish(screen, fence, flags); + result = screen->fence_finish(screen, fence, timeout); - trace_dump_ret(int, result); + trace_dump_ret(bool, result); trace_dump_call_end(); @@ -472,8 +468,6 @@ trace_enabled(void) return trace; firstrun = FALSE; - trace_dump_init(); - if(trace_dump_trace_begin()) { trace_dumping_start(); trace = TRUE; |