summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorChristian König <[email protected]>2011-02-24 22:02:42 +0100
committerChristian König <[email protected]>2011-02-24 22:02:42 +0100
commitb922a0ce12916a91cfc3e56714913fcf63279ff2 (patch)
treee24fa039925220882d155ccb987f7914e83f4372 /src/gallium/drivers
parentf013b4f8f1329982727691a55cc263e3011d02bf (diff)
parentc0ad70ae31ee5501281b434d56e389fc92b13a3a (diff)
Merge remote branch 'origin/master' into pipe-video
Conflicts: configure.ac src/gallium/auxiliary/Makefile src/gallium/auxiliary/SConscript src/gallium/drivers/r600/r600_asm.c src/gallium/drivers/r600/r600_asm.h src/gallium/drivers/r600/r600_shader.c src/gallium/drivers/r600/r600_state_inlines.h src/gallium/drivers/r600/r600_texture.c
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_vertex.c2
-rw-r--r--src/gallium/drivers/failover/fo_state.c2
-rw-r--r--src/gallium/drivers/galahad/glhd_context.c14
-rw-r--r--src/gallium/drivers/i915/TODO8
-rw-r--r--src/gallium/drivers/i915/i915_context.c27
-rw-r--r--src/gallium/drivers/i915/i915_context.h4
-rw-r--r--src/gallium/drivers/i915/i915_debug.c11
-rw-r--r--src/gallium/drivers/i915/i915_debug.h1
-rw-r--r--src/gallium/drivers/i915/i915_flush.c4
-rw-r--r--src/gallium/drivers/i915/i915_resource_texture.c24
-rw-r--r--src/gallium/drivers/i915/i915_screen.c92
-rw-r--r--src/gallium/drivers/i915/i915_screen.h5
-rw-r--r--src/gallium/drivers/i915/i915_state.c59
-rw-r--r--src/gallium/drivers/i915/i915_state_dynamic.c69
-rw-r--r--src/gallium/drivers/i915/i915_state_emit.c57
-rw-r--r--src/gallium/drivers/i915/i915_state_immediate.c68
-rw-r--r--src/gallium/drivers/i915/i915_winsys.h3
-rw-r--r--src/gallium/drivers/i965/brw_pipe_vertex.c2
-rw-r--r--src/gallium/drivers/identity/id_context.c14
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vertex.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c13
-rw-r--r--src/gallium/drivers/noop/noop_state.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_pc_optimize.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_tgsi_to_nc.c2
-rw-r--r--src/gallium/drivers/nvc0/nvc0_3d.xml.h130
-rw-r--r--src/gallium/drivers/nvc0/nvc0_buffer.c25
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c32
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.h12
-rw-r--r--src/gallium/drivers/nvc0/nvc0_fence.c37
-rw-r--r--src/gallium/drivers/nvc0/nvc0_fence.h3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_miptree.c6
-rw-r--r--src/gallium/drivers/nvc0/nvc0_mm.c4
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc.c35
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc.h19
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_emit.c76
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_optimize.c370
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_print.c40
-rw-r--r--src/gallium/drivers/nvc0/nvc0_pc_regalloc.c276
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.c45
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.h5
-rw-r--r--src/gallium/drivers/nvc0/nvc0_push.c4
-rw-r--r--src/gallium/drivers/nvc0/nvc0_query.c2
-rw-r--r--src/gallium/drivers/nvc0/nvc0_resource.h8
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.c27
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h14
-rw-r--r--src/gallium/drivers/nvc0/nvc0_shader_state.c73
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state.c153
-rw-r--r--src/gallium/drivers/nvc0/nvc0_state_validate.c155
-rw-r--r--src/gallium/drivers/nvc0/nvc0_stateobj.h11
-rw-r--r--src/gallium/drivers/nvc0/nvc0_surface.c46
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tex.c7
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c141
-rw-r--r--src/gallium/drivers/nvc0/nvc0_transfer.c44
-rw-r--r--src/gallium/drivers/nvc0/nvc0_vbo.c95
-rw-r--r--src/gallium/drivers/nvc0/nvc0_winsys.h2
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vbo.c3
-rw-r--r--src/gallium/drivers/r300/r300_blit.c308
-rw-r--r--src/gallium/drivers/r300/r300_cb.h54
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c2
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h13
-rw-r--r--src/gallium/drivers/r300/r300_context.c136
-rw-r--r--src/gallium/drivers/r300/r300_context.h150
-rw-r--r--src/gallium/drivers/r300/r300_cs.h51
-rw-r--r--src/gallium/drivers/r300/r300_debug.c3
-rw-r--r--src/gallium/drivers/r300/r300_emit.c293
-rw-r--r--src/gallium/drivers/r300/r300_flush.c49
-rw-r--r--src/gallium/drivers/r300/r300_fs.c126
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c128
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h2
-rw-r--r--src/gallium/drivers/r300/r300_query.c23
-rw-r--r--src/gallium/drivers/r300/r300_reg.h9
-rw-r--r--src/gallium/drivers/r300/r300_render.c444
-rw-r--r--src/gallium/drivers/r300/r300_render_translate.c237
-rw-r--r--src/gallium/drivers/r300/r300_resource.c20
-rw-r--r--src/gallium/drivers/r300/r300_screen.c49
-rw-r--r--src/gallium/drivers/r300/r300_screen.h4
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.c180
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.h37
-rw-r--r--src/gallium/drivers/r300/r300_state.c306
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c130
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h22
-rw-r--r--src/gallium/drivers/r300/r300_texture.c229
-rw-r--r--src/gallium/drivers/r300/r300_texture.h13
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.c343
-rw-r--r--src/gallium/drivers/r300/r300_texture_desc.h14
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c9
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.h3
-rw-r--r--src/gallium/drivers/r300/r300_transfer.c61
-rw-r--r--src/gallium/drivers/r300/r300_vs.c7
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h110
-rw-r--r--src/gallium/drivers/r600/Makefile3
-rw-r--r--src/gallium/drivers/r600/SConscript1
-rw-r--r--src/gallium/drivers/r600/eg_state_inlines.h37
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c388
-rw-r--r--src/gallium/drivers/r600/evergreend.h15
-rw-r--r--src/gallium/drivers/r600/r600.h2
-rw-r--r--src/gallium/drivers/r600/r600_asm.c181
-rw-r--r--src/gallium/drivers/r600/r600_asm.h4
-rw-r--r--src/gallium/drivers/r600/r600_blit.c159
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c328
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c112
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h78
-rw-r--r--src/gallium/drivers/r600/r600_resource.h43
-rw-r--r--src/gallium/drivers/r600/r600_shader.c817
-rw-r--r--src/gallium/drivers/r600/r600_shader.h3
-rw-r--r--src/gallium/drivers/r600/r600_state.c385
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c371
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h75
-rw-r--r--src/gallium/drivers/r600/r600_texture.c373
-rw-r--r--src/gallium/drivers/r600/r600_translate.c182
-rw-r--r--src/gallium/drivers/r600/r600_upload.c114
-rw-r--r--src/gallium/drivers/r600/r600d.h27
-rw-r--r--src/gallium/drivers/rbug/rbug_context.c14
-rw-r--r--src/gallium/drivers/rbug/rbug_objects.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h6
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c18
-rw-r--r--src/gallium/drivers/softpipe/sp_state_vertex.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c238
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.c42
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.h2
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c37
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c22
-rw-r--r--src/gallium/drivers/svga/svga_context.c71
-rw-r--r--src/gallium/drivers/svga/svga_context.h23
-rw-r--r--src/gallium/drivers/svga/svga_draw.c7
-rw-r--r--src/gallium/drivers/svga/svga_draw_arrays.c1
-rw-r--r--src/gallium/drivers/svga/svga_draw_elements.c1
-rw-r--r--src/gallium/drivers/svga/svga_pipe_blit.c4
-rw-r--r--src/gallium/drivers/svga/svga_pipe_draw.c8
-rw-r--r--src/gallium/drivers/svga/svga_pipe_flush.c37
-rw-r--r--src/gallium/drivers/svga/svga_pipe_misc.c2
-rw-r--r--src/gallium/drivers/svga/svga_pipe_rasterizer.c35
-rw-r--r--src/gallium/drivers/svga/svga_pipe_sampler.c16
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vertex.c1
-rw-r--r--src/gallium/drivers/svga/svga_resource.c1
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.c15
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.h6
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c68
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c36
-rw-r--r--src/gallium/drivers/svga/svga_sampler_view.c26
-rw-r--r--src/gallium/drivers/svga/svga_sampler_view.h6
-rw-r--r--src/gallium/drivers/svga/svga_screen.c18
-rw-r--r--src/gallium/drivers/svga/svga_state.h4
-rw-r--r--src/gallium/drivers/svga/svga_state_framebuffer.c49
-rw-r--r--src/gallium/drivers/svga/svga_state_need_swtnl.c23
-rw-r--r--src/gallium/drivers/svga/svga_state_tss.c78
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c2
-rw-r--r--src/gallium/drivers/svga/svga_surface.c30
-rw-r--r--src/gallium/drivers/svga/svga_surface.h5
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_backend.c4
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_draw.c1
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_state.c2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c236
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_shader_op.c4
-rw-r--r--src/gallium/drivers/trace/tr_context.c23
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c1
164 files changed, 5936 insertions, 4973 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
index eb22a09a913..7f65b82619e 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
@@ -33,6 +33,7 @@
#include "cell_state.h"
#include "util/u_memory.h"
+#include "util/u_transfer.h"
#include "draw/draw_context.h"
@@ -115,4 +116,5 @@ cell_init_vertex_functions(struct cell_context *cell)
cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
+ cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index af1fd953aaf..b4da1b8b901 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -30,6 +30,7 @@
#include "util/u_inlines.h"
#include "util/u_memory.h"
+#include "util/u_transfer.h"
#include "fo_context.h"
@@ -656,4 +657,5 @@ failover_init_state_functions( struct failover_context *failover )
failover->pipe.set_constant_buffer = failover_set_constant_buffer;
failover->pipe.create_sampler_view = failover_create_sampler_view;
failover->pipe.sampler_view_destroy = failover_sampler_view_destroy;
+ failover->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c
index 8cbf0b1de4a..75e4c253dd9 100644
--- a/src/gallium/drivers/galahad/glhd_context.c
+++ b/src/gallium/drivers/galahad/glhd_context.c
@@ -962,6 +962,19 @@ galahad_context_transfer_inline_write(struct pipe_context *_context,
}
+static void galahad_redefine_user_buffer(struct pipe_context *_context,
+ struct pipe_resource *_resource,
+ unsigned offset, unsigned size)
+{
+ struct galahad_context *glhd_context = galahad_context(_context);
+ struct galahad_resource *glhd_resource = galahad_resource(_resource);
+ struct pipe_context *context = glhd_context->pipe;
+ struct pipe_resource *resource = glhd_resource->resource;
+
+ context->redefine_user_buffer(context, resource, offset, size);
+}
+
+
struct pipe_context *
galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
{
@@ -1036,6 +1049,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap;
glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region;
glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write;
+ glhd_pipe->base.redefine_user_buffer = galahad_redefine_user_buffer;
glhd_pipe->pipe = pipe;
diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO
index 94c428bebf8..f4e1423fa59 100644
--- a/src/gallium/drivers/i915/TODO
+++ b/src/gallium/drivers/i915/TODO
@@ -1,5 +1,13 @@
Random list of problems with i915g:
+- Check if PIPE_CAP_BLEND_EQUATION_SEPARATE work, the code is there.
+ If not fix it! A simple task, good for beginners.
+
+- Add support for PIPE_CAP_POINT_SPRITE either via the draw module or directly
+ via the hardware, look at the classic driver, more advanced.
+
+- What does this button do? Figure out LIS7 with regards to depth offset.
+
- Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally
unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still
broken, it doesn't update the viewport/get new buffers.
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index ea3c10b5e78..707b2e9f956 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -39,6 +39,9 @@
#include "pipe/p_screen.h"
+DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE);
+
+
/*
* Draw functions
*/
@@ -50,7 +53,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
struct i915_context *i915 = i915_context(pipe);
struct draw_context *draw = i915->draw;
void *mapped_indices = NULL;
- unsigned i;
unsigned cbuf_dirty;
@@ -64,14 +66,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
i915_update_derived(i915);
/*
- * Map vertex buffers
- */
- for (i = 0; i < i915->num_vertex_buffers; i++) {
- void *buf = i915_buffer(i915->vertex_buffer[i].buffer)->data;
- draw_set_mapped_vertex_buffer(draw, i, buf);
- }
-
- /*
* Map index buffer, if present
*/
if (info->indexed && i915->index_buffer.buffer)
@@ -90,13 +84,6 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
*/
draw_vbo(i915->draw, info);
- /*
- * unmap vertex/index buffers
- */
- for (i = 0; i < i915->num_vertex_buffers; i++) {
- draw_set_mapped_vertex_buffer(draw, i, NULL);
- }
-
if (mapped_indices)
draw_set_mapped_index_buffer(draw, NULL);
}
@@ -117,10 +104,6 @@ static void i915_destroy(struct pipe_context *pipe)
if(i915->batch)
i915->iws->batchbuffer_destroy(i915->batch);
- for (i = 0; i < i915->num_vertex_buffers; i++) {
- pipe_resource_reference(&i915->vertex_buffer[i].buffer, NULL);
- }
-
/* unbind framebuffer */
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL);
@@ -164,7 +147,7 @@ i915_create_context(struct pipe_screen *screen, void *priv)
*/
i915->draw = draw_create(&i915->base);
assert(i915->draw);
- if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) {
+ if (!debug_get_option_i915_no_vbuf()) {
draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915));
} else {
draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915));
@@ -180,6 +163,8 @@ i915_create_context(struct pipe_screen *screen, void *priv)
i915->dirty = ~0;
i915->hardware_dirty = ~0;
+ i915->immediate_dirty = ~0;
+ i915->dynamic_dirty = ~0;
/* Batch stream debugging is a bit hacked up at the moment:
*/
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index b7f1fb22221..2cf53424f06 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -219,14 +219,12 @@ struct i915_context {
struct pipe_scissor_state scissor;
struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
struct pipe_viewport_state viewport;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer index_buffer;
unsigned dirty;
unsigned num_samplers;
unsigned num_fragment_sampler_views;
- unsigned num_vertex_buffers;
struct i915_winsys_batchbuffer *batch;
@@ -237,6 +235,8 @@ struct i915_context {
struct i915_state current;
unsigned hardware_dirty;
+ unsigned immediate_dirty;
+ unsigned dynamic_dirty;
struct util_slab_mempool transfer_pool;
};
diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index 845e92cf5c6..1713bf131f2 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -48,10 +48,15 @@ static const struct debug_named_value debug_options[] = {
unsigned i915_debug = 0;
boolean i915_tiling = TRUE;
-void i915_debug_init(struct i915_screen *screen)
+DEBUG_GET_ONCE_FLAGS_OPTION(i915_debug, "I915_DEBUG", debug_options, 0)
+DEBUG_GET_ONCE_BOOL_OPTION(i915_no_tiling, "I915_NO_TILING", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(i915_lie, "I915_LIE", FALSE)
+
+void i915_debug_init(struct i915_screen *is)
{
- i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0);
- i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE);
+ i915_debug = debug_get_option_i915_debug();
+ is->debug.tiling = !debug_get_option_i915_no_tiling();
+ is->debug.lie = debug_get_option_i915_lie();
}
diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h
index 11af7662f0a..fa60799d0c5 100644
--- a/src/gallium/drivers/i915/i915_debug.h
+++ b/src/gallium/drivers/i915/i915_debug.h
@@ -46,7 +46,6 @@ struct i915_winsys_batchbuffer;
#define DBG_CONSTANTS 0x20
extern unsigned i915_debug;
-extern boolean i915_tiling;
#ifdef DEBUG
static INLINE boolean
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index a2c70b11991..911c051d1f2 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -74,7 +74,6 @@ static void i915_flush_pipe( struct pipe_context *pipe,
/* If there are no flags, just flush pending commands to hardware:
*/
FLUSH_BATCH(fence);
- i915->vbo_flushed = 1;
I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__);
}
@@ -93,5 +92,8 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence)
struct i915_winsys_batchbuffer *batch = i915->batch;
batch->iws->batchbuffer_flush(batch, fence);
+ i915->vbo_flushed = 1;
i915->hardware_dirty = ~0;
+ i915->immediate_dirty = ~0;
+ i915->dynamic_dirty = ~0;
}
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index e793d126ade..aad5235a6ad 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -172,15 +172,15 @@ i915_texture_set_image_offset(struct i915_texture *tex,
}
static enum i915_winsys_buffer_tile
-i915_texture_tiling(struct pipe_resource *pt)
+i915_texture_tiling(struct i915_screen *is, struct i915_texture *tex)
{
- if (!i915_tiling)
+ if (!is->debug.tiling)
return I915_TILE_NONE;
- if (pt->target == PIPE_TEXTURE_1D)
+ if (tex->b.b.target == PIPE_TEXTURE_1D)
return I915_TILE_NONE;
- if (util_format_is_s3tc(pt->format))
+ if (util_format_is_s3tc(tex->b.b.format))
/* XXX X-tiling might make sense */
return I915_TILE_NONE;
@@ -401,11 +401,7 @@ i915_texture_layout_3d(struct i915_texture *tex)
static boolean
i915_texture_layout(struct i915_texture * tex)
{
- struct pipe_resource *pt = &tex->b.b;
-
- tex->tiling = i915_texture_tiling(pt);
-
- switch (pt->target) {
+ switch (tex->b.b.target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
@@ -649,11 +645,7 @@ i945_texture_layout_cube(struct i915_texture *tex)
static boolean
i945_texture_layout(struct i915_texture * tex)
{
- struct pipe_resource *pt = &tex->b.b;
-
- tex->tiling = i915_texture_tiling(pt);
-
- switch (pt->target) {
+ switch (tex->b.b.target) {
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
@@ -664,7 +656,7 @@ i945_texture_layout(struct i915_texture * tex)
i945_texture_layout_3d(tex);
break;
case PIPE_TEXTURE_CUBE:
- if (!util_format_is_s3tc(pt->format))
+ if (!util_format_is_s3tc(tex->b.b.format))
i9x5_texture_layout_cube(tex);
else
i945_texture_layout_cube(tex);
@@ -818,6 +810,8 @@ i915_texture_create(struct pipe_screen *screen,
pipe_reference_init(&tex->b.b.reference, 1);
tex->b.b.screen = screen;
+ tex->tiling = i915_texture_tiling(is, tex);
+
if (is->is_i945) {
if (!i945_texture_layout(tex))
goto fail;
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index bdbc08e8086..77febbf5012 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -98,59 +98,81 @@ i915_get_name(struct pipe_screen *screen)
}
static int
-i915_get_param(struct pipe_screen *screen, enum pipe_cap param)
+i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
{
- switch (param) {
- case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- return 8;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 0;
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 8;
+ struct i915_screen *is = i915_screen(screen);
+
+ switch (cap) {
+ /* Supported features (boolean caps). */
+ case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_NPOT_TEXTURES:
- return 1;
+ case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_TWO_SIDED_STENCIL:
return 1;
- case PIPE_CAP_GLSL:
- return 0;
- case PIPE_CAP_ANISOTROPIC_FILTER:
+
+ /* Features that should be supported (boolean caps). */
+ /* XXX: Just test the code */
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 0;
+
+ /* Unsupported features (boolean caps). */
+ case PIPE_CAP_ARRAY_TEXTURES:
+ case PIPE_CAP_DEPTH_CLAMP:
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: /* disable for now */
+ case PIPE_CAP_GLSL:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_INSTANCED_DRAWING: /* draw module? */
case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TIMER_QUERY:
return 0;
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return 1;
+
+ /* Features we can lie about (boolean caps). */
case PIPE_CAP_OCCLUSION_QUERY:
+ return is->debug.lie ? 1 : 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 8;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
return 0;
- case PIPE_CAP_TIMER_QUERY:
- return 0;
- case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 1;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
return I915_MAX_TEXTURE_2D_LEVELS;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return I915_MAX_TEXTURE_3D_LEVELS;
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return I915_MAX_TEXTURE_2D_LEVELS;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+
+ /* Fragment coordinate conventions. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 1;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
- case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
- /* disable for now */
- return 0;
+
default:
+ debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap);
return 0;
}
}
static int
-i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap cap)
{
switch(shader) {
case PIPE_SHADER_VERTEX:
- return draw_get_shader_param(shader, param);
+ return draw_get_shader_param(shader, cap);
case PIPE_SHADER_FRAGMENT:
break;
default:
@@ -158,7 +180,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
}
/* XXX: these are just shader model 2.0 values, fix this! */
- switch(param) {
+ switch(cap) {
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
return 96;
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
@@ -191,15 +213,15 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
default:
- assert(0);
+ debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap);
return 0;
}
}
static float
-i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param)
+i915_get_paramf(struct pipe_screen *screen, enum pipe_cap cap)
{
- switch (param) {
+ switch(cap) {
case PIPE_CAP_MAX_LINE_WIDTH:
/* fall-through */
case PIPE_CAP_MAX_LINE_WIDTH_AA:
@@ -217,6 +239,7 @@ i915_get_paramf(struct pipe_screen *screen, enum pipe_cap param)
return 16.0;
default:
+ debug_printf("%s: Unkown cap %u.\n", __FUNCTION__, cap);
return 0;
}
}
@@ -321,6 +344,20 @@ i915_fence_finish(struct pipe_screen *screen,
static void
+i915_flush_frontbuffer(struct pipe_screen *screen,
+ struct pipe_resource *resource,
+ unsigned level, unsigned layer,
+ void *winsys_drawable_handle)
+{
+ /* XXX: Dummy right now. */
+ (void)screen;
+ (void)resource;
+ (void)level;
+ (void)layer;
+ (void)winsys_drawable_handle;
+}
+
+static void
i915_destroy_screen(struct pipe_screen *screen)
{
struct i915_screen *is = i915_screen(screen);
@@ -371,6 +408,7 @@ i915_screen_create(struct i915_winsys *iws)
is->base.winsys = NULL;
is->base.destroy = i915_destroy_screen;
+ is->base.flush_frontbuffer = i915_flush_frontbuffer;
is->base.get_name = i915_get_name;
is->base.get_vendor = i915_get_vendor;
diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h
index bb4d255a3b3..60f0e2971e0 100644
--- a/src/gallium/drivers/i915/i915_screen.h
+++ b/src/gallium/drivers/i915/i915_screen.h
@@ -45,6 +45,11 @@ struct i915_screen
struct i915_winsys *iws;
boolean is_i945;
+
+ struct {
+ boolean tiling;
+ boolean lie;
+ } debug;
};
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 4a1a4a04f6d..58bbbd1de2c 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -33,6 +33,7 @@
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
#include "i915_context.h"
@@ -57,10 +58,8 @@ translate_wrap_mode(unsigned wrap)
return TEXCOORDMODE_CLAMP_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return TEXCOORDMODE_CLAMP_BORDER;
- /*
- case PIPE_TEX_WRAP_MIRRORED_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
return TEXCOORDMODE_MIRROR;
- */
default:
return TEXCOORDMODE_WRAP;
}
@@ -535,21 +534,31 @@ static void i915_set_constant_buffer(struct pipe_context *pipe,
/* if we have a new buffer compare it with the old one */
if (buf) {
- struct i915_buffer *ir = i915_buffer(buf);
+ struct i915_buffer *ibuf = i915_buffer(buf);
struct pipe_resource *old_buf = i915->constants[shader];
struct i915_buffer *old = old_buf ? i915_buffer(old_buf) : NULL;
-
- new_num = ir->b.b.width0 / 4 * sizeof(float);
-
- if (old && new_num != i915->current.num_user_constants[shader])
- diff = memcmp(old->data, ir->data, ir->b.b.width0);
+ unsigned old_num = i915->current.num_user_constants[shader];
+
+ new_num = ibuf->b.b.width0 / 4 * sizeof(float);
+
+ if (old_num == new_num) {
+ if (old_num == 0)
+ diff = FALSE;
+#if 0
+ /* XXX no point in running this code since st/mesa only uses user buffers */
+ /* Can't compare the buffer data since they are userbuffers */
+ else if (old && old->free_on_destroy)
+ diff = memcmp(old->data, ibuf->data, ibuf->b.b.width0);
+#else
+ (void)old;
+#endif
+ }
} else {
diff = i915->current.num_user_constants[shader] != 0;
}
/*
* flush before updateing the state.
- * XXX: looks like its okay to skip the flush for vertex cbufs
*/
if (diff && shader == PIPE_SHADER_FRAGMENT)
draw_flush(i915->draw);
@@ -766,17 +775,25 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe,
const struct pipe_vertex_buffer *buffers)
{
struct i915_context *i915 = i915_context(pipe);
- /* Because we change state before the draw_set_vertex_buffers call
- * we need a flush here, just to be sure.
- */
- draw_flush(i915->draw);
+ struct draw_context *draw = i915->draw;
+ int i;
- util_copy_vertex_buffers(i915->vertex_buffer,
- &i915->num_vertex_buffers,
- buffers, count);
+#if 0
+ /* XXX doesn't look like this is needed */
+ /* unmap old */
+ for (i = 0; i < i915->num_vertex_buffers; i++) {
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+#endif
/* pass-through to draw module */
- draw_set_vertex_buffers(i915->draw, count, buffers);
+ draw_set_vertex_buffers(draw, count, buffers);
+
+ /* map new */
+ for (i = 0; i < count; i++) {
+ void *buf = i915_buffer(buffers[i].buffer)->data;
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
}
static void *
@@ -801,11 +818,6 @@ i915_bind_vertex_elements_state(struct pipe_context *pipe,
struct i915_context *i915 = i915_context(pipe);
struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems;
- /* Because we change state before the draw_set_vertex_buffers call
- * we need a flush here, just to be sure.
- */
- draw_flush(i915->draw);
-
/* pass-through to draw module */
if (i915_velems) {
draw_set_vertex_elements(i915->draw,
@@ -882,4 +894,5 @@ i915_init_state_functions( struct i915_context *i915 )
i915->base.set_viewport_state = i915_set_viewport_state;
i915->base.set_vertex_buffers = i915_set_vertex_buffers;
i915->base.set_index_buffer = i915_set_index_buffer;
+ i915->base.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c
index d61a8c3407f..204cee6fe9e 100644
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -46,18 +46,34 @@
* (active) state every time a 4kb boundary is crossed.
*/
-static INLINE void set_dynamic_indirect(struct i915_context *i915,
- unsigned offset,
- const unsigned *src,
- unsigned dwords)
+static INLINE void set_dynamic(struct i915_context *i915,
+ unsigned offset,
+ const unsigned state)
+{
+ if (i915->current.dynamic[offset] == state)
+ return;
+
+ i915->current.dynamic[offset] = state;
+ i915->dynamic_dirty |= 1 << offset;
+ i915->hardware_dirty |= I915_HW_DYNAMIC;
+}
+
+
+
+static INLINE void set_dynamic_array(struct i915_context *i915,
+ unsigned offset,
+ const unsigned *src,
+ unsigned dwords)
{
unsigned i;
if (!memcmp(src, &i915->current.dynamic[offset], dwords * 4))
return;
- for (i = 0; i < dwords; i++)
+ for (i = 0; i < dwords; i++) {
i915->current.dynamic[offset + i] = src[i];
+ i915->dynamic_dirty |= 1 << (offset + i);
+ }
i915->hardware_dirty |= I915_HW_DYNAMIC;
}
@@ -79,12 +95,7 @@ static void upload_MODES4(struct i915_context *i915)
*/
modes4 |= i915->blend->modes4;
- /* Always, so that we know when state is in-active:
- */
- set_dynamic_indirect(i915,
- I915_DYNAMIC_MODES4,
- &modes4,
- 1);
+ set_dynamic(i915, I915_DYNAMIC_MODES4, modes4);
}
const struct i915_tracked_state i915_upload_MODES4 = {
@@ -107,10 +118,7 @@ static void upload_BFO(struct i915_context *i915)
bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT;
}
- set_dynamic_indirect(i915,
- I915_DYNAMIC_BFO_0,
- &(bfo[0]),
- 2);
+ set_dynamic_array(i915, I915_DYNAMIC_BFO_0, bfo, 2);
}
const struct i915_tracked_state i915_upload_BFO = {
@@ -141,10 +149,7 @@ static void upload_BLENDCOLOR(struct i915_context *i915)
color[3]);
}
- set_dynamic_indirect(i915,
- I915_DYNAMIC_BC_0,
- bc,
- 2);
+ set_dynamic_array(i915, I915_DYNAMIC_BC_0, bc, 2);
}
const struct i915_tracked_state i915_upload_BLENDCOLOR = {
@@ -161,10 +166,7 @@ static void upload_IAB(struct i915_context *i915)
{
unsigned iab = i915->blend->iab;
- set_dynamic_indirect(i915,
- I915_DYNAMIC_IAB,
- &iab,
- 1);
+ set_dynamic(i915, I915_DYNAMIC_IAB, iab);
}
const struct i915_tracked_state i915_upload_IAB = {
@@ -179,10 +181,8 @@ const struct i915_tracked_state i915_upload_IAB = {
*/
static void upload_DEPTHSCALE(struct i915_context *i915)
{
- set_dynamic_indirect(i915,
- I915_DYNAMIC_DEPTHSCALE_0,
- &(i915->rasterizer->ds[0].u),
- 2);
+ set_dynamic_array(i915, I915_DYNAMIC_DEPTHSCALE_0,
+ &i915->rasterizer->ds[0].u, 2);
}
const struct i915_tracked_state i915_upload_DEPTHSCALE = {
@@ -234,10 +234,7 @@ static void upload_STIPPLE(struct i915_context *i915)
(p[3] << 12));
}
- set_dynamic_indirect(i915,
- I915_DYNAMIC_STP_0,
- &st[0],
- 2);
+ set_dynamic_array(i915, I915_DYNAMIC_STP_0, st, 2);
}
const struct i915_tracked_state i915_upload_STIPPLE = {
@@ -253,10 +250,7 @@ const struct i915_tracked_state i915_upload_STIPPLE = {
*/
static void upload_SCISSOR_ENABLE( struct i915_context *i915 )
{
- set_dynamic_indirect(i915,
- I915_DYNAMIC_SC_ENA_0,
- &(i915->rasterizer->sc[0]),
- 1);
+ set_dynamic(i915, I915_DYNAMIC_SC_ENA_0, i915->rasterizer->sc[0]);
}
const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = {
@@ -282,10 +276,7 @@ static void upload_SCISSOR_RECT(struct i915_context *i915)
sc[1] = (y1 << 16) | (x1 & 0xffff);
sc[2] = (y2 << 16) | (x2 & 0xffff);
- set_dynamic_indirect(i915,
- I915_DYNAMIC_SC_RECT_0,
- &sc[0],
- 3);
+ set_dynamic_array(i915, I915_DYNAMIC_SC_RECT_0, sc, 3);
}
const struct i915_tracked_state i915_upload_SCISSOR_RECT = {
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 5a89977c26c..509d487b498 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -35,6 +35,8 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
static unsigned translate_format( enum pipe_format format )
{
switch (format) {
@@ -178,11 +180,6 @@ i915_emit_hardware_state(struct i915_context *i915 )
ENABLE_TEXKILL_3D_4D |
TEXKILL_4D);
- /* Need to initialize this to zero.
- */
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
- OUT_BATCH(0);
-
OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
/* disable indirect state for now
@@ -194,27 +191,30 @@ i915_emit_hardware_state(struct i915_context *i915 )
/* 7 dwords, 1 relocs */
if (i915->hardware_dirty & I915_HW_IMMEDIATE)
{
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(0) |
- I1_LOAD_S(1) |
- I1_LOAD_S(2) |
- I1_LOAD_S(4) |
- I1_LOAD_S(5) |
- I1_LOAD_S(6) |
- (5));
-
- if(i915->vbo)
- OUT_RELOC(i915->vbo,
- I915_USAGE_VERTEX,
- i915->current.immediate[I915_IMMEDIATE_S0]);
- else
- /* FIXME: we should not do this */
- OUT_BATCH(0);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]);
- OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]);
+ /* remove unwatned bits and S7 */
+ unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
+ 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
+ 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
+ 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
+ i915->immediate_dirty;
+ int i, num = util_bitcount(dirty);
+ assert(num && num <= I915_MAX_IMMEDIATE);
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ dirty << 4 | (num - 1));
+
+ if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
+ if (i915->vbo)
+ OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
+ i915->current.immediate[I915_IMMEDIATE_S0]);
+ else
+ OUT_BATCH(0);
+ }
+
+ for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
+ if (dirty & (1 << i))
+ OUT_BATCH(i915->current.immediate[i]);
+ }
}
#if 01
@@ -223,7 +223,8 @@ i915_emit_hardware_state(struct i915_context *i915 )
{
int i;
for (i = 0; i < I915_MAX_DYNAMIC; i++) {
- OUT_BATCH(i915->current.dynamic[i]);
+ if (i915->dynamic_dirty & (1 << i));
+ OUT_BATCH(i915->current.dynamic[i]);
}
}
#endif
@@ -443,4 +444,6 @@ i915_emit_hardware_state(struct i915_context *i915 )
i915->batch->relocs - save_relocs);
i915->hardware_dirty = 0;
+ i915->immediate_dirty = 0;
+ i915->dynamic_dirty = 0;
}
diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c
index f9ade7077f2..81348647399 100644
--- a/src/gallium/drivers/i915/i915_state_immediate.c
+++ b/src/gallium/drivers/i915/i915_state_immediate.c
@@ -36,12 +36,20 @@
#include "util/u_memory.h"
-/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet.
- * Would like to opportunistically recombine all these fragments into
- * a single packet containing only what has changed, but for now emit
- * as multiple packets.
+/* Convinience function to check immediate state.
*/
+static INLINE void set_immediate(struct i915_context *i915,
+ unsigned offset,
+ const unsigned state)
+{
+ if (i915->current.immediate[offset] == state)
+ return;
+
+ i915->current.immediate[offset] = state;
+ i915->immediate_dirty |= 1 << offset;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+}
@@ -56,9 +64,14 @@ static void upload_S0S1(struct i915_context *i915)
*/
LIS0 = i915->vbo_offset;
+ /* Need to force this */
+ if (i915->dirty & I915_NEW_VBO) {
+ i915->immediate_dirty |= 1 << I915_IMMEDIATE_S0;
+ i915->hardware_dirty |= I915_HW_IMMEDIATE;
+ }
+
/* I915_NEW_VERTEX_SIZE
*/
- /* XXX do this where the vertex size is calculated! */
{
unsigned vertex_size = i915->current.vertex_info.size;
@@ -66,16 +79,8 @@ static void upload_S0S1(struct i915_context *i915)
(vertex_size << 16));
}
- /* I915_NEW_VBO
- */
- if (1 ||
- i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 ||
- i915->current.immediate[I915_IMMEDIATE_S1] != LIS1)
- {
- i915->current.immediate[I915_IMMEDIATE_S0] = LIS0;
- i915->current.immediate[I915_IMMEDIATE_S1] = LIS1;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+ set_immediate(i915, I915_IMMEDIATE_S0, LIS0);
+ set_immediate(i915, I915_IMMEDIATE_S1, LIS1);
}
const struct i915_tracked_state i915_upload_S0S1 = {
@@ -98,21 +103,13 @@ static void upload_S2S4(struct i915_context *i915)
{
LIS2 = i915->current.vertex_info.hwfmt[1];
LIS4 = i915->current.vertex_info.hwfmt[0];
- /*
- debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4);
- */
assert(LIS4); /* should never be zero? */
}
LIS4 |= i915->rasterizer->LIS4;
- if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] ||
- LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) {
-
- i915->current.immediate[I915_IMMEDIATE_S2] = LIS2;
- i915->current.immediate[I915_IMMEDIATE_S4] = LIS4;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+ set_immediate(i915, I915_IMMEDIATE_S2, LIS2);
+ set_immediate(i915, I915_IMMEDIATE_S4, LIS4);
}
const struct i915_tracked_state i915_upload_S2S4 = {
@@ -142,15 +139,12 @@ static void upload_S5(struct i915_context *i915)
#if 0
/* I915_NEW_RASTERIZER
*/
- if (i915->state.Polygon->OffsetFill) {
+ if (i915->rasterizer->LIS7) {
LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE;
}
#endif
- if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) {
- i915->current.immediate[I915_IMMEDIATE_S5] = LIS5;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+ set_immediate(i915, I915_IMMEDIATE_S5, LIS5);
}
const struct i915_tracked_state i915_upload_S5 = {
@@ -180,14 +174,11 @@ static void upload_S6(struct i915_context *i915)
*/
LIS6 |= i915->depth_stencil->depth_LIS6;
- if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) {
- i915->current.immediate[I915_IMMEDIATE_S6] = LIS6;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+ set_immediate(i915, I915_IMMEDIATE_S6, LIS6);
}
const struct i915_tracked_state i915_upload_S6 = {
- "imm s6",
+ "imm S6",
upload_S6,
I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER
};
@@ -204,10 +195,9 @@ static void upload_S7(struct i915_context *i915)
*/
LIS7 = i915->rasterizer->LIS7;
- if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) {
- i915->current.immediate[I915_IMMEDIATE_S7] = LIS7;
- i915->hardware_dirty |= I915_HW_IMMEDIATE;
- }
+#if 0
+ set_immediate(i915, I915_IMMEDIATE_S7, LIS7);
+#endif
}
const struct i915_tracked_state i915_upload_S7 = {
diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h
index 24ea416f015..e915a886c9b 100644
--- a/src/gallium/drivers/i915/i915_winsys.h
+++ b/src/gallium/drivers/i915/i915_winsys.h
@@ -103,11 +103,12 @@ struct i915_winsys {
* @usage how is the hardware going to use the buffer.
* @offset add this to the reloc buffers address
* @target buffer where to write the address, null for batchbuffer.
+ * @fenced relocation needs a fence.
*/
int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *reloc,
enum i915_winsys_buffer_usage usage,
- unsigned offset, bool fenced);
+ unsigned offset, boolean fenced);
/**
* Flush a bufferbatch.
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index b23454b5808..570ea23ff45 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -4,6 +4,7 @@
#include "util/u_memory.h"
#include "util/u_format.h"
+#include "util/u_transfer.h"
static unsigned brw_translate_surface_format( unsigned id )
@@ -302,6 +303,7 @@ brw_pipe_vertex_init( struct brw_context *brw )
brw->base.create_vertex_elements_state = brw_create_vertex_elements_state;
brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state;
brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state;
+ brw->base.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 3efbd6a246d..b533abe24c6 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -855,6 +855,19 @@ identity_context_transfer_inline_write(struct pipe_context *_context,
}
+static void identity_redefine_user_buffer(struct pipe_context *_context,
+ struct pipe_resource *_resource,
+ unsigned offset, unsigned size)
+{
+ struct identity_context *id_context = identity_context(_context);
+ struct identity_resource *id_resource = identity_resource(_resource);
+ struct pipe_context *context = id_context->pipe;
+ struct pipe_resource *resource = id_resource->resource;
+
+ context->redefine_user_buffer(context, resource, offset, size);
+}
+
+
struct pipe_context *
identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
{
@@ -929,6 +942,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.transfer_unmap = identity_context_transfer_unmap;
id_pipe->base.transfer_flush_region = identity_context_transfer_flush_region;
id_pipe->base.transfer_inline_write = identity_context_transfer_inline_write;
+ id_pipe->base.redefine_user_buffer = identity_redefine_user_buffer;
id_pipe->pipe = pipe;
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 26b258b9569..c10a8cbc12c 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -96,10 +96,15 @@ if env['platform'] != 'embedded':
tests.append('round')
for test in tests:
+ testname = 'lp_test_' + test
target = env.Program(
- target = 'lp_test_' + test,
- source = ['lp_test_' + test + '.c', 'lp_test_main.c'],
+ target = testname,
+ source = [testname + '.c', 'lp_test_main.c'],
)
env.InstallProgram(target)
+
+ # http://www.scons.org/wiki/UnitTests
+ alias = env.Alias(testname, [target], target[0].abspath)
+ AlwaysBuild(alias)
Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 85e3cdec82c..849db06acdf 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -72,7 +72,7 @@ llvmpipe_flush( struct pipe_context *pipe,
for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no);
- debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]);
+ debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]);
}
if (0) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index fffdeb6ccde..be86f66de91 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -34,6 +34,7 @@
#include "draw/draw_context.h"
#include "util/u_inlines.h"
+#include "util/u_transfer.h"
static void *
@@ -114,4 +115,6 @@ llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe)
llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer;
+
+ llvmpipe->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 149ee6f1256..d229c620310 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -35,24 +35,13 @@
#include "util/u_cpu_detect.h"
+#include "util/u_math.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_init.h"
#include "lp_test.h"
-#ifdef PIPE_CC_MSVC
-static INLINE double
-round(double x)
-{
- if (x >= 0.0)
- return floor(x + 0.5);
- else
- return ceil(x - 0.5);
-}
-#endif
-
-
void
dump_type(FILE *fp,
struct lp_type type)
diff --git a/src/gallium/drivers/noop/noop_state.c b/src/gallium/drivers/noop/noop_state.c
index ad324774c03..00a4c1eb01e 100644
--- a/src/gallium/drivers/noop/noop_state.c
+++ b/src/gallium/drivers/noop/noop_state.c
@@ -28,6 +28,7 @@
#include <pipe/p_screen.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
+#include "util/u_transfer.h"
static void noop_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
@@ -287,4 +288,5 @@ void noop_init_state_functions(struct pipe_context *ctx)
ctx->sampler_view_destroy = noop_sampler_view_destroy;
ctx->surface_destroy = noop_surface_destroy;
ctx->draw_vbo = noop_draw_vbo;
+ ctx->redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 27eb3817bf1..679e5ea1485 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -299,7 +299,7 @@ check_swap_src_0_1(struct nv_instruction *nvi)
}
if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
- nvi->set_cond = cc_swapped[nvi->set_cond];
+ nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
}
static int
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 53f94820ce0..f3418df8381 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -45,7 +45,7 @@ nv50_query_create(struct pipe_context *pipe, unsigned type)
struct nv50_query *q = CALLOC_STRUCT(nv50_query);
int ret;
- assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+ assert (type == PIPE_QUERY_OCCLUSION_COUNTER);
q->type = type;
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 256,
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index b4eda0f617d..ba2c3e8c281 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -23,6 +23,7 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
@@ -886,5 +887,6 @@ nv50_init_state_functions(struct nv50_context *nv50)
nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
nv50->pipe.set_index_buffer = nv50_set_index_buffer;
+ nv50->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
index d6b80c3ea79..ce9300ad8fd 100644
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -1130,7 +1130,7 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
case TGSI_FILE_INPUT:
res = bld_saved_input(bld, idx, swz);
if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP))
- return res;
+ break;
res = new_value(bld->pc, bld->ti->input_file, type);
res->reg.id = bld->ti->input_map[idx][swz];
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
index 61932ff2b6a..73a605f94e1 100644
--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -84,6 +84,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_EARLY_FRAGMENT_TESTS 0x00000210
+#define NVC0_3D_MEM_BARRIER 0x0000021c
+#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001
+#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002
+#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004
+#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010
+#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100
+#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000
+
#define NVC0_3D_TESS_MODE 0x00000320
#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
@@ -122,11 +130,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0))
-#define NVC0_3D_TFB_UNK0700(i0) (0x00000700 + 0x10*(i0))
+#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0))
+#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010
+#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004
#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
+#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010
+#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004
#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0))
+#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010
+#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004
#define NVC0_3D_TFB_ENABLE 0x00000744
@@ -144,29 +158,29 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_RT__ESIZE 0x00000020
#define NVC0_3D_RT__LEN 0x00000008
-#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x20*(i0))
+#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0))
-#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x20*(i0))
+#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0))
-#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x20*(i0))
+#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0))
-#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x20*(i0))
+#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0))
-#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x20*(i0))
+#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0))
-#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x20*(i0))
+#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0))
#define NVC0_3D_RT_TILE_MODE_UNK0 0x00000001
#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070
#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4
#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700
#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8
-#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x20*(i0))
+#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0))
#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000
-#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x20*(i0))
+#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0))
#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
@@ -216,21 +230,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0) (0x00000d00 + 0x8*(i0))
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000008
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK 0x0000ffff
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT 0
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK 0xffff0000
-#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT 16
-
-#define NVC0_3D_VIEWPORT_CLIP_VERT(i0) (0x00000d04 + 0x8*(i0))
-#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000008
-#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008
-#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK 0x0000ffff
-#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT 0
-#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK 0xffff0000
-#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT 16
+#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16
#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
@@ -356,6 +370,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+#define NVC0_3D_CLEAR_FLAGS 0x000010f8
+#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001
+#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010
+#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100
+#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000
+
#define NVC0_3D_VERTEX_ID 0x00001118
#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c
@@ -561,7 +581,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004
#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008
-#define NVC0_3D_STENCIL_FRONT_ENABLE 0x00001380
+#define NVC0_3D_STENCIL_ENABLE 0x00001380
#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384
#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
@@ -605,9 +625,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394
-#define NVC0_3D_STENCIL_FRONT_MASK 0x00001398
+#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398
-#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c
+#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c
#define NVC0_3D_DRAW_TFB_BASE 0x000013a4
@@ -642,6 +662,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_CLIPID_HEIGHT 0x00001504
#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16
+
+#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16
+
#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE 0x00001510
#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0 0x00000001
#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1 0x00000002
@@ -814,8 +846,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8
#define NVC0_3D_POINT_COORD_REPLACE 0x00001604
-#define NVC0_3D_POINT_COORD_REPLACE_BITS__MASK 0x00001fff
-#define NVC0_3D_POINT_COORD_REPLACE_BITS__SHIFT 0
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3
#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608
@@ -864,8 +900,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000
#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001
-#define NVC0_3D_POINT_SPRITE_CTRL 0x00001660
-
#define NVC0_3D_TEX_MISC 0x00001664
#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
@@ -928,22 +962,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3 0x00000008
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4 0x00000010
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12 0x00001000
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13 0x00002000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000
-#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN 0x0000194c
+#define NVC0_3D_CLIP_RECTS_EN 0x0000194c
-#define NVC0_3D_VIEWPORT_CLIP_MODE 0x00001950
-#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY 0x00000000
-#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL 0x00000001
-#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER 0x00000002
+#define NVC0_3D_CLIP_RECTS_MODE 0x00001950
+#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000
+#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001
+#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002
#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c
#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001
@@ -996,6 +1036,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+#define NVC0_3D_CLIPID_FILL 0x000019d4
+
#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004
#define NVC0_3D_COLOR_MASK__LEN 0x00000008
@@ -1155,9 +1197,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
-#define NVC0_3D_TFB_VARYING_LOCS(i0) (0x00002800 + 0x4*(i0))
+#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1))
#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
-#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000080
+#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020
#define NVC0_3D_COLOR_MASK_BROADCAST 0x00003808
diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
index ea3e642a448..aa949bdfa36 100644
--- a/src/gallium/drivers/nvc0/nvc0_buffer.c
+++ b/src/gallium/drivers/nvc0/nvc0_buffer.c
@@ -59,15 +59,23 @@ release_allocation(struct nvc0_mm_allocation **mm, struct nvc0_fence *fence)
(*mm) = NULL;
}
-static INLINE boolean
-nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
- unsigned domain)
+INLINE void
+nvc0_buffer_release_gpu_storage(struct nvc0_resource *buf)
{
nouveau_bo_ref(NULL, &buf->bo);
if (buf->mm)
release_allocation(&buf->mm, buf->fence);
+ buf->domain = 0;
+}
+
+static INLINE boolean
+nvc0_buffer_reallocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
+ unsigned domain)
+{
+ nvc0_buffer_release_gpu_storage(buf);
+
return nvc0_buffer_allocate(screen, buf, domain);
}
@@ -77,10 +85,7 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen,
{
struct nvc0_resource *res = nvc0_resource(presource);
- nouveau_bo_ref(NULL, &res->bo);
-
- if (res->mm)
- release_allocation(&res->mm, res->fence);
+ nvc0_buffer_release_gpu_storage(res);
if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
FREE(res->data);
@@ -112,7 +117,7 @@ nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf,
memcpy(buf->data + start, bounce->map, size);
nouveau_bo_unmap(bounce);
- buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
+ buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING;
nouveau_bo_ref(NULL, &bounce);
if (mm)
@@ -151,7 +156,7 @@ nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf,
release_allocation(&mm, nvc0->screen->fence.current);
if (start == 0 && size == buf->base.width0)
- buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
+ buf->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING;
return TRUE;
}
@@ -174,7 +179,7 @@ nvc0_buffer_transfer_get(struct pipe_context *pipe,
if (buf->domain == NOUVEAU_BO_VRAM) {
if (usage & PIPE_TRANSFER_READ) {
- if (buf->status & NVC0_BUFFER_STATUS_DIRTY)
+ if (buf->status & NVC0_BUFFER_STATUS_GPU_WRITING)
nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0);
}
}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 2118abb5d5d..f02de4d044a 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -41,17 +41,18 @@ nvc0_flush(struct pipe_context *pipe, unsigned flags,
OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
OUT_RING (chan, 0x00);
+ } else
+ if ((flags & PIPE_FLUSH_RENDER_CACHE) && !(flags & PIPE_FLUSH_FRAME)) {
+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+ OUT_RING (chan, 0);
}
- if (fence) {
- nvc0_screen_fence_new(nvc0->screen, (struct nvc0_fence **)fence, TRUE);
- }
+ if (fence)
+ nvc0_fence_reference((struct nvc0_fence **)fence,
+ nvc0->screen->fence.current);
- if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME)) {
+ if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME))
FIRE_RING(chan);
-
- nvc0_screen_fence_next(nvc0->screen);
- }
}
static void
@@ -67,6 +68,16 @@ nvc0_destroy(struct pipe_context *pipe)
FREE(nvc0);
}
+void
+nvc0_default_flush_notify(struct nouveau_channel *chan)
+{
+ struct nvc0_context *nvc0 = chan->user_private;
+
+ nvc0_screen_fence_update(nvc0->screen, TRUE);
+
+ nvc0_screen_fence_next(nvc0->screen);
+}
+
struct pipe_context *
nvc0_create(struct pipe_screen *pscreen, void *priv)
{
@@ -91,6 +102,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
nvc0->pipe.flush = nvc0_flush;
screen->base.channel->user_private = nvc0;
+ screen->base.channel->flush_notify = nvc0_default_flush_notify;
nvc0_init_query_functions(nvc0);
nvc0_init_surface_functions(nvc0);
@@ -148,12 +160,14 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
{
struct resident *rsd;
struct util_dynarray *array;
- unsigned ctx, i;
+ unsigned ctx, i, n;
for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) {
array = &nvc0->residents[ctx];
- for (i = 0; i < array->size / sizeof(struct resident); ++i) {
+ n = array->size / sizeof(struct resident);
+ MARK_RING(nvc0->screen->base.channel, n, n);
+ for (i = 0; i < n; ++i) {
rsd = util_dynarray_element(array, struct resident, i);
nvc0_resource_validate(rsd->res, rsd->flags);
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 94117988e50..1ce5554f7b7 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -54,6 +54,8 @@
#define NVC0_NEW_CONSTBUF (1 << 18)
#define NVC0_NEW_TEXTURES (1 << 19)
#define NVC0_NEW_SAMPLERS (1 << 20)
+#define NVC0_NEW_TFB (1 << 21)
+#define NVC0_NEW_TFB_BUFFERS (1 << 22)
#define NVC0_BUFCTX_CONSTANT 0
#define NVC0_BUFCTX_FRAME 1
@@ -79,6 +81,7 @@ struct nvc0_context {
uint8_t num_vtxelts;
uint8_t num_textures[5];
uint8_t num_samplers[5];
+ uint8_t tls_required; /* bitmask of shader types using l[] */
uint16_t scissor;
uint32_t uniform_buffer_bound[5];
} state;
@@ -123,6 +126,11 @@ struct nvc0_context {
boolean vbo_dirty;
boolean vbo_push_hint;
+ struct nvc0_transform_feedback_state *tfb;
+ struct pipe_resource *tfbbuf[4];
+ unsigned num_tfbbufs;
+ unsigned tfb_offset[4];
+
struct draw_context *draw;
};
@@ -149,6 +157,8 @@ nvc0_surface(struct pipe_surface *ps)
/* nvc0_context.c */
struct pipe_context *nvc0_create(struct pipe_screen *, void *);
+void nvc0_default_flush_notify(struct nouveau_channel *);
+
void nvc0_bufctx_emit_relocs(struct nvc0_context *);
void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx,
struct nvc0_resource *, uint32_t flags);
@@ -177,6 +187,8 @@ void nvc0_tevlprog_validate(struct nvc0_context *);
void nvc0_gmtyprog_validate(struct nvc0_context *);
void nvc0_fragprog_validate(struct nvc0_context *);
+void nvc0_tfb_validate(struct nvc0_context *);
+
/* nvc0_state.c */
extern void nvc0_init_state_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
index 9d2c48cf14d..f2d4b1451bf 100644
--- a/src/gallium/drivers/nvc0/nvc0_fence.c
+++ b/src/gallium/drivers/nvc0/nvc0_fence.c
@@ -55,6 +55,7 @@ nvc0_fence_emit(struct nvc0_fence *fence)
assert(fence->state == NVC0_FENCE_STATE_AVAILABLE);
+ MARK_RING (chan, 5, 2);
BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
@@ -83,7 +84,8 @@ nvc0_fence_del(struct nvc0_fence *fence)
struct nvc0_fence *it;
struct nvc0_screen *screen = fence->screen;
- if (fence->state == NVC0_FENCE_STATE_EMITTED) {
+ if (fence->state == NVC0_FENCE_STATE_EMITTED ||
+ fence->state == NVC0_FENCE_STATE_FLUSHED) {
if (fence == screen->fence.head) {
screen->fence.head = fence->next;
if (!screen->fence.head)
@@ -118,8 +120,8 @@ nvc0_fence_trigger_release_buffers(struct nvc0_fence *fence)
fence->buffers = NULL;
}
-static void
-nvc0_screen_fence_update(struct nvc0_screen *screen)
+void
+nvc0_screen_fence_update(struct nvc0_screen *screen, boolean flushed)
{
struct nvc0_fence *fence;
struct nvc0_fence *next = NULL;
@@ -146,38 +148,43 @@ nvc0_screen_fence_update(struct nvc0_screen *screen)
screen->fence.head = next;
if (!next)
screen->fence.tail = NULL;
-}
-#define NVC0_FENCE_MAX_SPINS (1 << 17)
+ if (flushed) {
+ for (fence = next; fence; fence = fence->next)
+ fence->state = NVC0_FENCE_STATE_FLUSHED;
+ }
+}
boolean
nvc0_fence_signalled(struct nvc0_fence *fence)
{
struct nvc0_screen *screen = fence->screen;
- if (fence->state == NVC0_FENCE_STATE_EMITTED)
- nvc0_screen_fence_update(screen);
+ if (fence->state >= NVC0_FENCE_STATE_EMITTED)
+ nvc0_screen_fence_update(screen, FALSE);
return fence->state == NVC0_FENCE_STATE_SIGNALLED;
}
+#define NVC0_FENCE_MAX_SPINS (1 << 31)
+
boolean
nvc0_fence_wait(struct nvc0_fence *fence)
{
struct nvc0_screen *screen = fence->screen;
- int spins = 0;
+ uint32_t spins = 0;
- if (fence->state == NVC0_FENCE_STATE_AVAILABLE) {
+ if (fence->state < NVC0_FENCE_STATE_EMITTED) {
nvc0_fence_emit(fence);
- FIRE_RING(screen->base.channel);
-
if (fence == screen->fence.current)
nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
}
+ if (fence->state < NVC0_FENCE_STATE_FLUSHED)
+ FIRE_RING(screen->base.channel);
do {
- nvc0_screen_fence_update(screen);
+ nvc0_screen_fence_update(screen, FALSE);
if (fence->state == NVC0_FENCE_STATE_SIGNALLED)
return TRUE;
@@ -188,8 +195,9 @@ nvc0_fence_wait(struct nvc0_fence *fence)
#endif
} while (spins < NVC0_FENCE_MAX_SPINS);
- if (spins > 9000)
- NOUVEAU_ERR("fence %x: been spinning too long\n", fence->sequence);
+ debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n",
+ fence->sequence,
+ screen->fence.sequence_ack, screen->fence.sequence);
return FALSE;
}
@@ -199,5 +207,4 @@ nvc0_screen_fence_next(struct nvc0_screen *screen)
{
nvc0_fence_emit(screen->fence.current);
nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
- nvc0_screen_fence_update(screen);
}
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h
index e63c164bda4..3d8c3f8ba60 100644
--- a/src/gallium/drivers/nvc0/nvc0_fence.h
+++ b/src/gallium/drivers/nvc0/nvc0_fence.h
@@ -7,7 +7,8 @@
#define NVC0_FENCE_STATE_AVAILABLE 0
#define NVC0_FENCE_STATE_EMITTED 1
-#define NVC0_FENCE_STATE_SIGNALLED 2
+#define NVC0_FENCE_STATE_FLUSHED 2
+#define NVC0_FENCE_STATE_SIGNALLED 3
struct nvc0_mm_allocation;
diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c
index 7c7e134146e..ea3ed9e0225 100644
--- a/src/gallium/drivers/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nvc0/nvc0_miptree.c
@@ -143,8 +143,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
switch (pt->format) {
case PIPE_FORMAT_Z16_UNORM:
tile_flags = 0x0700; /* COMPRESSED */
- tile_flags = 0x0200; /* NORMAL ? */
- tile_flags = 0x0100; /* NORMAL ? */
+ tile_flags = 0x0100; /* NORMAL */
break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
tile_flags = 0x5300; /* MSAA 4, COMPRESSED */
@@ -170,6 +169,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
break;
case PIPE_FORMAT_R16G16B16A16_UNORM:
tile_flags = 0xe900; /* COMPRESSED */
+ tile_flags = 0xfe00; /* NORMAL */
break;
default:
tile_flags = 0xe000; /* MSAA 4, COMPRESSED 32 BIT */
@@ -283,7 +283,7 @@ nvc0_miptree_surface_new(struct pipe_context *pipe,
pipe_reference_init(&ps->reference, 1);
pipe_resource_reference(&ps->texture, pt);
ps->context = pipe;
- ps->format = pt->format;
+ ps->format = templ->format;
ps->usage = templ->usage;
ps->u.tex.level = templ->u.tex.level;
ps->u.tex.first_layer = templ->u.tex.first_layer;
diff --git a/src/gallium/drivers/nvc0/nvc0_mm.c b/src/gallium/drivers/nvc0/nvc0_mm.c
index 0629dad19c9..516d2e31b55 100644
--- a/src/gallium/drivers/nvc0/nvc0_mm.c
+++ b/src/gallium/drivers/nvc0/nvc0_mm.c
@@ -96,13 +96,13 @@ mm_bucket_by_size(struct nvc0_mman *cache, unsigned size)
static INLINE uint32_t
mm_default_slab_size(unsigned chunk_order)
{
- assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);
-
static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
{
12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22
};
+ assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);
+
return 1 << slab_order[chunk_order - MM_MIN_ORDER];
}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
index 304a1919768..f51d289e8cd 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
@@ -44,6 +44,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s,
if (ld->indirect >= 0)
return FALSE;
+ /* a few ops can use g[] sources directly, but we don't support g[] yet */
+ if (ld->src[0]->value->reg.file == NV_FILE_MEM_L ||
+ ld->src[0]->value->reg.file == NV_FILE_MEM_G)
+ return FALSE;
+
for (i = 0; i < 3 && nvi->src[i]; ++i)
if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
return FALSE;
@@ -55,15 +60,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s,
boolean
nvc0_insn_is_predicateable(struct nv_instruction *nvi)
{
- int s;
-
- if (!nv_op_predicateable(nvi->opcode))
+ if (nvi->predicate >= 0) /* already predicated */
return FALSE;
- if (nvi->predicate >= 0)
+ if (!nvc0_op_info_table[nvi->opcode].predicate &&
+ !nvc0_op_info_table[nvi->opcode].pseudo)
return FALSE;
- for (s = 0; s < 4 && nvi->src[s]; ++s)
- if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
- return FALSE;
return TRUE;
}
@@ -103,6 +104,12 @@ nvc0_pc_replace_value(struct nv_pc *pc,
return n;
}
+static INLINE boolean
+is_gpr63(struct nv_value *val)
+{
+ return (val->reg.file == NV_FILE_GPR && val->reg.id == 63);
+}
+
struct nv_value *
nvc0_pc_find_constant(struct nv_ref *ref)
{
@@ -116,7 +123,7 @@ nvc0_pc_find_constant(struct nv_ref *ref)
assert(!src->insn->src[0]->mod);
src = src->insn->src[0]->value;
}
- if ((src->reg.file == NV_FILE_IMM) ||
+ if ((src->reg.file == NV_FILE_IMM) || is_gpr63(src) ||
(src->insn &&
src->insn->opcode == NV_OP_LD &&
src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
@@ -130,7 +137,7 @@ nvc0_pc_find_immediate(struct nv_ref *ref)
{
struct nv_value *src = nvc0_pc_find_constant(ref);
- return (src && src->reg.file == NV_FILE_IMM) ? src : NULL;
+ return (src && (src->reg.file == NV_FILE_IMM || is_gpr63(src))) ? src : NULL;
}
static void
@@ -187,7 +194,10 @@ nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
bb[p++] = b->out[j];
break;
case CFG_EDGE_LOOP_LEAVE:
- bbb[pp++] = b->out[j];
+ if (!b->out[j]->priv) {
+ bbb[pp++] = b->out[j];
+ b->out[j]->priv = 1;
+ }
break;
default:
assert(0);
@@ -499,6 +509,9 @@ nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
i->bb = b;
b->num_instructions++;
+
+ if (i->prev && i->prev->terminator)
+ nvc0_insns_permute(i->prev, i);
}
void
@@ -512,6 +525,8 @@ nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
ni->prev = at;
ni->next->prev = ni;
ni->prev->next = ni;
+ ni->bb = at->bb;
+ ni->bb->num_instructions++;
}
void
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
index 969cc68c596..efa073a9201 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.h
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -53,7 +53,8 @@
/**
* BIND forces source operand i into the same register as destination operand i,
- * and the operands will be assigned consecutive registers (needed for TEX)
+ * and the operands will be assigned consecutive registers (needed for TEX).
+ * Beware conflicts !
* SELECT forces its multiple source operands and its destination operand into
* one and the same register.
*/
@@ -204,6 +205,10 @@
#define NV_CC_C 0x11
#define NV_CC_A 0x12
#define NV_CC_S 0x13
+#define NV_CC_INVERSE(cc) ((cc) ^ 0x7)
+/* for 1 bit predicates: */
+#define NV_CC_P 0
+#define NV_CC_NOT_P 1
#define NV_PC_MAX_INSTRUCTIONS 2048
#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
@@ -259,12 +264,6 @@ nv_op_supported_src_mods(uint opcode)
return nvc0_op_info_table[opcode].mods;
}
-static INLINE boolean
-nv_op_predicateable(uint opcode)
-{
- return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE;
-}
-
static INLINE uint
nv_type_order(ubyte type)
{
@@ -310,7 +309,7 @@ struct nv_reg {
int32_t s32;
int64_t s64;
uint64_t u64;
- uint32_t u32;
+ uint32_t u32; /* expected to be 0 for $r63 */
float f32;
double f64;
} imm;
@@ -344,6 +343,8 @@ struct nv_ref {
uint8_t flags;
};
+#define NV_REF_FLAG_REGALLOC_PRIV (1 << 0)
+
struct nv_basic_block;
struct nv_instruction {
@@ -485,7 +486,7 @@ nv_alloc_instruction(struct nv_pc *pc, uint opcode)
assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
insn->opcode = opcode;
- insn->cc = 0;
+ insn->cc = NV_CC_P;
insn->indirect = -1;
insn->predicate = -1;
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
index db8055d91cd..c10f920e6f1 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
@@ -236,7 +236,7 @@ emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
*/
pc->emit[0] |= (pcrel & 0x3f) << 26;
- pc->emit[1] |= (pcrel >> 6) & 0x1ffff;
+ pc->emit[1] |= (pcrel >> 6) & 0x3ffff;
}
}
@@ -393,6 +393,8 @@ emit_tex(struct nv_pc *pc, struct nv_instruction *i)
{
int src1 = i->tex_array + i->tex_dim + i->tex_cube;
+ assert(src1 < 6);
+
pc->emit[0] = 0x00000086;
pc->emit[1] = 0x80000000;
@@ -446,7 +448,7 @@ emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op)
pc->emit[0] |= op << 26;
- if (op >= 4) {
+ if (op >= 3) {
if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9;
if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7;
} else {
@@ -477,6 +479,7 @@ emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
{
i->quadop = 0x99;
i->lanes = 4;
+ i->src[1] = i->src[0];
emit_quadop(pc, i);
}
@@ -485,6 +488,7 @@ emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
{
i->quadop = 0xa5;
i->lanes = 5;
+ i->src[1] = i->src[0];
emit_quadop(pc, i);
}
@@ -629,25 +633,28 @@ emit_slct(struct nv_pc *pc, struct nv_instruction *i)
static void
emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
{
+ uint32_t rint;
+
pc->emit[0] = 0x00000004;
pc->emit[1] = 0x10000000;
- if (i->opcode != NV_OP_CVT)
+ /* if no type conversion specified, get type from opcode */
+ if (i->opcode != NV_OP_CVT && i->ext.cvt.d == i->ext.cvt.s)
i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode);
switch (i->ext.cvt.d) {
case NV_TYPE_F32:
switch (i->ext.cvt.s) {
case NV_TYPE_F32: pc->emit[1] = 0x10000000; break;
- case NV_TYPE_S32: pc->emit[0] |= 0x200;
+ case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
case NV_TYPE_U32: pc->emit[1] = 0x18000000; break;
}
break;
- case NV_TYPE_S32: pc->emit[0] |= 0x80;
+ case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */
case NV_TYPE_U32:
switch (i->ext.cvt.s) {
case NV_TYPE_F32: pc->emit[1] = 0x14000000; break;
- case NV_TYPE_S32: pc->emit[0] |= 0x200;
+ case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break;
}
break;
@@ -656,14 +663,20 @@ emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
break;
}
- if (i->opcode == NV_OP_FLOOR)
- pc->emit[1] |= 0x00020000;
- else
- if (i->opcode == NV_OP_CEIL)
- pc->emit[1] |= 0x00040000;
- else
- if (i->opcode == NV_OP_TRUNC)
- pc->emit[1] |= 0x00060000;
+ rint = (i->ext.cvt.d == NV_TYPE_F32) ? 1 << 7 : 0;
+
+ if (i->opcode == NV_OP_FLOOR) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 2 << 16;
+ } else
+ if (i->opcode == NV_OP_CEIL) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 4 << 16;
+ } else
+ if (i->opcode == NV_OP_TRUNC) {
+ pc->emit[0] |= rint;
+ pc->emit[1] |= 6 << 16;
+ }
if (i->saturate || i->opcode == NV_OP_SAT)
pc->emit[0] |= 0x20;
@@ -793,11 +806,8 @@ emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i)
}
static void
-emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
+emit_ld_common(struct nv_pc *pc, struct nv_instruction *i)
{
- pc->emit[0] = 0x00000006;
- pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
-
emit_ldst_size(pc, i);
set_pred(pc, i);
@@ -808,6 +818,15 @@ emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
}
static void
+emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x00000006;
+ pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
+
+ emit_ld_common(pc, i);
+}
+
+static void
emit_ld(struct nv_pc *pc, struct nv_instruction *i)
{
if (SFILE(i, 0) >= NV_FILE_MEM_C(0) &&
@@ -818,6 +837,12 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i)
} else {
emit_ld_const(pc, i);
}
+ } else
+ if (SFILE(i, 0) == NV_FILE_MEM_L) {
+ pc->emit[0] = 0x00000005;
+ pc->emit[1] = 0xc0000000;
+
+ emit_ld_common(pc, i);
} else {
NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0));
abort();
@@ -827,8 +852,19 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i)
static void
emit_st(struct nv_pc *pc, struct nv_instruction *i)
{
- NOUVEAU_ERR("emit_st: not handled yet\n");
- abort();
+ if (SFILE(i, 0) != NV_FILE_MEM_L)
+ NOUVEAU_ERR("emit_st(%u): file not handled yet\n", SFILE(i, 0));
+
+ pc->emit[0] = 0x00000005 | (0 << 8); /* write-back caching */
+ pc->emit[1] = 0xc8000000;
+
+ emit_ldst_size(pc, i);
+
+ set_pred(pc, i);
+ set_address_16(pc, i->src[0]);
+
+ SID(pc, (i->indirect >= 0) ? i->src[i->indirect] : NULL, 20);
+ DID(pc, i->src[1]->value, 14);
}
void
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
index acc72bff14c..c5a7367a5fd 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
@@ -99,6 +99,7 @@ inst_removable(struct nv_instruction *nvi)
nvc0_insn_refcount(nvi)));
}
+/* Check if we do not actually have to emit this instruction. */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
@@ -141,9 +142,10 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
struct nv_instruction *nvi, *next;
int j;
+ /* find first non-empty block emitted before b */
for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
- if (j >= 0) {
+ for (; j >= 0; --j) {
in = pc->bb_list[j];
/* check for no-op branches (BRA $PC+8) */
@@ -157,6 +159,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
nvc0_insn_delete(in->exit);
}
b->emit_pos = in->emit_pos + in->emit_size;
+
+ if (in->emit_size) /* no more no-op branches to b */
+ break;
}
pc->bb_list[pc->num_blocks++] = b;
@@ -240,10 +245,15 @@ check_swap_src_0_1(struct nv_instruction *nvi)
struct nv_ref *src0 = nvi->src[0];
struct nv_ref *src1 = nvi->src[1];
- if (!nv_op_commutative(nvi->opcode))
+ if (!nv_op_commutative(nvi->opcode) &&
+ NV_BASEOP(nvi->opcode) != NV_OP_SET &&
+ NV_BASEOP(nvi->opcode) != NV_OP_SLCT)
return;
assert(src0 && src1 && src0->value && src1->value);
+ if (src1->value->reg.file != NV_FILE_GPR)
+ return;
+
if (is_cspace_load(src0->value->insn)) {
if (!is_cspace_load(src1->value->insn)) {
nvi->src[0] = src1;
@@ -258,8 +268,13 @@ check_swap_src_0_1(struct nv_instruction *nvi)
}
}
- if (nvi->src[0] != src0 && nvi->opcode == NV_OP_SET)
- nvi->set_cond = cc_swapped[nvi->set_cond];
+ if (nvi->src[0] != src0) {
+ if (NV_BASEOP(nvi->opcode) == NV_OP_SET)
+ nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
+ else
+ if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT)
+ nvi->set_cond = NV_CC_INVERSE(nvi->set_cond);
+ }
}
static void
@@ -543,6 +558,7 @@ constant_operand(struct nv_pc *pc,
nv_reference(pc, nvi, s, nvi->src[t]->value);
nvi->src[s]->mod = nvi->src[t]->mod;
}
+ break;
case NV_OP_ADD_F32:
if (u.u32 == 0) {
switch (nvi->src[t]->mod) {
@@ -562,6 +578,7 @@ constant_operand(struct nv_pc *pc,
if (nvi->opcode != NV_OP_CVT)
nvi->src[0]->mod = 0;
}
+ break;
case NV_OP_ADD_B32:
if (u.u32 == 0) {
assert(nvi->src[t]->mod == 0);
@@ -582,7 +599,7 @@ constant_operand(struct nv_pc *pc,
} else
if (u.s32 > 0 && u.s32 == (1 << shift)) {
nvi->opcode = NV_OP_SHL;
- (val = new_value(pc, NV_FILE_IMM, NV_TYPE_U32))->reg.imm.s32 = shift;
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift;
nv_reference(pc, nvi, 0, nvi->src[t]->value);
nv_reference(pc, nvi, 1, val);
break;
@@ -590,14 +607,14 @@ constant_operand(struct nv_pc *pc,
break;
case NV_OP_RCP:
u.f32 = 1.0f / u.f32;
- (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
nvi->opcode = NV_OP_MOV;
assert(s == 0);
nv_reference(pc, nvi, 0, val);
break;
case NV_OP_RSQ:
u.f32 = 1.0f / sqrtf(u.f32);
- (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
nvi->opcode = NV_OP_MOV;
assert(s == 0);
nv_reference(pc, nvi, 0, val);
@@ -607,25 +624,83 @@ constant_operand(struct nv_pc *pc,
}
}
+static void
+handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
+{
+ struct nv_value *src0 = nvi->src[0]->value;
+ struct nv_value *src1 = nvi->src[1]->value;
+
+ if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
+ return;
+ if (src0->reg.file != NV_FILE_GPR)
+ return;
+ nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
+ nvc0_insn_delete(nvi);
+}
+
+/* check if we can MUL + ADD -> MAD/FMA */
+static void
+handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
+{
+ struct nv_value *src0 = nvi->src[0]->value;
+ struct nv_value *src1 = nvi->src[1]->value;
+ struct nv_value *src;
+ int s;
+ uint8_t mod[4];
+
+ if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
+ else
+ if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
+ else
+ return;
+
+ if ((src0->insn && src0->insn->bb != nvi->bb) ||
+ (src1->insn && src1->insn->bb != nvi->bb))
+ return;
+
+ /* check for immediates from prior constant folding */
+ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
+ return;
+ src = nvi->src[s]->value;
+
+ mod[0] = nvi->src[0]->mod;
+ mod[1] = nvi->src[1]->mod;
+ mod[2] = src->insn->src[0]->mod;
+ mod[3] = src->insn->src[1]->mod;
+
+ if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
+ return;
+
+ nvi->opcode = NV_OP_MAD_F32;
+
+ nv_reference(ctx->pc, nvi, s, NULL);
+ nvi->src[2] = nvi->src[!s];
+ nvi->src[!s] = NULL;
+
+ nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
+ nvi->src[0]->mod = mod[2] ^ mod[s];
+ nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
+ nvi->src[1]->mod = mod[3];
+}
+
static int
-nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
+nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *nvi, *next;
int j;
for (nvi = b->entry; nvi; nvi = next) {
- struct nv_value *src0, *src1, *src;
- int s;
- uint8_t mod[4];
+ struct nv_value *src0, *src1;
+ uint baseop = NV_BASEOP(nvi->opcode);
next = nvi->next;
src0 = nvc0_pc_find_immediate(nvi->src[0]);
src1 = nvc0_pc_find_immediate(nvi->src[1]);
- if (src0 && src1)
+ if (src0 && src1) {
constant_expression(ctx->pc, nvi, src0, src1);
- else {
+ } else {
if (src0)
constant_operand(ctx->pc, nvi, src0, 0);
else
@@ -633,44 +708,13 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
constant_operand(ctx->pc, nvi, src1, 1);
}
- /* check if we can MUL + ADD -> MAD/FMA */
- if (nvi->opcode != NV_OP_ADD)
- continue;
-
- src0 = nvi->src[0]->value;
- src1 = nvi->src[1]->value;
-
- if (SRC_IS_MUL(src0) && src0->refc == 1)
- src = src0;
+ if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
+ handle_min_max(ctx, nvi);
else
- if (SRC_IS_MUL(src1) && src1->refc == 1)
- src = src1;
- else
- continue;
-
- /* could have an immediate from above constant_* */
- if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
- continue;
- s = (src == src0) ? 0 : 1;
-
- mod[0] = nvi->src[0]->mod;
- mod[1] = nvi->src[1]->mod;
- mod[2] = src->insn->src[0]->mod;
- mod[3] = src->insn->src[0]->mod;
-
- if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
- continue;
-
- nvi->opcode = NV_OP_MAD;
- nv_reference(ctx->pc, nvi, s, NULL);
- nvi->src[2] = nvi->src[!s];
-
- nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
- nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
- nvi->src[0]->mod = mod[2] ^ mod[s];
- nvi->src[1]->mod = mod[3];
+ if (nvi->opcode == NV_OP_ADD_F32)
+ handle_add_mul(ctx, nvi);
}
- DESCEND_ARBITRARY(j, nv_pass_lower_arith);
+ DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
return 0;
}
@@ -700,8 +744,12 @@ struct pass_reld_elim {
int alloc;
};
+/* Extend the load operation in @rec to also cover the data loaded by @ld.
+ * The two loads may not overlap but reference adjacent memory locations.
+ */
static void
-combine_load(struct mem_record *rec, struct nv_instruction *ld)
+combine_load(struct nv_pc *pc, struct mem_record *rec,
+ struct nv_instruction *ld)
{
struct nv_instruction *fv = rec->insn;
struct nv_value *mem = ld->src[0]->value;
@@ -716,7 +764,7 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld)
return;
rec->ofst = mem->reg.address;
for (j = 0; j < d; ++j)
- fv->def[d + j] = fv->def[j];
+ fv->def[mem->reg.size / 4 + j] = fv->def[j];
d = 0;
} else
if ((size == 8 && rec->ofst & 3) ||
@@ -729,6 +777,9 @@ combine_load(struct mem_record *rec, struct nv_instruction *ld)
fv->def[d++]->insn = fv;
}
+ if (fv->src[0]->value->refc > 1)
+ nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
+ fv->src[0]->value->reg.address = rec->ofst;
fv->src[0]->value->reg.size = rec->size = size;
nvc0_insn_delete(ld);
@@ -793,6 +844,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
((it->ofst >> 4) == (ofst >> 4)) &&
((it->ofst + it->size == ofst) ||
(it->ofst - mem->reg.size == ofst))) {
+ /* only NV_OP_VFETCH can load exactly 12 bytes */
if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
continue;
if (it->ofst < ofst) {
@@ -808,7 +860,7 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
switch (ld->opcode) {
case NV_OP_EXPORT: combine_export(it, ld); break;
default:
- combine_load(it, ld);
+ combine_load(ctx->pc, it, ld);
break;
}
} else
@@ -817,6 +869,11 @@ nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
}
}
+ ctx->alloc = 0;
+ ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
+ for (s = 0; s < 16; ++s)
+ ctx->mem_c[s] = NULL;
+
DESCEND_ARBITRARY(s, nv_pass_mem_opt);
return 0;
}
@@ -1006,7 +1063,6 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
return 0;
}
-#if 0
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
* Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
* BREAK and dummy ELSE block.
@@ -1027,61 +1083,187 @@ bb_is_if_else_endif(struct nv_basic_block *bb)
}
}
-/* predicate instructions and remove branch at the end */
+/* Predicate instructions and delete any branch at the end if it is
+ * not a break from a loop.
+ */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
- struct nv_value *p, ubyte cc)
+ struct nv_value *pred, uint8_t cc)
{
+ struct nv_instruction *nvi, *prev;
+ int s;
+ if (!b->entry)
+ return;
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ prev = nvi;
+ if (inst_is_noop(nvi))
+ continue;
+ for (s = 0; nvi->src[s]; ++s);
+ assert(s < 6);
+ nvi->predicate = s;
+ nvi->cc = cc;
+ nv_reference(pc, nvi, nvi->predicate, pred);
+ }
+ if (prev->opcode == NV_OP_BRA &&
+ b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
+ b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
+ nvc0_insn_delete(prev);
}
-#endif
-/* NOTE: Run this after register allocation, we can just cut out the cflow
- * instructions and hook the predicates to the conditional OPs if they are
- * not using immediates; better than inserting SELECT to join definitions.
- *
- * NOTE: Should adapt prior optimization to make this possible more often.
+static INLINE boolean
+may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
+{
+ if (nvi->def[0] && values_equal(nvi->def[0], pred))
+ return FALSE;
+ return nvc0_insn_is_predicateable(nvi);
+}
+
+/* Transform IF/ELSE/ENDIF constructs into predicated instructions
+ * where feasible.
*/
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
+ struct nv_instruction *nvi;
+ struct nv_value *pred;
+ int k;
+ int n0, n1; /* instruction counts of outgoing blocks */
+
+ if (bb_is_if_else_endif(b)) {
+ assert(b->exit && b->exit->opcode == NV_OP_BRA);
+
+ assert(b->exit->predicate >= 0);
+ pred = b->exit->src[b->exit->predicate]->value;
+
+ n1 = n0 = 0;
+ for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ if (!nvi) {
+ /* we're after register allocation, so there always is an ELSE block */
+ for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
+ if (!may_predicate_insn(nvi, pred))
+ break;
+ }
+
+ /* 12 is an arbitrary limit */
+ if (!nvi && n0 < 12 && n1 < 12) {
+ predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
+ predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
+
+ nvc0_insn_delete(b->exit); /* delete the branch */
+
+ /* and a potential joinat before it */
+ if (b->exit && b->exit->opcode == NV_OP_JOINAT)
+ nvc0_insn_delete(b->exit);
+
+ /* remove join operations at the end of the conditional */
+ k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
+ if ((nvi = b->out[0]->out[k]->entry)) {
+ nvi->join = 0;
+ if (nvi->opcode == NV_OP_JOIN)
+ nvc0_insn_delete(nvi);
+ }
+ }
+ }
+ DESCEND_ARBITRARY(k, nv_pass_flatten);
+
return 0;
}
+/* Tests instructions for equality, but independently of sources. */
+static boolean
+is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
+{
+ if (a->opcode != b->opcode)
+ return FALSE;
+ if (nv_is_texture_op(a->opcode)) {
+ if (a->ext.tex.t != b->ext.tex.t ||
+ a->ext.tex.s != b->ext.tex.s)
+ return FALSE;
+ if (a->tex_dim != b->tex_dim ||
+ a->tex_array != b->tex_array ||
+ a->tex_cube != b->tex_cube ||
+ a->tex_shadow != b->tex_shadow ||
+ a->tex_live != b->tex_live)
+ return FALSE;
+ } else
+ if (a->opcode == NV_OP_CVT) {
+ if (a->ext.cvt.s != b->ext.cvt.s ||
+ a->ext.cvt.d != b->ext.cvt.d)
+ return FALSE;
+ } else
+ if (NV_BASEOP(a->opcode) == NV_OP_SET ||
+ NV_BASEOP(a->opcode) == NV_OP_SLCT) {
+ if (a->set_cond != b->set_cond)
+ return FALSE;
+ } else
+ if (a->opcode == NV_OP_LINTERP ||
+ a->opcode == NV_OP_PINTERP) {
+ if (a->centroid != b->centroid ||
+ a->flat != b->flat)
+ return FALSE;
+ }
+ if (a->cc != b->cc)
+ return FALSE;
+ if (a->lanes != b->lanes ||
+ a->patch != b->patch ||
+ a->saturate != b->saturate)
+ return FALSE;
+ if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
+ return FALSE;
+ return TRUE;
+}
+
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *ir, *ik, *next;
struct nv_instruction *entry = b->phi ? b->phi : b->entry;
- int s;
+ int s, d;
unsigned int reps;
do {
reps = 0;
for (ir = entry; ir; ir = next) {
next = ir->next;
+ if (ir->fixed)
+ continue;
for (ik = entry; ik != ir; ik = ik->next) {
- if (ir->opcode != ik->opcode || ir->fixed)
+ if (!is_operation_equal(ir, ik))
continue;
-
- if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1])
+ if (!ir->def[0] || !ik->def[0])
continue;
if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
continue;
- if (!values_equal(ik->def[0], ir->def[0]))
+ for (d = 0; d < 4; ++d) {
+ if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
+ break;
+ if (ir->def[d]) {
+ if (!values_equal(ik->def[0], ir->def[0]))
+ break;
+ } else {
+ d = 4;
+ break;
+ }
+ }
+ if (d != 4)
continue;
- for (s = 0; s < 3; ++s) {
+ for (s = 0; s < 5; ++s) {
struct nv_value *a, *b;
- if (!ik->src[s]) {
- if (ir->src[s])
- break;
- continue;
+ if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
+ break;
+ if (!ir->src[s]) {
+ s = 5;
+ break;
}
+
if (ik->src[s]->mod != ir->src[s]->mod)
break;
a = ik->src[s]->value;
@@ -1089,14 +1271,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
if (a == b)
continue;
if (a->reg.file != b->reg.file ||
- a->reg.id < 0 ||
+ a->reg.id < 0 || /* this excludes memory loads/stores */
a->reg.id != b->reg.id)
break;
}
- if (s == 3) {
+ if (s == 5) {
nvc0_insn_delete(ir);
+ for (d = 0; d < 4 && ir->def[d]; ++d)
+ nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
++reps;
- nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]);
break;
}
}
@@ -1110,13 +1293,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
* neighbouring registers. CSE might have messed this up.
+ * Just generate a MOV for each source to avoid conflicts if they're used in
+ * multiple NV_OP_BIND at different positions.
*/
static int
nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_value *val;
struct nv_instruction *bnd, *nvi, *next;
- int s, t;
+ int s;
for (bnd = b->entry; bnd; bnd = next) {
next = bnd->next;
@@ -1124,20 +1309,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b)
continue;
for (s = 0; s < 4 && bnd->src[s]; ++s) {
val = bnd->src[s]->value;
- for (t = s + 1; t < 4 && bnd->src[t]; ++t) {
- if (bnd->src[t]->value != val)
- continue;
- nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
- nvi->def[0] = new_value_like(ctx->pc, val);
- nvi->def[0]->insn = nvi;
- nv_reference(ctx->pc, nvi, 0, val);
- nvc0_insn_insert_before(bnd, nvi);
- nv_reference(ctx->pc, bnd, t, nvi->def[0]);
- }
+ nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
+ nvi->def[0] = new_value_like(ctx->pc, val);
+ nvi->def[0]->insn = nvi;
+ nv_reference(ctx->pc, nvi, 0, val);
+ nv_reference(ctx->pc, bnd, s, nvi->def[0]);
+
+ nvc0_insn_insert_before(bnd, nvi);
}
}
- DESCEND_ARBITRARY(t, nv_pass_fix_bind);
+ DESCEND_ARBITRARY(s, nv_pass_fix_bind);
return 0;
}
@@ -1153,11 +1335,17 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
pass.n = 0;
pass.pc = pc;
+ /* Do CSE so we can just compare values by pointer in subsequent passes. */
+ pc->pass_seq++;
+ ret = nv_pass_cse(&pass, root);
+ if (ret)
+ return ret;
+
/* Do this first, so we don't have to pay attention
* to whether sources are supported memory loads.
*/
pc->pass_seq++;
- ret = nv_pass_lower_arith(&pass, root);
+ ret = nv_pass_algebraic_opt(&pass, root);
if (ret)
return ret;
@@ -1185,11 +1373,9 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
reldelim->pc = pc;
}
- pc->pass_seq++;
- ret = nv_pass_cse(&pass, root);
- if (ret)
- return ret;
-
+ /* May run DCE before load-combining since that pass will clean up
+ * after itself.
+ */
dce.pc = pc;
do {
dce.removed = 0;
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
index b03826484e4..90c669cc4b8 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_print.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
@@ -225,7 +225,7 @@ nvc0_print_instruction(struct nv_instruction *i)
PRINT("%s", gree);
if (NV_BASEOP(i->opcode) == NV_OP_SET)
- PRINT("set %s", nv_cond_name(i->set_cond));
+ PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond));
else
if (i->saturate)
PRINT("sat %s", nvc0_opcode_name(i->opcode));
@@ -278,10 +278,10 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
{ NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
{ NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
- { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
{ NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
{ NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
@@ -302,7 +302,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_TRUNC, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
@@ -343,18 +343,18 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
{ NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
{ NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
- { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
- { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
- { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
- { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+ { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 },
+ { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 1 },
{ NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
@@ -363,13 +363,13 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+ { NV_OP_SLCT, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SLCT, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_SLCT, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
- { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+ { NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
{ NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
index d24f09a1507..f4afe083e2d 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
@@ -39,6 +39,30 @@ struct register_set {
struct nv_pc *pc;
};
+/* aliasing is allowed */
+static void
+intersect_register_sets(struct register_set *dst,
+ struct register_set *src1, struct register_set *src2)
+{
+ int i;
+
+ for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) {
+ dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0];
+ dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1];
+ }
+}
+
+static void
+mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
+{
+ int i;
+
+ for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) {
+ set->bits[i][0] = (set->bits[i][0] | mask) & umask;
+ set->bits[i][1] = (set->bits[i][1] | mask) & umask;
+ }
+}
+
struct nv_pc_pass {
struct nv_pc *pc;
struct nv_instruction **insns;
@@ -63,6 +87,9 @@ add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
{
struct nv_range *range, **nextp = &val->livei;
+ if (bgn == end) /* [a, a) is invalid / empty */
+ return TRUE;
+
for (range = val->livei; range; range = range->next) {
if (end < range->bgn)
break; /* insert before */
@@ -327,14 +354,14 @@ do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
assert(b->join == a->join);
}
-static INLINE void
+static INLINE boolean
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
if (!join_allowed(ctx, a, b)) {
#ifdef NVC0_RA_DEBUG_JOIN
debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
#endif
- return;
+ return FALSE;
}
if (livei_have_overlap(a->join, b->join)) {
#ifdef NVC0_RA_DEBUG_JOIN
@@ -342,10 +369,27 @@ try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
livei_print(a);
livei_print(b);
#endif
- return;
+ return FALSE;
}
do_join_values(ctx, a, b);
+
+ return TRUE;
+}
+
+static void
+join_values_nofail(struct nv_pc_pass *ctx,
+ struct nv_value *a, struct nv_value *b, boolean type_only)
+{
+ if (type_only) {
+ assert(join_allowed(ctx, a, b));
+ do_join_values(ctx, a, b);
+ } else {
+ boolean ok = try_join_values(ctx, a, b);
+ if (!ok) {
+ NOUVEAU_ERR("failed to coalesce values\n");
+ }
+ }
}
static INLINE boolean
@@ -360,20 +404,32 @@ need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
return (b->num_in > 1) && (n == 2);
}
+/* Look for the @phi's operand whose definition reaches @b. */
static int
phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
struct nv_basic_block *tb)
{
+ struct nv_ref *srci, *srcj;
int i, j;
for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
- if (!nvc0_bblock_reachable_by(b, phi->src[i]->value->insn->bb, tb))
+ srci = phi->src[i];
+ /* if already replaced, check with original source first */
+ if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
+ srci = srci->value->insn->src[0];
+ if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL))
continue;
/* NOTE: back-edges are ignored by the reachable-by check */
- if (j < 0 || !nvc0_bblock_reachable_by(phi->src[j]->value->insn->bb,
- phi->src[i]->value->insn->bb, tb))
+ if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb,
+ srci->value->insn->bb, NULL)) {
j = i;
+ srcj = srci;
+ }
}
+ if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL))
+ if (!nvc0_bblock_reachable_by(srcj->value->insn->bb,
+ phi->def[0]->insn->bb, NULL))
+ j = -1;
return j;
}
@@ -420,21 +476,23 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
ctx->pc->current_block = pn;
for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
- if ((j = phi_opnd_for_bb(i, p, b)) < 0)
- continue;
- val = i->src[j]->value;
-
- if (i->src[j]->flags) {
- /* value already encountered from a different in-block */
- val = val->insn->src[0]->value;
- while (j < 6 && i->src[j])
- ++j;
- assert(j < 6);
+ j = phi_opnd_for_bb(i, p, b);
+
+ if (j < 0) {
+ val = i->def[0];
+ } else {
+ val = i->src[j]->value;
+ if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
+ j = -1;
+ /* use original value, we already encountered & replaced it */
+ val = val->insn->src[0]->value;
+ }
}
+ if (j < 0) /* need an additional source ? */
+ for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j);
+ assert(j < 6); /* XXX: really ugly shaders */
ni = new_instruction(ctx->pc, NV_OP_MOV);
-
- /* TODO: insert instruction at correct position in the first place */
if (ni->prev && ni->prev->target)
nvc0_insns_permute(ni->prev, ni);
@@ -442,7 +500,7 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
ni->def[0]->insn = ni;
nv_reference(ctx->pc, ni, 0, val);
nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
- i->src[j]->flags = 1;
+ i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
}
if (pn != p && pn->exit) {
@@ -460,8 +518,13 @@ pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
return 0;
}
+#define JOIN_MASK_PHI (1 << 0)
+#define JOIN_MASK_SELECT (1 << 1)
+#define JOIN_MASK_MOV (1 << 2)
+#define JOIN_MASK_BIND (1 << 3)
+
static int
-pass_join_values(struct nv_pc_pass *ctx, int iter)
+pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
{
int c, n;
@@ -470,36 +533,33 @@ pass_join_values(struct nv_pc_pass *ctx, int iter)
switch (i->opcode) {
case NV_OP_PHI:
- if (iter != 2)
+ if (!(mask & JOIN_MASK_PHI))
break;
for (c = 0; c < 6 && i->src[c]; ++c)
- try_join_values(ctx, i->def[0], i->src[c]->value);
+ join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
break;
case NV_OP_MOV:
- if ((iter == 2) && i->src[0]->value->insn &&
- !nv_is_texture_op(i->src[0]->value->join->insn->opcode))
+ if (!(mask & JOIN_MASK_MOV))
+ break;
+ if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1])
try_join_values(ctx, i->def[0], i->src[0]->value);
break;
case NV_OP_SELECT:
- if (iter != 1)
+ if (!(mask & JOIN_MASK_SELECT))
break;
- for (c = 0; c < 6 && i->src[c]; ++c) {
- assert(join_allowed(ctx, i->def[0], i->src[c]->value));
- do_join_values(ctx, i->def[0], i->src[c]->value);
- }
- break;
- case NV_OP_TEX:
- case NV_OP_TXB:
- case NV_OP_TXL:
- case NV_OP_TXQ:
- /* on nvc0, TEX src and dst can differ */
+ for (c = 0; c < 6 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
break;
case NV_OP_BIND:
- if (iter)
+ if (!(mask & JOIN_MASK_BIND))
break;
- for (c = 0; c < 6 && i->src[c]; ++c)
- do_join_values(ctx, i->def[c], i->src[c]->value);
+ for (c = 0; c < 4 && i->src[c]; ++c)
+ join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */
default:
break;
}
@@ -621,15 +681,16 @@ static void collect_live_values(struct nv_basic_block *b, const int n)
{
int i;
- if (b->out[0]) {
- if (b->out[1]) { /* what to do about back-edges ? */
+ /* XXX: what to do about back/fake-edges (used to include both here) ? */
+ if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
for (i = 0; i < n; ++i)
b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
} else {
memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
}
} else
- if (b->out[1]) {
+ if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
} else {
memset(b->live_set, 0, n * sizeof(uint32_t));
@@ -746,42 +807,46 @@ insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
elem->next = nval;
}
-static int
-pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+static void
+collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
+ boolean assigned_only)
{
- struct nv_instruction *i;
- struct register_set f, free;
+ struct nv_value *val;
int k, n;
- struct nv_value *cur, *val, *tmp[2];
- struct nv_value active, inactive, handled, unhandled;
- make_empty_list(&active);
- make_empty_list(&inactive);
- make_empty_list(&handled);
- make_empty_list(&unhandled);
+ make_empty_list(head);
- nvc0_ctor_register_set(ctx->pc, &free);
-
- /* joined values should have range = NULL and thus not be added;
- * also, fixed memory values won't be added because they're not
- * def'd, just used
- */
for (n = 0; n < ctx->num_insns; ++n) {
- i = ctx->insns[n];
+ struct nv_instruction *i = ctx->insns[n];
+ /* for joined values, only the representative will have livei != NULL */
for (k = 0; k < 5; ++k) {
if (i->def[k] && i->def[k]->livei)
- insert_ordered_tail(&unhandled, i->def[k]);
- else
- if (0 && i->def[k])
- debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+ if (!assigned_only || i->def[k]->reg.id >= 0)
+ insert_ordered_tail(head, i->def[k]);
}
}
- for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+ for (val = head->next; val != head->prev; val = val->next) {
assert(val->join == val);
assert(val->livei->bgn <= val->next->livei->bgn);
}
+}
+
+static int
+pass_linear_scan(struct nv_pc_pass *ctx)
+{
+ struct register_set f, free;
+ struct nv_value *cur, *val, *tmp[2];
+ struct nv_value active, inactive, handled, unhandled;
+
+ make_empty_list(&active);
+ make_empty_list(&inactive);
+ make_empty_list(&handled);
+
+ nvc0_ctor_register_set(ctx->pc, &free);
+
+ collect_register_values(ctx, &unhandled, FALSE);
foreach_s(cur, tmp[0], &unhandled) {
remove_from_list(cur);
@@ -818,14 +883,7 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
reg_occupy(&f, val);
if (cur->reg.id < 0) {
- boolean mem = FALSE;
- int v = nvi_vector_size(cur->insn);
-
- if (v > 1)
- mem = !reg_assign(&f, &cur->insn->def[0], v);
- else
- if (iter)
- mem = !reg_assign(&f, &cur, 1);
+ boolean mem = !reg_assign(&f, &cur, 1);
if (mem) {
NOUVEAU_ERR("out of registers\n");
@@ -839,6 +897,68 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
return 0;
}
+/* Allocate values defined by instructions such as TEX, which have to be
+ * assigned to consecutive registers.
+ * Linear scan doesn't really work here since the values can have different
+ * live intervals.
+ */
+static int
+pass_allocate_constrained_values(struct nv_pc_pass *ctx)
+{
+ struct nv_value regvals, *val;
+ struct nv_instruction *i;
+ struct nv_value *defs[4];
+ struct register_set regs[4];
+ int n, vsize, c;
+ uint32_t mask;
+ boolean mem;
+
+ collect_register_values(ctx, &regvals, TRUE);
+
+ for (n = 0; n < ctx->num_insns; ++n) {
+ i = ctx->insns[n];
+ vsize = nvi_vector_size(i);
+ if (!(vsize > 1))
+ continue;
+ assert(vsize <= 4);
+
+ for (c = 0; c < vsize; ++c)
+ defs[c] = i->def[c]->join;
+
+ if (defs[0]->reg.id >= 0) {
+ for (c = 1; c < vsize; ++c)
+ assert(defs[c]->reg.id >= 0);
+ continue;
+ }
+
+ for (c = 0; c < vsize; ++c) {
+ nvc0_ctor_register_set(ctx->pc, &regs[c]);
+
+ foreach(val, &regvals) {
+ if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
+ reg_occupy(&regs[c], val);
+ }
+ mask = 0x11111111;
+ if (vsize == 2) /* granularity is 2 and not 4 */
+ mask |= 0x11111111 << 2;
+ mask_register_set(&regs[c], 0, mask << c);
+
+ if (defs[c]->livei)
+ insert_ordered_tail(&regvals, defs[c]);
+ }
+ for (c = 1; c < vsize; ++c)
+ intersect_register_sets(&regs[0], &regs[0], &regs[c]);
+
+ mem = !reg_assign(&regs[0], &defs[0], vsize);
+
+ if (mem) {
+ NOUVEAU_ERR("out of registers\n");
+ abort();
+ }
+ }
+ return 0;
+}
+
static int
nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
@@ -862,6 +982,10 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
ret = pass_generate_phi_movs(ctx, root);
assert(!ret);
+#ifdef NVC0_RA_DEBUG_LIVEI
+ nvc0_print_function(root);
+#endif
+
for (i = 0; i < pc->loop_nesting_bound; ++i) {
pc->pass_seq++;
ret = pass_build_live_sets(ctx, root);
@@ -888,19 +1012,19 @@ nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
livei_print(&pc->values[i]);
#endif
- ret = pass_join_values(ctx, 0);
+ ret = pass_join_values(ctx, JOIN_MASK_PHI);
if (ret)
goto out;
- ret = pass_linear_scan(ctx, 0);
+ ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND);
if (ret)
goto out;
- ret = pass_join_values(ctx, 1);
+ ret = pass_join_values(ctx, JOIN_MASK_MOV);
if (ret)
goto out;
- ret = pass_join_values(ctx, 2);
+ ret = pass_allocate_constrained_values(ctx);
if (ret)
goto out;
- ret = pass_linear_scan(ctx, 1);
+ ret = pass_linear_scan(ctx);
if (ret)
goto out;
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index aefaf7b98ad..899fe147c6a 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -185,8 +185,17 @@ nvc0_varying_location(unsigned sn, unsigned si)
return 0x2e0;
*/
case TGSI_SEMANTIC_GENERIC:
+ /* We'd really like to distinguish between TEXCOORD and GENERIC here,
+ * since only 0x300 to 0x37c can be replaced by sprite coordinates.
+ * Also, gl_PointCoord should be a system value and must be assigned to
+ * address 0x2e0. For now, let's cheat:
+ */
assert(si < 31);
- return 0x80 + (si * 16);
+ if (si <= 7)
+ return 0x300 + si * 16;
+ if (si == 9)
+ return 0x2e0;
+ return 0x80 + ((si - 8) * 16);
case TGSI_SEMANTIC_NORMAL:
return 0x360;
case TGSI_SEMANTIC_PRIMID:
@@ -256,12 +265,14 @@ prog_decl(struct nvc0_translation_info *ti,
case TGSI_FILE_INPUT:
for (i = first; i <= last; ++i) {
if (ti->prog->type == PIPE_SHADER_VERTEX) {
- sn = TGSI_SEMANTIC_GENERIC;
- si = i;
+ for (c = 0; c < 4; ++c)
+ ti->input_loc[i][c] = 0x80 + i * 16 + c * 4;
+ } else {
+ for (c = 0; c < 4; ++c)
+ ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+ /* for sprite coordinates: */
+ ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4;
}
- for (c = 0; c < 4; ++c)
- ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
-
if (ti->prog->type == PIPE_SHADER_FRAGMENT)
ti->interp_mode[i] = nvc0_interp_mode(decl);
}
@@ -281,6 +292,8 @@ prog_decl(struct nvc0_translation_info *ti,
} else {
for (c = 0; c < 4; ++c)
ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+ /* for TFB_VARYING_LOCS: */
+ ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4;
}
}
break;
@@ -288,9 +301,11 @@ prog_decl(struct nvc0_translation_info *ti,
ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]);
assert(first == last);
break;
+ case TGSI_FILE_TEMPORARY:
+ ti->temp128_nr = MAX2(ti->temp128_nr, last + 1);
+ break;
case TGSI_FILE_NULL:
case TGSI_FILE_CONSTANT:
- case TGSI_FILE_TEMPORARY:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_IMMEDIATE:
@@ -518,8 +533,13 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
if (!ti->input_access[i][c])
continue;
a = ti->input_loc[i][c] / 2;
- if ((a & ~7) == 0x70/2)
- fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */
+ if (ti->input_loc[i][c] >= 0x2c0)
+ a -= 32;
+ if (ti->input_loc[i][0] == 0x70)
+ fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */
+ else
+ if (ti->input_loc[i][0] == 0x2e0)
+ fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */
else
fp->hdr[4 + a / 32] |= m << (a % 32);
}
@@ -618,7 +638,7 @@ nvc0_prog_scan(struct nvc0_translation_info *ti)
if (ti->scan.writes_z)
prog->flags[0] = 0x11; /* ? */
else
- if (!ti->global_stores)
+ if (!ti->scan.uses_kill && !ti->global_stores)
prog->fp.early_z = 1;
ret = nvc0_fp_gen_header(prog, ti);
@@ -629,6 +649,11 @@ nvc0_prog_scan(struct nvc0_translation_info *ti)
break;
}
+ if (ti->require_stores) {
+ prog->hdr[0] |= 1 << 26;
+ prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */
+ }
+
assert(!ret);
return ret;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
index e6b210d1355..f6fea29780b 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nvc0/nvc0_program.h
@@ -21,16 +21,18 @@ struct nvc0_program {
unsigned code_size;
unsigned parm_size;
- uint32_t hdr[20];
+ uint32_t hdr[20]; /* TODO: move this into code to save space */
uint32_t flags[2];
struct {
uint8_t edgeflag;
uint8_t num_ucps;
+ uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS];
} vp;
struct {
uint8_t early_z;
+ uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
} fp;
void *relocs;
@@ -74,6 +76,7 @@ struct nvc0_translation_info {
uint32_t *immd32;
ubyte *immd32_ty;
unsigned immd32_nr;
+ unsigned temp128_nr;
ubyte edgeflag_out;
struct nvc0_subroutine *subr;
unsigned num_subrs;
diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
index 74c3451c19a..fcbb7da41a3 100644
--- a/src/gallium/drivers/nvc0/nvc0_push.c
+++ b/src/gallium/drivers/nvc0/nvc0_push.c
@@ -217,6 +217,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
struct push_context ctx;
unsigned i, index_size;
unsigned inst = info->instance_count;
+ boolean apply_bias = info->indexed && info->index_bias;
ctx.chan = nvc0->screen->base.channel;
ctx.translate = nvc0->vertex->translate;
@@ -230,7 +231,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
data = nvc0_resource_map_offset(nvc0, res,
vb->buffer_offset, NOUVEAU_BO_RD);
- if (info->indexed)
+
+ if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i))))
data += info->index_bias * vb->stride;
ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
index cc83fbe771c..e5e43c0e7a5 100644
--- a/src/gallium/drivers/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nvc0/nvc0_query.c
@@ -312,6 +312,7 @@ nvc0_render_condition(struct pipe_context *pipe,
if (mode == PIPE_RENDER_COND_WAIT ||
mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ MARK_RING (chan, 5, 2);
BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@@ -319,6 +320,7 @@ nvc0_render_condition(struct pipe_context *pipe,
OUT_RING (chan, 0x00001001);
}
+ MARK_RING (chan, 4, 2);
BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h
index 17e79642a6d..599823c0dc9 100644
--- a/src/gallium/drivers/nvc0/nvc0_resource.h
+++ b/src/gallium/drivers/nvc0/nvc0_resource.h
@@ -24,7 +24,8 @@ struct nvc0_context;
* USER_MEMORY: resource->data is a pointer to client memory and may change
* between GL calls
*/
-#define NVC0_BUFFER_STATUS_DIRTY (1 << 0)
+#define NVC0_BUFFER_STATUS_GPU_READING (1 << 0)
+#define NVC0_BUFFER_STATUS_GPU_WRITING (1 << 1)
#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7)
/* Resources, if mapped into the GPU's address space, are guaranteed to
@@ -51,6 +52,9 @@ struct nvc0_resource {
struct nvc0_mm_allocation *mm;
};
+void
+nvc0_buffer_release_gpu_storage(struct nvc0_resource *);
+
boolean
nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *,
unsigned start, unsigned size);
@@ -87,7 +91,7 @@ nvc0_resource_map_offset(struct nvc0_context *nvc0,
nvc0_buffer_adjust_score(nvc0, res, -250);
if ((res->domain == NOUVEAU_BO_VRAM) &&
- (res->status & NVC0_BUFFER_STATUS_DIRTY))
+ (res->status & NVC0_BUFFER_STATUS_GPU_WRITING))
nvc0_buffer_download(nvc0, res, 0, res->base.width0);
if ((res->domain != NOUVEAU_BO_GART) ||
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index f608b32e1cb..f7f1fd09a12 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -75,6 +75,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 10;
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 13;
+ case PIPE_CAP_ARRAY_TEXTURES:
+ return 1;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_TEXTURE_SWIZZLE:
@@ -281,9 +283,6 @@ nvc0_magic_3d_init(struct nouveau_channel *chan)
BEGIN_RING(chan, RING_3D_(0x074c), 1);
OUT_RING (chan, 0x3f);
- BEGIN_RING(chan, RING_3D_(0x10f8), 1);
- OUT_RING (chan, 0x0101);
-
BEGIN_RING(chan, RING_3D_(0x16a8), 1);
OUT_RING (chan, (3 << 16) | 3);
BEGIN_RING(chan, RING_3D_(0x1794), 1);
@@ -476,7 +475,7 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
OUT_RING (chan, (15 << 4) | 1);
}
- screen->tls_size = 4 * 4 * 32 * 128 * 4;
+ screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16);
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
screen->tls_size, &screen->tls);
if (ret)
@@ -490,6 +489,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
OUT_RING (chan, screen->tls_size >> 32);
OUT_RING (chan, screen->tls_size);
+ BEGIN_RING(chan, RING_3D_(0x07a0), 1);
+ OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1);
OUT_RING (chan, 0);
@@ -532,16 +533,27 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */
OUT_RING (chan, 0x3f);
- BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1);
+ BEGIN_RING(chan, RING_3D(CLIP_RECTS_MODE), 1);
+ OUT_RING (chan, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY);
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
+ for (i = 0; i < 8 * 2; ++i)
+ OUT_RING(chan, 0);
+ BEGIN_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1);
OUT_RING (chan, 0);
+ /* neither scissors, viewport nor stencil mask should affect clears */
+ BEGIN_RING(chan, RING_3D(CLEAR_FLAGS), 1);
+ OUT_RING (chan, 0);
+
BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
OUT_RINGf (chan, 0.0f);
OUT_RINGf (chan, 1.0f);
+ BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ OUT_RING (chan, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1);
/* We use scissors instead of exact view volume clipping,
* so they're always enabled.
@@ -628,11 +640,14 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen)
const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+ MARK_RING(chan, 5, 5);
nouveau_bo_validate(chan, screen->text, flags);
nouveau_bo_validate(chan, screen->uniforms, flags);
nouveau_bo_validate(chan, screen->txc, flags);
- nouveau_bo_validate(chan, screen->tls, flags);
nouveau_bo_validate(chan, screen->mp_stack_bo, flags);
+
+ if (screen->cur_ctx && screen->cur_ctx->state.tls_required)
+ nouveau_bo_validate(chan, screen->tls, flags);
}
int
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 1fac142e2be..d952ff1f9b1 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -128,17 +128,25 @@ nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
{
struct nvc0_screen *screen = nvc0_screen(res->base.screen);
- nouveau_bo_validate(screen->base.channel, res->bo, flags);
+ if (likely(res->bo)) {
+ nouveau_bo_validate(screen->base.channel, res->bo, flags);
- nvc0_resource_fence(res, flags);
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NVC0_BUFFER_STATUS_GPU_WRITING;
+ if (flags & NOUVEAU_BO_RD)
+ res->status |= NVC0_BUFFER_STATUS_GPU_READING;
+
+ nvc0_resource_fence(res, flags);
+ }
}
boolean
nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit);
-
void
nvc0_screen_fence_next(struct nvc0_screen *);
+void
+nvc0_screen_fence_update(struct nvc0_screen *, boolean flushed);
static INLINE boolean
nvc0_screen_fence_emit(struct nvc0_screen *screen)
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
index 981b5488d08..357f8b80deb 100644
--- a/src/gallium/drivers/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -27,6 +27,16 @@
#include "nvc0_context.h"
+static INLINE void
+nvc0_program_update_context_state(struct nvc0_context *nvc0,
+ struct nvc0_program *prog, int stage)
+{
+ if (prog->hdr[1])
+ nvc0->state.tls_required |= 1 << stage;
+ else
+ nvc0->state.tls_required &= ~(1 << stage);
+}
+
static boolean
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
@@ -55,7 +65,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
prog->code_base + NVC0_SHADER_HEADER_SIZE,
prog->code_size, prog->code);
- BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1);
+ BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1);
OUT_RING (nvc0->screen->base.channel, 0x1111);
return TRUE;
@@ -77,6 +87,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
if (!nvc0_program_validate(nvc0, vp))
return;
+ nvc0_program_update_context_state(nvc0, vp, 0);
BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2);
OUT_RING (chan, 0x11);
@@ -98,6 +109,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
if (!nvc0_program_validate(nvc0, fp))
return;
+ nvc0_program_update_context_state(nvc0, fp, 4);
BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1);
OUT_RING (chan, fp->fp.early_z);
@@ -127,6 +139,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
}
if (!nvc0_program_validate(nvc0, tp))
return;
+ nvc0_program_update_context_state(nvc0, tp, 1);
BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2);
OUT_RING (chan, 0x21);
@@ -148,6 +161,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0)
}
if (!nvc0_program_validate(nvc0, tp))
return;
+ nvc0_program_update_context_state(nvc0, tp, 2);
BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
OUT_RING (chan, 0x31);
@@ -170,6 +184,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
}
if (!nvc0_program_validate(nvc0, gp))
return;
+ nvc0_program_update_context_state(nvc0, gp, 3);
BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
OUT_RING (chan, 0x41);
@@ -178,3 +193,59 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1);
OUT_RING (chan, gp->max_gpr);
}
+
+/* It's *is* kind of shader related. We need to inspect the program
+ * to get the output locations right.
+ */
+void
+nvc0_tfb_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_program *vp;
+ struct nvc0_transform_feedback_state *tfb = nvc0->tfb;
+ int b;
+
+ BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
+ if (!tfb) {
+ OUT_RING(chan, 0);
+ return;
+ }
+ OUT_RING(chan, 1);
+
+ vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog;
+
+ for (b = 0; b < nvc0->num_tfbbufs; ++b) {
+ uint8_t idx, var[128];
+ int i, n;
+ struct nvc0_resource *buf = nvc0_resource(nvc0->tfbbuf[b]);
+
+ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5);
+ OUT_RING (chan, 1);
+ OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
+ OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
+ OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]);
+ OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */
+
+ if (!(nvc0->dirty & NVC0_NEW_TFB))
+ continue;
+
+ BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, tfb->varying_count[b]);
+ OUT_RING (chan, tfb->stride[b]);
+
+ n = b ? tfb->varying_count[b - 1] : 0;
+ i = 0;
+ for (; i < tfb->varying_count[b]; ++i) {
+ idx = tfb->varying_index[n + i];
+ var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3);
+ }
+ for (; i & 3; ++i)
+ var[i] = 0;
+
+ BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4);
+ OUT_RINGp (chan, var, i / 4);
+ }
+ for (; b < 4; ++b)
+ IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
index c08f3693f5e..aa437195764 100644
--- a/src/gallium/drivers/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_state.c
@@ -22,6 +22,7 @@
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
@@ -50,40 +51,35 @@ nvc0_colormask(unsigned mask)
return ret;
}
+#define NVC0_BLEND_FACTOR_CASE(a, b) \
+ case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
static INLINE uint32_t
nvc0_blend_fac(unsigned factor)
{
- static const uint16_t bf[] = {
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */
- NV50_3D_BLEND_FACTOR_ONE,
- NV50_3D_BLEND_FACTOR_SRC_COLOR,
- NV50_3D_BLEND_FACTOR_SRC_ALPHA,
- NV50_3D_BLEND_FACTOR_DST_ALPHA,
- NV50_3D_BLEND_FACTOR_DST_COLOR,
- NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE,
- NV50_3D_BLEND_FACTOR_CONSTANT_COLOR,
- NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA,
- NV50_3D_BLEND_FACTOR_SRC1_COLOR,
- NV50_3D_BLEND_FACTOR_SRC1_ALPHA,
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */
- NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */
- NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
- NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
- };
-
- assert(factor < (sizeof(bf) / sizeof(bf[0])));
- return bf[factor];
+ switch (factor) {
+ NVC0_BLEND_FACTOR_CASE(ONE, ONE);
+ NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE);
+ NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(ZERO, ZERO);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA);
+ default:
+ return NV50_3D_BLEND_FACTOR_ZERO;
+ }
}
static void *
@@ -176,9 +172,9 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
return NULL;
so->pipe = *cso;
-#ifndef NVC0_SCISSORS_CLIPPING
- SB_IMMED_3D(so, SCISSOR_ENABLE(0), cso->scissor);
-#endif
+ /* Scissor enables are handled in scissor state, we will not want to
+ * always emit 16 commands, one for each scissor rectangle, here.
+ */
SB_BEGIN_3D(so, SHADE_MODEL, 1);
SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
@@ -242,7 +238,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
SB_DATA (so, fui(cso->offset_scale));
SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
- SB_DATA (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */
+ SB_DATA (so, fui(cso->offset_units * 2.0f));
}
assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
@@ -283,20 +279,21 @@ nvc0_zsa_state_create(struct pipe_context *pipe,
}
if (cso->stencil[0].enabled) {
- SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5);
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
SB_DATA (so, 1);
SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
- SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
- SB_DATA (so, cso->stencil[0].writemask);
+ SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2);
SB_DATA (so, cso->stencil[0].valuemask);
+ SB_DATA (so, cso->stencil[0].writemask);
} else {
- SB_IMMED_3D(so, STENCIL_FRONT_ENABLE, 0);
+ SB_IMMED_3D(so, STENCIL_ENABLE, 0);
}
if (cso->stencil[1].enabled) {
+ assert(cso->stencil[0].enabled);
SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
SB_DATA (so, 1);
SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
@@ -306,7 +303,8 @@ nvc0_zsa_state_create(struct pipe_context *pipe,
SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
SB_DATA (so, cso->stencil[1].writemask);
SB_DATA (so, cso->stencil[1].valuemask);
- } else {
+ } else
+ if (cso->stencil[0].enabled) {
SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0);
}
@@ -808,6 +806,74 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
nvc0->dirty |= NVC0_NEW_VERTEX;
}
+static void *
+nvc0_tfb_state_create(struct pipe_context *pipe,
+ const struct pipe_stream_output_state *pso)
+{
+ struct nvc0_transform_feedback_state *so;
+ int n = 0;
+ int i, c, b;
+
+ so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t));
+ if (!so)
+ return NULL;
+
+ for (b = 0; b < 4; ++b) {
+ for (i = 0; i < pso->num_outputs; ++i) {
+ if (pso->output_buffer[i] != b)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ if (!(pso->register_mask[i] & (1 << c)))
+ continue;
+ so->varying_count[b]++;
+ so->varying_index[n++] = (pso->register_index[i] << 2) | c;
+ }
+ }
+ so->stride[b] = so->varying_count[b] * 4;
+ }
+ if (pso->stride)
+ so->stride[0] = pso->stride;
+
+ return so;
+}
+
+static void
+nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void
+nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ nvc0_context(pipe)->tfb = hwcso;
+ nvc0_context(pipe)->dirty |= NVC0_NEW_TFB;
+}
+
+static void
+nvc0_set_transform_feedback_buffers(struct pipe_context *pipe,
+ struct pipe_resource **buffers,
+ int *offsets,
+ int num_buffers)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ int i;
+
+ assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */
+
+ for (i = 0; i < num_buffers; ++i) {
+ assert(offsets[i] >= 0);
+ nvc0->tfb_offset[i] = offsets[i];
+ pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]);
+ }
+ for (; i < nvc0->num_tfbbufs; ++i)
+ pipe_resource_reference(&nvc0->tfbbuf[i], NULL);
+
+ nvc0->num_tfbbufs = num_buffers;
+
+ nvc0->dirty |= NVC0_NEW_TFB_BUFFERS;
+}
+
void
nvc0_init_state_functions(struct nvc0_context *nvc0)
{
@@ -861,5 +927,12 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
nvc0->pipe.set_vertex_buffers = nvc0_set_vertex_buffers;
nvc0->pipe.set_index_buffer = nvc0_set_index_buffer;
+
+ nvc0->pipe.create_stream_output_state = nvc0_tfb_state_create;
+ nvc0->pipe.delete_stream_output_state = nvc0_tfb_state_delete;
+ nvc0->pipe.bind_stream_output_state = nvc0_tfb_state_bind;
+ nvc0->pipe.set_stream_output_buffers = nvc0_set_transform_feedback_buffers;
+
+ nvc0->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
index 25aec0244db..70c418fad9b 100644
--- a/src/gallium/drivers/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -25,6 +25,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0)
else
width = fb->width;
+ MARK_RING (chan, 23, 4);
BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */
OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */
@@ -57,6 +58,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
unsigned i;
+ boolean serialize = FALSE;
nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME);
@@ -66,12 +68,14 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
OUT_RING (chan, fb->width << 16);
OUT_RING (chan, fb->height << 16);
+ MARK_RING(chan, 9 * fb->nr_cbufs, 2 * fb->nr_cbufs);
+
for (i = 0; i < fb->nr_cbufs; ++i) {
struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture);
struct nvc0_surface *sf = nvc0_surface(fb->cbufs[i]);
struct nouveau_bo *bo = mt->base.bo;
uint32_t offset = sf->offset;
-
+
BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8);
OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
@@ -83,6 +87,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
OUT_RING (chan, sf->depth);
OUT_RING (chan, mt->layer_stride >> 2);
+ if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING;
+
nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
}
@@ -93,7 +102,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct nouveau_bo *bo = mt->base.bo;
int unk = mt->base.base.target == PIPE_TEXTURE_2D;
uint32_t offset = sf->offset;
-
+
+ MARK_RING (chan, 12, 2);
BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
@@ -107,6 +117,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
OUT_RING (chan, sf->height);
OUT_RING (chan, (unk << 16) | sf->depth);
+ if (mt->base.status & NVC0_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NVC0_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= ~NVC0_BUFFER_STATUS_GPU_READING;
+
nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
} else {
@@ -114,11 +129,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
OUT_RING (chan, 0);
}
-#ifndef NVC0_SCISSORS_CLIPPING
- BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
- OUT_RING (chan, fb->width << 16);
- OUT_RING (chan, fb->height << 16);
-#endif
+ if (serialize) {
+ BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+ OUT_RING (chan, 0);
+ }
}
static void
@@ -160,65 +174,54 @@ nvc0_validate_scissor(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct pipe_scissor_state *s = &nvc0->scissor;
-#ifdef NVC0_SCISSORS_CLIPPING
- struct pipe_viewport_state *vp = &nvc0->viewport;
- int minx, maxx, miny, maxy;
- if (!(nvc0->dirty &
- (NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT | NVC0_NEW_FRAMEBUFFER)) &&
- nvc0->state.scissor == nvc0->rast->pipe.scissor)
+ if (!(nvc0->dirty & NVC0_NEW_SCISSOR) &&
+ nvc0->rast->pipe.scissor == nvc0->state.scissor)
return;
nvc0->state.scissor = nvc0->rast->pipe.scissor;
- if (nvc0->state.scissor) {
- minx = s->minx;
- maxx = s->maxx;
- miny = s->miny;
- maxy = s->maxy;
+ BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+ if (nvc0->rast->pipe.scissor) {
+ OUT_RING(chan, (s->maxx << 16) | s->minx);
+ OUT_RING(chan, (s->maxy << 16) | s->miny);
} else {
- minx = 0;
- maxx = nvc0->framebuffer.width;
- miny = 0;
- maxy = nvc0->framebuffer.height;
+ OUT_RING(chan, (0xffff << 16) | 0);
+ OUT_RING(chan, (0xffff << 16) | 0);
}
-
- minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
- maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
- miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
- maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
-
- BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
- OUT_RING (chan, (maxx << 16) | minx);
- OUT_RING (chan, (maxy << 16) | miny);
- BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
- OUT_RING (chan, ((maxx - minx) << 16) | minx);
- OUT_RING (chan, ((maxy - miny) << 16) | miny);
-#else
- BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
- OUT_RING (chan, (s->maxx << 16) | s->minx);
- OUT_RING (chan, (s->maxy << 16) | s->miny);
-#endif
}
static void
nvc0_validate_viewport(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct pipe_viewport_state *vp = &nvc0->viewport;
+ int x, y, w, h;
+ float zmin, zmax;
BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
- OUT_RINGf (chan, nvc0->viewport.translate[0]);
- OUT_RINGf (chan, nvc0->viewport.translate[1]);
- OUT_RINGf (chan, nvc0->viewport.translate[2]);
+ OUT_RINGf (chan, vp->translate[0]);
+ OUT_RINGf (chan, vp->translate[1]);
+ OUT_RINGf (chan, vp->translate[2]);
BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
- OUT_RINGf (chan, nvc0->viewport.scale[0]);
- OUT_RINGf (chan, nvc0->viewport.scale[1]);
- OUT_RINGf (chan, nvc0->viewport.scale[2]);
+ OUT_RINGf (chan, vp->scale[0]);
+ OUT_RINGf (chan, vp->scale[1]);
+ OUT_RINGf (chan, vp->scale[2]);
-#ifdef NVC0_SCISSORS_CLIPPING
+ /* now set the viewport rectangle to viewport dimensions for clipping */
+
+ x = (int)(vp->translate[0] - fabsf(vp->scale[0]));
+ y = (int)(vp->translate[1] - fabsf(vp->scale[1]));
+ w = (int)fabsf(2.0f * vp->scale[0]);
+ h = (int)fabsf(2.0f * vp->scale[1]);
+ zmin = vp->translate[2] - fabsf(vp->scale[2]);
+ zmax = vp->translate[2] + fabsf(vp->scale[2]);
+
+ BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+ OUT_RING (chan, (w << 16) | x);
+ OUT_RING (chan, (h << 16) | y);
BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
- OUT_RINGf (chan, nvc0->viewport.translate[2] - nvc0->viewport.scale[2]);
- OUT_RINGf (chan, nvc0->viewport.translate[2] + nvc0->viewport.scale[2]);
-#endif
+ OUT_RINGf (chan, zmin);
+ OUT_RINGf (chan, zmax);
}
static void
@@ -227,10 +230,15 @@ nvc0_validate_clip(struct nvc0_context *nvc0)
struct nouveau_channel *chan = nvc0->screen->base.channel;
uint32_t clip;
- clip = nvc0->clip.depth_clamp ? 0x201a : 0x0002;
-#ifndef NVC0_SCISSORS_CLIPPING
- clip |= 0x1080;
-#endif
+ if (nvc0->clip.depth_clamp) {
+ clip =
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2;
+ } else {
+ clip = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1;
+ }
BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
OUT_RING (chan, clip);
@@ -238,6 +246,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0)
if (nvc0->clip.nr) {
struct nouveau_bo *bo = nvc0->screen->uniforms;
+ MARK_RING (chan, 6 + nvc0->clip.nr * 4, 2);
BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
OUT_RING (chan, 256);
OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -281,6 +290,32 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0)
}
static void
+nvc0_validate_sprite_coords(struct nvc0_context *nvc0)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ uint32_t reg;
+
+ if (nvc0->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT)
+ reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT;
+ else
+ reg = NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT;
+
+ if (nvc0->rast->pipe.point_quad_rasterization) {
+ uint32_t en = nvc0->rast->pipe.sprite_coord_enable;
+
+ while (en) {
+ int i = ffs(en) - 1;
+ en &= ~(1 << i);
+ if (i >= 0 && i < 8)
+ reg |= 8 << i;
+ }
+ }
+
+ BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1);
+ OUT_RING (chan, reg);
+}
+
+static void
nvc0_constbufs_validate(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
@@ -340,6 +375,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
if (rebind) {
+ MARK_RING (chan, 4, 2);
BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
OUT_RING (chan, align(res->base.width0, 0x100));
OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -357,6 +393,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
}
nr = MIN2(MIN2(nr - 6, words), NV04_PFIFO_MAX_PACKET_LEN - 1);
+ MARK_RING (chan, nr + 5, 2);
BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
OUT_RING (chan, align(res->base.width0, 0x100));
OUT_RELOCh(chan, bo, base, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -383,13 +420,7 @@ static struct state_validate {
{ nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
{ nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
{ nvc0_validate_stipple, NVC0_NEW_STIPPLE },
-#ifdef NVC0_SCISSORS_CLIPPING
- { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_VIEWPORT |
- NVC0_NEW_RASTERIZER |
- NVC0_NEW_FRAMEBUFFER },
-#else
- { nvc0_validate_scissor, NVC0_NEW_SCISSOR },
-#endif
+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER },
{ nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
{ nvc0_validate_clip, NVC0_NEW_CLIP },
{ nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
@@ -397,10 +428,12 @@ static struct state_validate {
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_validate_sprite_coords, NVC0_NEW_RASTERIZER | NVC0_NEW_FRAGPROG },
{ nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
{ nvc0_validate_textures, NVC0_NEW_TEXTURES },
{ nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
- { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }
+ { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
+ { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS }
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
index 6c8028aba13..57566128ab5 100644
--- a/src/gallium/drivers/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
@@ -4,8 +4,6 @@
#include "pipe/p_state.h"
-#define NVC0_SCISSORS_CLIPPING
-
#define SB_BEGIN_3D(so, m, s) \
(so)->state[(so)->size++] = \
(0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
@@ -67,16 +65,17 @@ struct nvc0_vertex_stateobj {
unsigned num_elements;
uint32_t instance_elts;
uint32_t instance_bufs;
+ boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
unsigned vtx_size;
unsigned vtx_per_packet_max;
- struct nvc0_vertex_element element[1];
+ struct nvc0_vertex_element element[0];
};
/* will have to lookup index -> location qualifier from nvc0_program */
-struct nvc0_tfb_state {
- uint8_t varying_count[4];
+struct nvc0_transform_feedback_state {
uint32_t stride[4];
- uint8_t varying_indices[1];
+ uint8_t varying_count[4];
+ uint8_t varying_index[0];
};
#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index cc0a65687dc..8898bc733a3 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -33,25 +33,15 @@
#include "nv50_defs.xml.h"
+#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
+
/* return TRUE for formats that can be converted among each other by NVC0_2D */
static INLINE boolean
nvc0_2d_format_faithful(enum pipe_format format)
{
- switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B8G8R8A8_SRGB:
- case PIPE_FORMAT_B8G8R8X8_SRGB:
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
- case PIPE_FORMAT_R8_UNORM:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return TRUE;
- default:
- return FALSE;
- }
+ uint8_t id = nvc0_format_table[format].rt;
+
+ return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
}
static INLINE uint8_t
@@ -62,7 +52,7 @@ nvc0_2d_format(enum pipe_format format)
/* Hardware values for color formats range from 0xc0 to 0xff,
* but the 2D engine doesn't support all of them.
*/
- if ((id >= 0xc0) && (0xff0843e080608409ULL & (1ULL << (id - 0xc0))))
+ if (nvc0_2d_format_faithful(format))
return id;
switch (util_format_get_blocksize(format)) {
@@ -72,6 +62,10 @@ nvc0_2d_format(enum pipe_format format)
return NV50_SURFACE_FORMAT_R16_UNORM;
case 4:
return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
+ case 8:
+ return NV50_SURFACE_FORMAT_R16G16B16A16_UNORM;
+ case 16:
+ return NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT;
default:
return 0;
}
@@ -249,15 +243,16 @@ nvc0_clear_render_target(struct pipe_context *pipe,
OUT_RING (chan, 1);
OUT_RING (chan, 0);
- /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
-
- BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
- OUT_RING (chan, (width << 16) | dstx);
- OUT_RING (chan, (height << 16) | dsty);
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2);
+ OUT_RING (chan, ((dstx + width) << 16) | dstx);
+ OUT_RING (chan, ((dsty + height) << 16) | dsty);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
OUT_RING (chan, 0x3c);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0);
+
nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
}
@@ -306,13 +301,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
OUT_RING (chan, sf->height);
OUT_RING (chan, (1 << 16) | 1);
- BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
- OUT_RING (chan, (width << 16) | dstx);
- OUT_RING (chan, (height << 16) | dsty);
+ BEGIN_RING(chan, RING_3D(CLIP_RECT_HORIZ(0)), 2);
+ OUT_RING (chan, ((dstx + width) << 16) | dstx);
+ OUT_RING (chan, ((dsty + height) << 16) | dsty);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 1);
BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
OUT_RING (chan, mode);
+ IMMED_RING(chan, RING_3D(CLIP_RECTS_EN), 0);
+
nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index b219f82c903..968558a5869 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -196,9 +196,16 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
OUT_RINGp (chan, &tic->tic[3], 5);
need_flush = TRUE;
+ } else
+ if (res->status & NVC0_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
+ OUT_RING (chan, (tic->id << 4) | 1);
}
nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+ res->status &= ~NVC0_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NVC0_BUFFER_STATUS_GPU_READING;
+
nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res,
NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
index 950bee2eda4..a44d330c731 100644
--- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -63,7 +63,13 @@ bld_register_access(struct bld_register *reg, unsigned i)
static INLINE void
bld_register_add_val(struct bld_register *reg, struct nv_value *val)
{
- util_dynarray_append(&reg->vals, struct nv_value *, val);
+ struct nv_basic_block *bb = val->insn->bb;
+
+ if (reg->vals.size &&
+ (util_dynarray_top(&reg->vals, struct nv_value *))->insn->bb == bb)
+ *(util_dynarray_top_ptr(&reg->vals, struct nv_value *)) = val;
+ else
+ util_dynarray_append(&reg->vals, struct nv_value *, val);
}
static INLINE boolean
@@ -127,13 +133,10 @@ struct bld_context {
static INLINE ubyte
bld_register_file(struct bld_context *bld, struct bld_register *reg)
{
- if (reg < &bld->avs[0][0]) return NV_FILE_GPR;
- else
- if (reg < &bld->pvs[0][0]) return NV_FILE_GPR;
- else
- if (reg < &bld->ovs[0][0]) return NV_FILE_PRED;
- else
- return NV_FILE_MEM_V;
+ if (reg >= &bld->pvs[0][0] &&
+ reg < &bld->ovs[0][0])
+ return NV_FILE_PRED;
+ return NV_FILE_GPR;
}
static INLINE struct nv_value *
@@ -361,6 +364,9 @@ bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
struct nv_basic_block *bb = bld->pc->current_block;
struct nv_value *val = NULL;
+ if (bld->ti->require_stores) /* XXX: actually only for INDEXABLE_TEMP */
+ return NULL;
+
if (bld->loop_lvl > 1) {
--bld->loop_lvl;
if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
@@ -459,6 +465,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
reg = (struct bld_register *)phi->target;
phi->target = NULL;
+ /* start with s == 1, src[0] is from outside the loop */
for (s = 1, n = 0; n < bb->num_in; ++n) {
if (bb->in_kind[n] != CFG_EDGE_BACK)
continue;
@@ -470,8 +477,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
for (i = 0; i < 4; ++i)
if (phi->src[i] && phi->src[i]->value == val)
break;
- if (i == 4)
+ if (i == 4) {
+ /* skip values we do not want to replace */
+ for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s);
nv_reference(bld->pc, phi, s++, val);
+ }
}
bld->pc->current_block = save;
@@ -563,11 +573,12 @@ bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
- loc->reg.id = ofst * 4;
+ loc->reg.address = ofst * 4;
nv_reference(bld->pc, insn, 0, loc);
- nv_reference(bld->pc, insn, 1, ptr);
- nv_reference(bld->pc, insn, 2, val);
+ nv_reference(bld->pc, insn, 1, val);
+ if (ptr)
+ bld_src_pointer(bld, insn, 2, ptr);
}
static struct nv_value *
@@ -579,7 +590,9 @@ bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
loc->reg.address = ofst * 4;
- val = bld_insn_2(bld, NV_OP_LD, loc, ptr);
+ val = bld_insn_1(bld, NV_OP_LD, loc);
+ if (ptr)
+ bld_src_pointer(bld, val->insn, 1, ptr);
return val;
}
@@ -650,7 +663,7 @@ bld_kil(struct bld_context *bld, struct nv_value *src)
static void
bld_flow(struct bld_context *bld, uint opcode,
- struct nv_value *src, struct nv_basic_block *target,
+ struct nv_value *pred, uint8_t cc, struct nv_basic_block *target,
boolean reconverge)
{
struct nv_instruction *nvi;
@@ -661,8 +674,10 @@ bld_flow(struct bld_context *bld, uint opcode,
nvi = new_instruction(bld->pc, opcode);
nvi->target = target;
nvi->terminator = 1;
- if (src)
- bld_src_predicate(bld, nvi, 0, src);
+ if (pred) {
+ nvi->cc = cc;
+ bld_src_predicate(bld, nvi, 0, pred);
+ }
}
static ubyte
@@ -878,11 +893,10 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
break;
case TGSI_FILE_TEMPORARY:
assert(idx < BLD_MAX_TEMPS);
- if (!res->insn)
+ if (!res->insn || res->insn->bb != bld->pc->current_block)
res = bld_insn_1(bld, NV_OP_MOV, res);
assert(res->reg.file == NV_FILE_GPR);
- assert(res->insn->bb = bld->pc->current_block);
if (bld->ti->require_stores)
bld_lmem_store(bld, ptr, idx * 4 + chan, res);
@@ -979,14 +993,6 @@ bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
}
static struct nv_value *
-bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c)
-{
- if (bld->saved_inputs[i][c])
- return bld->saved_inputs[i][c];
- return NULL;
-}
-
-static struct nv_value *
bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val)
{
unsigned cent = mode & NVC0_INTERP_CENTROID;
@@ -1053,9 +1059,9 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
case TGSI_FILE_INPUT:
assert(!src->Register.Dimension);
if (!ptr) {
- res = bld_get_saved_input(bld, idx, swz);
+ res = bld->saved_inputs[idx][swz];
if (res)
- return res;
+ break;
}
res = new_value(bld->pc, bld->ti->input_file, 4);
if (ptr)
@@ -1173,7 +1179,7 @@ static INLINE void
describe_texture_target(unsigned target, int *dim,
int *array, int *cube, int *shadow)
{
- *array = *cube = *shadow = 0;
+ *dim = *array = *cube = *shadow = 0;
switch (target) {
case TGSI_TEXTURE_1D:
@@ -1199,11 +1205,6 @@ describe_texture_target(unsigned target, int *dim,
*dim = 2;
*cube = 1;
break;
- /*
- case TGSI_TEXTURE_CUBE_ARRAY:
- *dim = 2;
- *cube = *array = 1;
- break;
case TGSI_TEXTURE_1D_ARRAY:
*dim = *array = 1;
break;
@@ -1211,6 +1212,7 @@ describe_texture_target(unsigned target, int *dim,
*dim = 2;
*array = 1;
break;
+ /*
case TGSI_TEXTURE_SHADOW1D_ARRAY:
*dim = *array = *shadow = 1;
break;
@@ -1220,7 +1222,7 @@ describe_texture_target(unsigned target, int *dim,
break;
case TGSI_TEXTURE_CUBE_ARRAY:
*dim = 2;
- *array = *cube = 1;
+ *cube = *array = 1;
break;
*/
default:
@@ -1338,10 +1340,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
if (array)
arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]);
- /* ensure that all inputs reside in a GPR */
- for (c = 0; c < dim + array + cube + shadow; ++c)
- (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1;
-
/* bind { layer x y z } and { lod/bias shadow } to adjacent regs */
bnd = new_instruction(bld->pc, NV_OP_BIND);
@@ -1383,21 +1381,12 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc,
nvi->tex_dim = dim;
nvi->tex_cube = cube;
nvi->tex_shadow = shadow;
+ nvi->tex_array = array;
nvi->tex_live = 0;
return nvi;
}
-/*
-static boolean
-bld_is_constant(struct nv_value *val)
-{
- if (val->reg.file == NV_FILE_IMM)
- return TRUE;
- return val->insn && nvCG_find_constant(val->insn->src[0]);
-}
-*/
-
static void
bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
const struct tgsi_full_instruction *insn)
@@ -1413,7 +1402,7 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
assert(dim + array + shadow + lodbias <= 5);
- if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
+ if (!cube && !array && insn->Instruction.Opcode == TGSI_OPCODE_TXP)
load_proj_tex_coords(bld, t, dim, shadow, insn);
else {
for (c = 0; c < dim + cube + array; ++c)
@@ -1504,10 +1493,10 @@ bld_instruction(struct bld_context *bld,
case TGSI_OPCODE_CMP:
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
src0 = emit_fetch(bld, insn, 0, c);
- src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero);
src1 = emit_fetch(bld, insn, 1, c);
src2 = emit_fetch(bld, insn, 2, c);
- dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0);
+ dst0[c] = bld_insn_3(bld, NV_OP_SLCT_F32, src1, src2, src0);
+ dst0[c]->insn->set_cond = NV_CC_LT;
}
break;
case TGSI_OPCODE_COS:
@@ -1601,6 +1590,7 @@ bld_instruction(struct bld_context *bld,
case TGSI_OPCODE_IF:
{
struct nv_basic_block *b = new_basic_block(bld->pc);
+ struct nv_value *pred = emit_fetch(bld, insn, 0, 0);
assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
@@ -1609,10 +1599,19 @@ bld_instruction(struct bld_context *bld,
bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
- src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
- emit_fetch(bld, insn, 0, 0), bld->zero);
+ if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) {
+ pred = bld_clone(bld, pred->insn);
+ pred->reg.size = 1;
+ pred->reg.file = NV_FILE_PRED;
+ if (pred->insn->opcode == NV_OP_FSET_F32)
+ pred->insn->opcode = NV_OP_SET_F32;
+ } else {
+ pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U,
+ pred, bld->zero);
+ }
+ assert(!mask);
- bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
+ bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0));
++bld->cond_lvl;
bld_new_block(bld, b);
@@ -1638,6 +1637,10 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *b = new_basic_block(bld->pc);
+ if (bld->pc->current_block->exit &&
+ !bld->pc->current_block->exit->terminator)
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, b, FALSE);
+
--bld->cond_lvl;
nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
@@ -1678,7 +1681,7 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
@@ -1690,7 +1693,7 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
@@ -1705,9 +1708,11 @@ bld_instruction(struct bld_context *bld,
{
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
- bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+ if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */
+ bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
- nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+ nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+ }
bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
@@ -1824,11 +1829,11 @@ bld_instruction(struct bld_context *bld,
case TGSI_OPCODE_SSG:
FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
src0 = emit_fetch(bld, insn, 0, c);
- src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero);
- temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
- temp = bld_insn_2(bld, NV_OP_OR, temp, bld_imm_f32(bld, 1.0f));
- dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp);
- bld_src_predicate(bld, dst0[c]->insn, 1, src1);
+ src1 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero);
+ src2 = bld_insn_2(bld, NV_OP_FSET_F32, src0, bld->zero);
+ src1->insn->set_cond = NV_CC_GT;
+ src2->insn->set_cond = NV_CC_LT;
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
}
break;
case TGSI_OPCODE_SUB:
@@ -1892,10 +1897,10 @@ bld_instruction(struct bld_context *bld,
}
for (c = 0; c < 4; ++c)
- if ((mask & (1 << c)) &&
- ((dst0[c]->reg.file == NV_FILE_IMM) ||
- (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR)))
- dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
+ if (mask & (1 << c))
+ if ((dst0[c]->reg.file == NV_FILE_IMM) ||
+ (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63))
+ dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
c = 0;
if ((mask & 0x3) == 0x3) {
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
index 286b382f58e..b279bdc6e7d 100644
--- a/src/gallium/drivers/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
@@ -10,7 +10,8 @@ struct nvc0_transfer {
struct pipe_transfer base;
struct nvc0_m2mf_rect rect[2];
uint32_t nblocksx;
- uint32_t nblocksy;
+ uint16_t nblocksy;
+ uint16_t nlayers;
};
static void
@@ -242,23 +243,36 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
struct nvc0_miptree_level *lvl = &mt->level[level];
struct nvc0_transfer *tx;
uint32_t size;
- uint32_t w, h, d, z, layer;
+ uint32_t w, h, d, z, layer, box_h, box_y;
int ret;
+ tx = CALLOC_STRUCT(nvc0_transfer);
+ if (!tx)
+ return NULL;
+
+ box_y = box->y;
+ box_h = box->height;
+
if (mt->layout_3d) {
z = box->z;
d = u_minify(res->depth0, level);
layer = 0;
+ tx->nlayers = box->depth;
} else {
z = 0;
d = 1;
- layer = box->z;
+ if (res->target == PIPE_TEXTURE_1D ||
+ res->target == PIPE_TEXTURE_1D_ARRAY) {
+ box_y = 0;
+ box_h = 1;
+ layer = box->y;
+ tx->nlayers = box->height;
+ } else {
+ layer = box->z;
+ tx->nlayers = box->depth;
+ }
}
- tx = CALLOC_STRUCT(nvc0_transfer);
- if (!tx)
- return NULL;
-
pipe_resource_reference(&tx->base.resource, res);
tx->base.level = level;
@@ -266,7 +280,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
tx->base.box = *box;
tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
- tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+ tx->nblocksy = util_format_get_nblocksy(res->format, box_h);
tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
tx->base.layer_stride = tx->nblocksy * tx->base.stride;
@@ -280,7 +294,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
tx->rect[0].base = lvl->offset + layer * mt->layer_stride;
tx->rect[0].tile_mode = lvl->tile_mode;
tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
- tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
+ tx->rect[0].y = util_format_get_nblocksy(res->format, box_y);
tx->rect[0].z = z;
tx->rect[0].width = util_format_get_nblocksx(res->format, w);
tx->rect[0].height = util_format_get_nblocksy(res->format, h);
@@ -291,7 +305,7 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
size = tx->base.layer_stride;
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
- size * tx->base.box.depth, &tx->rect[1].bo);
+ size * tx->nlayers, &tx->rect[1].bo);
if (ret) {
FREE(tx);
return NULL;
@@ -304,8 +318,9 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
tx->rect[1].domain = NOUVEAU_BO_GART;
if (usage & PIPE_TRANSFER_READ) {
+ unsigned base = tx->rect[0].base;
unsigned i;
- for (i = 0; i < box->depth; ++i) {
+ for (i = 0; i < tx->nlayers; ++i) {
nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
tx->nblocksx, tx->nblocksy);
if (mt->layout_3d)
@@ -314,9 +329,10 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
tx->rect[0].base += mt->layer_stride;
tx->rect[1].base += size;
}
+ tx->rect[0].z = z;
+ tx->rect[0].base = base;
+ tx->rect[1].base = 0;
}
- tx->rect[0].z = z;
- tx->rect[1].base = 0;
return &tx->base;
}
@@ -331,7 +347,7 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx,
unsigned i;
if (tx->base.usage & PIPE_TRANSFER_WRITE) {
- for (i = 0; i < tx->base.box.depth; ++i) {
+ for (i = 0; i < tx->nlayers; ++i) {
nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1],
tx->nblocksx, tx->nblocksy);
if (mt->layout_3d)
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
index a51a887ed89..2db43d8704b 100644
--- a/src/gallium/drivers/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -54,12 +54,13 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
assert(num_elements);
so = MALLOC(sizeof(*so) +
- (num_elements - 1) * sizeof(struct nvc0_vertex_element));
+ num_elements * sizeof(struct nvc0_vertex_element));
if (!so)
return NULL;
so->num_elements = num_elements;
so->instance_elts = 0;
so->instance_bufs = 0;
+ so->need_conversion = FALSE;
transkey.nr_elements = 0;
transkey.output_stride = 0;
@@ -83,6 +84,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
return NULL;
}
so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->need_conversion = TRUE;
}
so->element[i].state |= i;
@@ -152,7 +154,7 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi,
if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) {
/* TODO: use min and max instance divisor to get a proper range */
*base = 0;
- *size = (nvc0->vtxbuf[vbi].max_index + 1) * nvc0->vtxbuf[vbi].stride;
+ *size = nvc0->vtxbuf[vbi].buffer->width0;
} else {
assert(nvc0->vbo_max_index != ~0);
*base = nvc0->vbo_min_index * nvc0->vtxbuf[vbi].stride;
@@ -171,12 +173,15 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
nvc0->vbo_fifo = nvc0->vbo_user = 0;
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
+
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
vb = &nvc0->vtxbuf[i];
if (!vb->stride)
continue;
buf = nvc0_resource(vb->buffer);
+ /* NOTE: user buffers with temporary storage count as mapped by GPU */
if (!nvc0_resource_mapped_by_gpu(vb->buffer)) {
if (nvc0->vbo_push_hint) {
nvc0->vbo_fifo = ~0;
@@ -227,16 +232,30 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0)
}
offset = vb->buffer_offset + ve->src_offset;
+ MARK_RING (chan, 6, 4);
BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
OUT_RING (chan, i);
- OUT_RESRCh(chan, buf, size - 1, NOUVEAU_BO_RD);
- OUT_RESRCl(chan, buf, size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCh(chan, buf, base + size - 1, NOUVEAU_BO_RD);
+ OUT_RESRCl(chan, buf, base + size - 1, NOUVEAU_BO_RD);
OUT_RESRCh(chan, buf, offset, NOUVEAU_BO_RD);
OUT_RESRCl(chan, buf, offset, NOUVEAU_BO_RD);
}
nvc0->vbo_dirty = TRUE;
}
+static INLINE void
+nvc0_release_user_vbufs(struct nvc0_context *nvc0)
+{
+ uint32_t vbo_user = nvc0->vbo_user;
+
+ while (vbo_user) {
+ int i = ffs(vbo_user) - 1;
+ vbo_user &= ~(1 << i);
+
+ nvc0_buffer_release_gpu_storage(nvc0_resource(nvc0->vtxbuf[i].buffer));
+ }
+}
+
void
nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
{
@@ -246,7 +265,12 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
struct nvc0_vertex_element *ve;
unsigned i;
- nvc0_prevalidate_vbufs(nvc0);
+ if (unlikely(vertex->need_conversion)) {
+ nvc0->vbo_fifo = ~0;
+ nvc0->vbo_user = 0;
+ } else {
+ nvc0_prevalidate_vbufs(nvc0);
+ }
BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
for (i = 0; i < vertex->num_elements; ++i) {
@@ -292,6 +316,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
size = vb->buffer->width0;
offset = ve->pipe.src_offset + vb->buffer_offset;
+ MARK_RING (chan, 8, 4);
BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
OUT_RING (chan, (1 << 12) | vb->stride);
BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
@@ -346,57 +371,10 @@ nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)
{
struct nvc0_context *nvc0 = chan->user_private;
- nvc0_bufctx_emit_relocs(nvc0);
-}
+ nvc0_screen_fence_update(nvc0->screen, TRUE);
-#if 0
-static struct nouveau_bo *
-nvc0_tfb_setup(struct nvc0_context *nvc0)
-{
- struct nouveau_channel *chan = nvc0->screen->base.channel;
- struct nouveau_bo *tfb = NULL;
- int ret, i;
-
- ret = nouveau_bo_new(nvc0->screen->base.device,
- NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb);
- if (ret)
- return NULL;
-
- ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR);
- if (ret)
- return NULL;
- memset(tfb->map, 0xee, 8 * 4 * 3);
- nouveau_bo_unmap(tfb);
-
- BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5);
- OUT_RING (chan, 1);
- OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- OUT_RING (chan, tfb->size);
- OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID(0) */
- BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3);
- OUT_RING (chan, 0);
- OUT_RING (chan, 8); /* TFB_VARYING_COUNT(0) */
- OUT_RING (chan, 32); /* TFB_BUFFER_STRIDE(0) */
- BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2);
- OUT_RING (chan, 0x1f1e1d1c);
- OUT_RING (chan, 0xa3a2a1a0);
- for (i = 1; i < 4; ++i) {
- BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1);
- OUT_RING (chan, 0);
- }
- BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, RING_3D_(0x135c), 1);
- OUT_RING (chan, 1);
- BEGIN_RING(chan, RING_3D_(0x135c), 1);
- OUT_RING (chan, 0);
-
- return tfb;
+ nvc0_bufctx_emit_relocs(nvc0);
}
-#endif
static void
nvc0_draw_arrays(struct nvc0_context *nvc0,
@@ -422,7 +400,7 @@ nvc0_draw_arrays(struct nvc0_context *nvc0,
prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
- chan->flush_notify = NULL;
+ chan->flush_notify = nvc0_default_flush_notify;
}
static void
@@ -592,7 +570,7 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
}
}
- chan->flush_notify = NULL;
+ chan->flush_notify = nvc0_default_flush_notify;
}
void
@@ -611,6 +589,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0->vbo_min_index = info->min_index;
nvc0->vbo_max_index = info->max_index;
+ if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo)
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+
if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS)))
nvc0_update_user_vbufs(nvc0);
@@ -668,4 +649,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->mode, info->start, info->count,
info->instance_count, info->index_bias);
}
+
+ nvc0_release_user_vbufs(nvc0);
}
diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
index 1544fb7a1de..45f71967eff 100644
--- a/src/gallium/drivers/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
@@ -95,7 +95,7 @@ OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res,
unsigned delta, unsigned flags)
{
if (flags & NOUVEAU_BO_WR)
- res->status |= NVC0_BUFFER_STATUS_DIRTY;
+ res->status |= NVC0_BUFFER_STATUS_GPU_WRITING;
return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
}
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
index 01dacb43dad..b72379d6536 100644
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -2,6 +2,7 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_transfer.h"
#include "translate/translate.h"
#include "nvfx_context.h"
@@ -631,4 +632,6 @@ nvfx_init_vbo_functions(struct nvfx_context *nvfx)
nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
+
+ nvfx->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index a43e83c0d36..4f86db39926 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -55,11 +55,12 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o
util_blitter_save_viewport(r300->blitter, &r300->viewport);
util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state);
util_blitter_save_vertex_elements(r300->blitter, r300->velems);
- util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count,
- r300->vertex_buffer);
+ util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers,
+ r300->vbuf_mgr->vertex_buffer);
- if (op & (R300_CLEAR_SURFACE | R300_COPY))
+ if (op & (R300_CLEAR_SURFACE | R300_COPY)) {
util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
+ }
if (op & R300_COPY) {
struct r300_textures_state* state =
@@ -108,6 +109,14 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
return r300_surface(fb->cbufs[0])->cbzb_allowed;
}
+static boolean r300_fast_zclear_allowed(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+ return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level];
+}
+
static uint32_t r300_depth_clear_value(enum pipe_format format,
double depth, unsigned stencil)
{
@@ -132,37 +141,46 @@ static void r300_clear(struct pipe_context* pipe,
double depth,
unsigned stencil)
{
- /* My notes about fastfill:
+ /* My notes about Zbuffer compression:
*
- * 1) Only the zbuffer is cleared.
+ * 1) The zbuffer must be micro-tiled and whole microtiles must be
+ * written if compression is enabled. If microtiling is disabled,
+ * it locks up.
*
- * 2) The zbuffer must be micro-tiled and whole microtiles must be
- * written. If microtiling is disabled, it locks up.
+ * 2) There is ZMASK RAM which contains a compressed zbuffer.
+ * Each dword of the Z Mask contains compression information
+ * for 16 4x4 pixel tiles, that is 2 bits for each tile.
+ * On chips with 2 Z pipes, every other dword maps to a different
+ * pipe. On newer chipsets, there is a new compression mode
+ * with 8x8 pixel tiles per 2 bits.
*
- * 3) There is Z Mask RAM which contains a compressed zbuffer and
- * it interacts with fastfill. We should figure out how to use it
- * to get more performance.
- * This is what we know about the Z Mask:
+ * 3) The FASTFILL bit has nothing to do with filling. It only tells hw
+ * it should look in the ZMASK RAM first before fetching from a real
+ * zbuffer.
*
- * Each dword of the Z Mask contains compression information
- * for 16 4x4 pixel blocks, that is 2 bits for each block.
- * On chips with 2 Z pipes, every other dword maps to a different
- * pipe.
+ * 4) If a pixel is in a cleared state, ZB_DEPTHCLEARVALUE is returned
+ * during zbuffer reads instead of the value that is actually stored
+ * in the zbuffer memory. A pixel is in a cleared state when its ZMASK
+ * is equal to 0. Therefore, if you clear ZMASK with zeros, you may
+ * leave the zbuffer memory uninitialized, but then you must enable
+ * compression, so that the ZMASK RAM is actually used.
*
- * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must
- * be equal to 0. (clear the Z Mask RAM with zeros)
+ * 5) Each 4x4 (or 8x8) tile is automatically decompressed and recompressed
+ * during zbuffer updates. A special decompressing operation should be
+ * used to fully decompress a zbuffer, which basically just stores all
+ * compressed tiles in ZMASK to the zbuffer memory.
*
- * 5) For 16-bit zbuffer, compression causes a hung with one or
+ * 6) For a 16-bit zbuffer, compression causes a hung with one or
* two samples and should not be used.
*
- * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
+ * 7) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears
* to avoid needless decompression.
*
- * 7) Fastfill must not be used if reading of compressed Z data is disabled
+ * 8) Fastfill must not be used if reading of compressed Z data is disabled
* and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
* i.e. it cannot be used to compress the zbuffer.
*
- * 8) ZB_CB_CLEAR does not interact with fastfill in any way.
+ * 9) ZB_CB_CLEAR does not interact with zbuffer compression in any way.
*
* - Marek
*/
@@ -172,32 +190,30 @@ static void r300_clear(struct pipe_context* pipe,
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_hyperz_state *hyperz =
(struct r300_hyperz_state*)r300->hyperz_state.state;
- struct r300_texture *zstex =
- fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
+ struct r300_resource *zstex =
+ fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
uint32_t width = fb->width;
uint32_t height = fb->height;
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
- /* Decompress zbuffers that are bound as textures. If we didn't flush here,
- * it would happen inside the blitter when updating derived state,
- * causing a blitter operation to be called from inside the blitter,
- * which would overwrite saved states and they would never get restored. */
- r300_flush_depth_textures(r300);
-
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
hyperz_dcv = hyperz->zb_depthclearvalue =
r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
- r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG);
- if (zstex->zmask_mem[fb->zsbuf->u.tex.level]) {
+ if (r300_fast_zclear_allowed(r300)) {
r300_mark_atom_dirty(r300, &r300->zmask_clear);
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
+
if (zstex->hiz_mem[fb->zsbuf->u.tex.level])
r300_mark_atom_dirty(r300, &r300->hiz_clear);
+
+ /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ * once hiz offset is constant. */
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
/* Enable CBZB clear. */
@@ -211,7 +227,7 @@ static void r300_clear(struct pipe_context* pipe,
height = surf->cbzb_height;
r300->cbzb_clear = TRUE;
- r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
/* Clear. */
@@ -225,7 +241,7 @@ static void r300_clear(struct pipe_context* pipe,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
} else if (r300->zmask_clear.dirty) {
- /* Just clear zmask and hiz now, this does not use a standard draw
+ /* Just clear zmask and hiz now, this does not use the standard draw
* procedure. */
unsigned dwords;
@@ -257,16 +273,15 @@ static void r300_clear(struct pipe_context* pipe,
if (r300->cbzb_clear) {
r300->cbzb_clear = FALSE;
hyperz->zb_depthclearvalue = hyperz_dcv;
- r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
/* Enable fastfill and/or hiz.
*
* If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
* looks if zmask/hiz is in use and enables fastfill accordingly. */
- if (zstex &&
- (zstex->zmask_in_use[fb->zsbuf->u.tex.level] ||
- zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
+ if (r300->zmask_in_use ||
+ (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) {
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
@@ -280,16 +295,16 @@ static void r300_clear_render_target(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
- /* Decompress zbuffers that are bound as textures. If we didn't flush here,
- * it would happen inside the blitter when updating derived state,
- * causing a blitter operation to be called from inside the blitter,
- * which would overwrite saved states and they would never get restored. */
- r300_flush_depth_textures(r300);
+ r300->zmask_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_render_target(r300->blitter, dst, rgba,
dstx, dsty, width, height);
r300_blitter_end(r300);
+
+ r300->zmask_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
/* Clear a region of a depth stencil surface. */
@@ -302,83 +317,70 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
unsigned width, unsigned height)
{
struct r300_context *r300 = r300_context(pipe);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
- /* Decompress zbuffers that are bound as textures. If we didn't flush here,
- * it would happen inside the blitter when updating derived state,
- * causing a blitter operation to be called from inside the blitter,
- * which would overwrite saved states and they would never get restored. */
- r300_flush_depth_textures(r300);
+ if (r300->zmask_in_use && !r300->zmask_locked) {
+ if (fb->zsbuf->texture == dst->texture) {
+ r300_decompress_zmask(r300);
+ } else {
+ r300->zmask_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
+ }
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
r300_blitter_end(r300);
+
+ if (r300->zmask_locked) {
+ r300->zmask_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
}
-/* Flush a depth stencil buffer. */
-static void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- unsigned level,
- unsigned layer)
+void r300_decompress_zmask(struct r300_context *r300)
{
- struct r300_context *r300 = r300_context(pipe);
- struct pipe_surface *dstsurf, surf_tmpl;
- struct r300_texture *tex = r300_texture(dst);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
- if (!tex->zmask_mem[level])
- return;
- if (!tex->zmask_in_use[level])
+ if (!r300->zmask_in_use || r300->zmask_locked)
return;
- surf_tmpl.format = dst->format;
- surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
- surf_tmpl.u.tex.level = level;
- surf_tmpl.u.tex.first_layer = layer;
- surf_tmpl.u.tex.last_layer = layer;
- dstsurf = pipe->create_surface(pipe, dst, &surf_tmpl);
+ r300->zmask_decompress = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
- r300->z_decomp_rd = TRUE;
-
- r300_blitter_begin(r300, R300_CLEAR_SURFACE);
- util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
+ r300_blitter_begin(r300, R300_CLEAR);
+ util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0,
+ r300->dsa_decompress_zmask);
r300_blitter_end(r300);
- r300->z_decomp_rd = FALSE;
- tex->zmask_in_use[level] = FALSE;
- pipe_surface_reference(&dstsurf, NULL);
+ r300->zmask_decompress = FALSE;
+ r300->zmask_in_use = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
-/* We can't use compressed zbuffers as samplers. */
-void r300_flush_depth_textures(struct r300_context *r300)
+void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
{
- struct r300_textures_state *state =
- (struct r300_textures_state*)r300->textures_state.state;
- unsigned i, level;
- unsigned count = MIN2(state->sampler_view_count,
- state->sampler_state_count);
-
- if (r300->z_decomp_rd)
- return;
-
- for (i = 0; i < count; i++)
- if (state->sampler_views[i] && state->sampler_states[i]) {
- struct pipe_resource *tex = state->sampler_views[i]->base.texture;
-
- if (tex->target == PIPE_TEXTURE_3D ||
- tex->target == PIPE_TEXTURE_CUBE)
- continue;
+ struct pipe_framebuffer_state fb = {0};
+ fb.width = r300->locked_zbuffer->width;
+ fb.height = r300->locked_zbuffer->height;
+ fb.nr_cbufs = 0;
+ fb.zsbuf = r300->locked_zbuffer;
+
+ r300->context.set_framebuffer_state(&r300->context, &fb);
+ r300_decompress_zmask(r300);
+}
- /* Ignore non-depth textures.
- * Also ignore reinterpreted depth textures, e.g. resource_copy. */
- if (!util_format_is_depth_or_stencil(tex->format))
- continue;
+void r300_decompress_zmask_locked(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state saved_fb = {0};
- for (level = 0; level <= tex->last_level; level++)
- if (r300_texture(tex)->zmask_in_use[level]) {
- /* We don't handle 3D textures and cubemaps yet. */
- r300_flush_depth_stencil(&r300->context, tex, level, 0);
- }
- }
+ util_copy_framebuffer_state(&saved_fb, r300->fb_state.state);
+ r300_decompress_zmask_locked_unsafe(r300);
+ r300->context.set_framebuffer_state(&r300->context, &saved_fb);
+ util_unreference_framebuffer_state(&saved_fb);
}
/* Copy a block of pixels from one surface to another using HW. */
@@ -393,8 +395,6 @@ static void r300_hw_copy_region(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
r300_blitter_begin(r300, R300_COPY);
-
- /* Do a copy */
util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box, TRUE);
r300_blitter_end(r300);
@@ -409,56 +409,102 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
unsigned src_level,
const struct pipe_box *src_box)
{
- enum pipe_format old_format = dst->format;
- enum pipe_format new_format = old_format;
- boolean is_depth;
-
- if (!pipe->screen->is_format_supported(pipe->screen,
- old_format, src->target,
- src->nr_samples,
- PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_SAMPLER_VIEW, 0) &&
- util_format_is_plain(old_format)) {
- switch (util_format_get_blocksize(old_format)) {
+ struct r300_context *r300 = r300_context(pipe);
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct pipe_resource old_src = *src;
+ struct pipe_resource old_dst = *dst;
+ struct pipe_resource new_src = old_src;
+ struct pipe_resource new_dst = old_dst;
+ const struct util_format_description *desc =
+ util_format_description(dst->format);
+ struct pipe_box box;
+
+ if (r300->zmask_in_use && !r300->zmask_locked) {
+ if (fb->zsbuf->texture == src ||
+ fb->zsbuf->texture == dst) {
+ r300_decompress_zmask(r300);
+ } else {
+ r300->zmask_locked = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
+ }
+ }
+
+ /* Handle non-renderable plain formats. */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+ (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
+ !pipe->screen->is_format_supported(pipe->screen,
+ src->format, src->target,
+ src->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW, 0) ||
+ !pipe->screen->is_format_supported(pipe->screen,
+ dst->format, dst->target,
+ dst->nr_samples,
+ PIPE_BIND_RENDER_TARGET, 0))) {
+ switch (util_format_get_blocksize(old_dst.format)) {
case 1:
- new_format = PIPE_FORMAT_I8_UNORM;
+ new_dst.format = PIPE_FORMAT_I8_UNORM;
break;
case 2:
- new_format = PIPE_FORMAT_B4G4R4A4_UNORM;
+ new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM;
break;
case 4:
- new_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM;
break;
case 8:
- new_format = PIPE_FORMAT_R16G16B16A16_UNORM;
+ new_dst.format = PIPE_FORMAT_R16G16B16A16_UNORM;
break;
default:
debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n"
"r300: surface_copy: Software fallback doesn't work for tiled textures.\n",
- util_format_short_name(old_format));
+ util_format_short_name(dst->format));
}
+ new_src.format = new_dst.format;
}
- is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
- if (is_depth) {
- r300_flush_depth_stencil(pipe, src, src_level, src_box->z);
- }
+ /* Handle compressed formats. */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ switch (util_format_get_blocksize(old_dst.format)) {
+ case 8:
+ /* 1 pixel = 4 bits,
+ * we set 1 pixel = 2 bytes ===> 4 times larger pixels. */
+ new_dst.format = PIPE_FORMAT_B4G4R4A4_UNORM;
+ break;
+ case 16:
+ /* 1 pixel = 8 bits,
+ * we set 1 pixel = 4 bytes ===> 4 times larger pixels. */
+ new_dst.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ break;
+ }
- if (old_format != new_format) {
- r300_texture_reinterpret_format(pipe->screen,
- dst, new_format);
- r300_texture_reinterpret_format(pipe->screen,
- src, new_format);
+ /* Since the pixels are 4 times larger, we must decrease
+ * the image size and the coordinates 4 times. */
+ new_src.format = new_dst.format;
+ new_dst.height0 = (new_dst.height0 + 3) / 4;
+ new_src.height0 = (new_src.height0 + 3) / 4;
+ dsty /= 4;
+ box = *src_box;
+ box.y /= 4;
+ box.height = (box.height + 3) / 4;
+ src_box = &box;
}
+ if (old_src.format != new_src.format)
+ r300_resource_set_properties(pipe->screen, src, 0, &new_src);
+ if (old_dst.format != new_dst.format)
+ r300_resource_set_properties(pipe->screen, dst, 0, &new_dst);
+
r300_hw_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
- if (old_format != new_format) {
- r300_texture_reinterpret_format(pipe->screen,
- dst, old_format);
- r300_texture_reinterpret_format(pipe->screen,
- src, old_format);
+ if (old_src.format != new_src.format)
+ r300_resource_set_properties(pipe->screen, src, 0, &old_src);
+ if (old_dst.format != new_dst.format)
+ r300_resource_set_properties(pipe->screen, dst, 0, &old_dst);
+
+ if (r300->zmask_locked) {
+ r300->zmask_locked = FALSE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
}
diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h
index 9d3d4fc1b19..b373937a1f9 100644
--- a/src/gallium/drivers/r300/r300_cb.h
+++ b/src/gallium/drivers/r300/r300_cb.h
@@ -61,40 +61,52 @@
* that they neatly hide away, and don't have the cost of function setup, so
* we're going to use them. */
-#ifdef DEBUG
-#define CB_DEBUG(x) x
-#else
-#define CB_DEBUG(x)
-#endif
-
-
/**
* Command buffer setup.
*/
+#ifdef DEBUG
+
#define CB_LOCALS \
- CB_DEBUG(int cs_count = 0;) \
+ int cs_count = 0; \
uint32_t *cs_ptr = NULL; \
- CB_DEBUG((void) cs_count;) (void) cs_ptr;
+ (void) cs_count; (void) cs_ptr
-#define NEW_CB(ptr, size) do { \
- assert(sizeof(*ptr) == sizeof(uint32_t)); \
- cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \
- CB_DEBUG(cs_count = size;) \
+#define BEGIN_CB(ptr, size) do { \
+ assert(sizeof(*(ptr)) == sizeof(uint32_t)); \
+ cs_count = (size); \
+ cs_ptr = (ptr); \
} while (0)
-#define BEGIN_CB(ptr, size) do { \
- assert(sizeof(*ptr) == sizeof(uint32_t)); \
- cs_ptr = ptr; \
- CB_DEBUG(cs_count = size;) \
+#define NEW_CB(ptr, size) \
+ do { \
+ assert(sizeof(*(ptr)) == sizeof(uint32_t)); \
+ cs_count = (size); \
+ cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \
} while (0)
#define END_CB do { \
- CB_DEBUG(if (cs_count != 0) \
+ if (cs_count != 0) \
debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
- cs_count, __FUNCTION__, __FILE__, __LINE__);) \
+ cs_count, __FUNCTION__, __FILE__, __LINE__); \
} while (0)
+#define CB_USED_DW(x) cs_count -= x
+
+#else
+
+#define CB_LOCALS \
+ uint32_t *cs_ptr = NULL; (void) cs_ptr
+
+#define NEW_CB(ptr, size) \
+ cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t))
+
+#define BEGIN_CB(ptr, size) cs_ptr = (ptr)
+#define END_CB
+#define CB_USED_DW(x)
+
+#endif
+
/**
* Storing pure DWORDs.
@@ -103,13 +115,13 @@
#define OUT_CB(value) do { \
*cs_ptr = (value); \
cs_ptr++; \
- CB_DEBUG(cs_count--;) \
+ CB_USED_DW(1); \
} while (0)
#define OUT_CB_TABLE(values, count) do { \
memcpy(cs_ptr, values, count * sizeof(uint32_t)); \
cs_ptr += count; \
- CB_DEBUG(cs_count -= count;) \
+ CB_USED_DW(count); \
} while (0)
#define OUT_CB_32F(value) \
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 2b183f62c56..1968d0feb35 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -241,7 +241,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
case 0x5A42:
caps->family = CHIP_FAMILY_RS400;
caps->has_tcl = FALSE;
- caps->zmask_ram = RV3xx_ZMASK_SIZE;
break;
case 0x5A61:
@@ -424,5 +423,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
}
caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350;
+ caps->z_compress = caps->is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4;
caps->dxtc_swizzle = caps->is_r400 || caps->is_r500;
}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index f2035d20092..0be161fa07a 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -33,6 +33,13 @@
#define PIPE_ZMASK_SIZE 4096
#define RV3xx_ZMASK_SIZE 5120
+/* The size of a compressed tile. Each compressed tile takes 2 bits
+ * in the ZMASK RAM, so there is always 16 tiles per one dword. */
+enum r300_zmask_compression {
+ R300_ZCOMP_4X4 = 4,
+ R300_ZCOMP_8X8 = 8
+};
+
/* Structure containing all the possible information about a specific Radeon
* in the R3xx, R4xx, and R5xx families. */
struct r300_capabilities {
@@ -50,10 +57,12 @@ struct r300_capabilities {
unsigned num_tex_units;
/* Whether or not TCL is physically present */
boolean has_tcl;
- /* Some chipsets do not have HiZ RAM - other have varying amounts . */
+ /* Some chipsets do not have HiZ RAM - other have varying amounts. */
int hiz_ram;
- /* some chipsets have zmask ram per pipe some don't */
+ /* Some chipsets have zmask ram per pipe some don't. */
int zmask_ram;
+ /* Compression mode for ZMASK. */
+ enum r300_zmask_compression z_compress;
/* Whether or not this is RV350 or newer, including all r400 and r500
* chipsets. The differences compared to the oldest r300 chips are:
* - Blend LTE/GTE thresholds
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 91263ad7bcd..9f85bd4ce5f 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -35,26 +35,24 @@
#include "r300_screen_buffer.h"
#include "r300_winsys.h"
-#ifdef HAVE_LLVM
-#include "gallivm/lp_bld_init.h"
-#endif
-
static void r300_update_num_contexts(struct r300_screen *r300screen,
int diff)
{
+ pipe_mutex_lock(r300screen->num_contexts_mutex);
if (diff > 0) {
- p_atomic_inc(&r300screen->num_contexts);
+ r300screen->num_contexts++;
if (r300screen->num_contexts > 1)
util_slab_set_thread_safety(&r300screen->pool_buffers,
UTIL_SLAB_MULTITHREADED);
} else {
- p_atomic_dec(&r300screen->num_contexts);
+ r300screen->num_contexts--;
if (r300screen->num_contexts <= 1)
util_slab_set_thread_safety(&r300screen->pool_buffers,
UTIL_SLAB_SINGLETHREADED);
}
+ pipe_mutex_unlock(r300screen->num_contexts_mutex);
}
static void r300_release_referenced_objects(struct r300_context *r300)
@@ -87,17 +85,14 @@ static void r300_release_referenced_objects(struct r300_context *r300)
/* The SWTCL VBO. */
pipe_resource_reference(&r300->vbo, NULL);
- /* Vertex buffers. */
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
- pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL);
- }
-
/* If there are any queries pending or not destroyed, remove them now. */
foreach_s(query, temp, &r300->query_list) {
remove_from_list(query);
FREE(query);
}
+
+ r300->context.delete_depth_stencil_alpha_state(&r300->context,
+ r300->dsa_decompress_zmask);
}
static void r300_destroy_context(struct pipe_context* context)
@@ -106,28 +101,15 @@ static void r300_destroy_context(struct pipe_context* context)
if (r300->blitter)
util_blitter_destroy(r300->blitter);
- if (r300->draw) {
+ if (r300->draw)
draw_destroy(r300->draw);
-#ifdef HAVE_LLVM
- gallivm_destroy(r300->gallivm);
-#endif
- }
-
- if (r300->upload_vb)
- u_upload_destroy(r300->upload_vb);
- if (r300->upload_ib)
- u_upload_destroy(r300->upload_ib);
-
- if (r300->tran.translate_cache)
- translate_cache_destroy(r300->tran.translate_cache);
+ if (r300->vbuf_mgr)
+ u_vbuf_mgr_destroy(r300->vbuf_mgr);
/* XXX: This function assumes r300->query_list was initialized */
r300_release_referenced_objects(r300);
- if (r300->zmask_mm)
- r300_hyperz_destroy_mm(r300);
-
if (r300->cs)
r300->rws->cs_destroy(r300->cs);
@@ -247,7 +229,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
if (has_hiz_ram)
R300_INIT_ATOM(hiz_clear, 0);
/* zmask clear */
- R300_INIT_ATOM(zmask_clear, 0);
+ R300_INIT_ATOM(zmask_clear, 4);
}
/* ZB (unpipelined), SU. */
R300_INIT_ATOM(query_start, 4);
@@ -432,12 +414,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
if (!r300screen->caps.has_tcl) {
/* Create a Draw. This is used for SW TCL. */
-#ifdef HAVE_LLVM
- r300->gallivm = gallivm_create();
- r300->draw = draw_create_gallivm(&r300->context, r300->gallivm);
-#else
r300->draw = draw_create(&r300->context);
-#endif
if (r300->draw == NULL)
goto fail;
/* Enable our renderer. */
@@ -456,6 +433,13 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_init_state_functions(r300);
r300_init_resource_functions(r300);
+ r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16,
+ PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER,
+ U_VERTEX_FETCH_DWORD_ALIGNED);
+ if (!r300->vbuf_mgr)
+ goto fail;
+
r300->blitter = util_blitter_create(&r300->context);
if (r300->blitter == NULL)
goto fail;
@@ -470,23 +454,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
if (!r300_hyperz_init_mm(r300))
goto fail;
- r300->upload_ib = u_upload_create(&r300->context,
- 64 * 1024, 16,
- PIPE_BIND_INDEX_BUFFER);
-
- if (r300->upload_ib == NULL)
- goto fail;
-
- r300->upload_vb = u_upload_create(&r300->context,
- 1024 * 1024, 16,
- PIPE_BIND_VERTEX_BUFFER);
- if (r300->upload_vb == NULL)
- goto fail;
-
- r300->tran.translate_cache = translate_cache_create();
- if (r300->tran.translate_cache == NULL)
- goto fail;
-
r300_init_states(&r300->context);
/* The KIL opcode needs the first texture unit to be enabled
@@ -515,7 +482,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
}
{
- struct pipe_resource vb = {};
+ struct pipe_resource vb;
+ memset(&vb, 0, sizeof(vb));
vb.target = PIPE_BUFFER;
vb.format = PIPE_FORMAT_R8_UNORM;
vb.bind = PIPE_BIND_VERTEX_BUFFER;
@@ -527,36 +495,44 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->dummy_vb = screen->resource_create(screen, &vb);
}
+ {
+ struct pipe_depth_stencil_alpha_state dsa;
+ memset(&dsa, 0, sizeof(dsa));
+ dsa.depth.writemask = 1;
+
+ r300->dsa_decompress_zmask =
+ r300->context.create_depth_stencil_alpha_state(&r300->context,
+ &dsa);
+ }
+
+ /* Print driver info. */
+#ifdef NDEBUG
+ if (DBG_ON(r300, DBG_INFO)) {
+#else
+ {
+#endif
+ fprintf(stderr,
+ "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n"
+ "r300: GART size: %d MB, VRAM size: %d MB\n"
+ "r300: AA compression: %s, Z compression: %s, HiZ: %s\n",
+ rws->get_value(rws, R300_VID_DRM_MAJOR),
+ rws->get_value(rws, R300_VID_DRM_MINOR),
+ rws->get_value(rws, R300_VID_DRM_PATCHLEVEL),
+ screen->get_name(screen),
+ rws->get_value(rws, R300_VID_PCI_ID),
+ rws->get_value(rws, R300_VID_GB_PIPES),
+ rws->get_value(rws, R300_VID_Z_PIPES),
+ rws->get_value(rws, R300_VID_GART_SIZE) >> 20,
+ rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20,
+ rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO",
+ rws->get_value(rws, R300_CAN_HYPERZ) ? "YES" : "NO",
+ rws->get_value(rws, R300_CAN_HYPERZ) &&
+ r300->screen->caps.hiz_ram ? "YES" : "NO");
+ }
+
return &r300->context;
- fail:
+fail:
r300_destroy_context(&r300->context);
return NULL;
}
-
-void r300_finish(struct r300_context *r300)
-{
- struct pipe_framebuffer_state *fb;
- unsigned i;
-
- /* This is a preliminary implementation of glFinish.
- *
- * The ideal implementation should use something like EmitIrqLocked and
- * WaitIrq, or better, real fences.
- */
- if (r300->fb_state.state) {
- fb = r300->fb_state.state;
-
- for (i = 0; i < fb->nr_cbufs; i++) {
- if (fb->cbufs[i]->texture) {
- r300->rws->buffer_wait(r300->rws,
- r300_texture(fb->cbufs[i]->texture)->buffer);
- return;
- }
- }
- if (fb->zsbuf && fb->zsbuf->texture) {
- r300->rws->buffer_wait(r300->rws,
- r300_texture(fb->zsbuf->texture)->buffer);
- }
- }
-}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index e09cf87f733..30073759476 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -30,11 +30,11 @@
#include "pipe/p_context.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
-
-#include "translate/translate_cache.h"
+#include "util/u_vbuf_mgr.h"
#include "r300_defines.h"
#include "r300_screen.h"
+#include "r300_winsys.h"
struct u_upload_mgr;
struct r300_context;
@@ -189,11 +189,6 @@ struct r300_sampler_view {
uint32_t texcache_region;
};
-struct r300_texture_fb_state {
- uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */
- uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */
-};
-
struct r300_texture_sampler_state {
struct r300_texture_format_state format;
uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
@@ -273,14 +268,12 @@ struct r300_query {
/* How many results have been written, in dwords. It's incremented
* after end_query and flush. */
unsigned num_results;
- /* if we've flushed the query */
- boolean flushed;
/* if begin has been emitted */
boolean begin_emitted;
/* The buffer where query results are stored. */
- struct r300_winsys_buffer *buffer;
- struct r300_winsys_cs_buffer *cs_buffer;
+ struct r300_winsys_bo *buf;
+ struct r300_winsys_cs_handle *cs_buf;
/* The size of the buffer. */
unsigned buffer_size;
/* The domain of the buffer. */
@@ -291,28 +284,12 @@ struct r300_query {
struct r300_query* next;
};
-/* Fence object.
- *
- * This is a fake fence. Instead of syncing with the fence, we sync
- * with the context, which is inefficient but compliant.
- *
- * This is not a subclass of pipe_fence_handle because pipe_fence_handle is
- * never actually fully defined. So, rather than have it as a member, and do
- * subclass-style casting, we treat pipe_fence_handle as an opaque, and just
- * trust that our state tracker does not ever mess up fence objects.
- */
-struct r300_fence {
- struct pipe_reference reference;
- struct r300_context *ctx;
- boolean signalled;
-};
-
struct r300_surface {
struct pipe_surface base;
/* Winsys buffer backing the texture. */
- struct r300_winsys_buffer *buffer;
- struct r300_winsys_cs_buffer *cs_buffer;
+ struct r300_winsys_bo *buf;
+ struct r300_winsys_cs_handle *cs_buf;
enum r300_buffer_domain domain;
@@ -329,13 +306,9 @@ struct r300_surface {
/* Whether the CBZB clear is allowed on the surface. */
boolean cbzb_allowed;
-
};
struct r300_texture_desc {
- /* Parent class. */
- struct u_resource b;
-
/* Width, height, and depth.
* Most of the time, these are equal to pipe_texture::width0, height0,
* and depth0. However, NPOT 3D textures must have dimensions aligned
@@ -387,27 +360,38 @@ struct r300_texture_desc {
/* Whether CBZB fast color clear is allowed on the miplevel. */
boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS];
+
+ /* Zbuffer compression info for each miplevel. */
+ boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS];
+ /* If zero, then disable compression. */
+ unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS];
};
-struct r300_texture {
- struct r300_texture_desc desc;
+struct r300_resource
+{
+ struct u_vbuf_resource b;
+ /* Winsys buffer backing this resource. */
+ struct r300_winsys_bo *buf;
+ struct r300_winsys_cs_handle *cs_buf;
enum r300_buffer_domain domain;
+ unsigned buf_size;
+
+ /* Constant buffers are in user memory. */
+ uint8_t *constant_buffer;
- /* Pipe buffer backing this texture. */
- struct r300_winsys_buffer *buffer;
- struct r300_winsys_cs_buffer *cs_buffer;
+ /* Texture description (addressing, layout, special features). */
+ struct r300_texture_desc tex;
/* Registers carrying texture format data. */
/* Only format-independent bits should be filled in. */
struct r300_texture_format_state tx_format;
- /* All bits should be filled in. */
- struct r300_texture_fb_state fb_state;
- /* hyper-z memory allocs */
+ /* Where the texture starts in the buffer. */
+ unsigned tex_offset;
+
+ /* HiZ memory allocations. */
struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
- struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS];
- boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS];
boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
/* This is the level tiling flags were last time set for.
@@ -418,32 +402,16 @@ struct r300_texture {
struct r300_vertex_element_state {
unsigned count;
struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+ unsigned format_size[PIPE_MAX_ATTRIBS];
- /* If (velem[i].src_format != hw_format[i]), the vertex buffer
- * referenced by this vertex element cannot be used for rendering and
- * its vertex data must be translated to hw_format[i]. */
- enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
- unsigned hw_format_size[PIPE_MAX_ATTRIBS];
+ struct u_vbuf_mgr_elements *vmgr_elements;
/* The size of the vertex, in dwords. */
unsigned vertex_size_dwords;
- /* This might mean two things:
- * - src_format != hw_format, as discussed above.
- * - src_offset % 4 != 0. */
- boolean incompatible_layout;
-
struct r300_vertex_stream_state vertex_stream;
};
-struct r300_translate_context {
- /* Translate cache for incompatible vertex offset/stride/format fallback. */
- struct translate_cache *translate_cache;
-
- /* Saved and new vertex element state. */
- void *saved_velems, *new_velems;
-};
-
struct r300_context {
/* Parent class */
struct pipe_context context;
@@ -456,7 +424,6 @@ struct r300_context {
struct r300_screen *screen;
/* Draw module. Used mostly for SW TCL. */
- struct gallivm_state *gallivm;
struct draw_context* draw;
/* Vertex buffer for SW TCL. */
struct pipe_resource* vbo;
@@ -471,8 +438,6 @@ struct r300_context {
struct blitter_context* blitter;
/* Stencil two-sided reference value fallback. */
struct r300_stencilref_context *stencilref_fallback;
- /* For translating vertex buffers having incompatible vertex layout. */
- struct r300_translate_context tran;
/* The KIL opcode needs the first texture unit to be enabled
* on r3xx-r4xx. In order to calm down the CS checker, we bind this
@@ -554,14 +519,6 @@ struct r300_context {
/* The pointers to the first and the last atom. */
struct r300_atom *first_dirty, *last_dirty;
- /* Vertex buffers for Gallium. */
- /* May contain user buffers. */
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- /* Contains only non-user buffers. */
- struct pipe_resource *valid_vertex_buffer[PIPE_MAX_ATTRIBS];
- int vertex_buffer_count;
- int vertex_buffer_max_index;
- boolean any_user_vbs;
/* Vertex elements for Gallium. */
struct r300_vertex_element_state *velems;
@@ -588,21 +545,23 @@ struct r300_context {
int sprite_coord_enable;
/* Whether two-sided color selection is enabled (AKA light_twoside). */
boolean two_sided_color;
- /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
- boolean incompatible_vb_layout;
-#define R300_Z_COMPRESS_44 1
-#define RV350_Z_COMPRESS_88 2
- int z_compression;
+
boolean cbzb_clear;
- boolean z_decomp_rd;
+ /* Whether ZMASK is enabled. */
+ boolean zmask_in_use;
+ /* Whether ZMASK is being decompressed. */
+ boolean zmask_decompress;
+ /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */
+ boolean zmask_locked;
+ /* The zbuffer the ZMASK of which is locked. */
+ struct pipe_surface *locked_zbuffer;
+
+ void *dsa_decompress_zmask;
/* two mem block managers for hiz/zmask ram space */
struct mem_block *hiz_mm;
- struct mem_block *zmask_mm;
- /* upload managers */
- struct u_upload_mgr *upload_vb;
- struct u_upload_mgr *upload_ib;
+ struct u_vbuf_mgr *vbuf_mgr;
struct util_slab_mempool pool_transfers;
@@ -612,16 +571,10 @@ struct r300_context {
/* const tracking for VS */
int vs_const_base;
- /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. */
- uint32_t vertex_arrays_cb[(16 * 3 + 1) / 2];
+ /* Vertex array state info */
boolean vertex_arrays_dirty;
-
- /* Whether any buffer (FB, textures, VBOs) has been set, but buffers
- * haven't been validated yet. */
- boolean validate_buffers;
- /* Whether user buffers have been validated. */
- boolean upload_vb_validated;
- boolean upload_ib_validated;
+ boolean vertex_arrays_indexed;
+ int vertex_arrays_offset;
};
#define foreach_atom(r300, atom) \
@@ -641,9 +594,9 @@ static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
return (struct r300_surface*)surf;
}
-static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex)
+static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex)
{
- return (struct r300_texture*)tex;
+ return (struct r300_resource*)tex;
}
static INLINE struct r300_context* r300_context(struct pipe_context* context)
@@ -675,7 +628,6 @@ static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
struct pipe_context* r300_create_context(struct pipe_screen* screen,
void *priv);
-void r300_finish(struct r300_context *r300);
void r300_flush_cb(void *data);
/* Context initialization. */
@@ -688,7 +640,9 @@ void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
/* r300_blit.c */
-void r300_flush_depth_textures(struct r300_context *r300);
+void r300_decompress_zmask(struct r300_context *r300);
+void r300_decompress_zmask_locked_unsafe(struct r300_context *r300);
+void r300_decompress_zmask_locked(struct r300_context *r300);
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
@@ -696,9 +650,6 @@ void r300_resume_query(struct r300_context *r300,
void r300_stop_query(struct r300_context *r300);
/* r300_render_translate.c */
-void r300_begin_vertex_translate(struct r300_context *r300,
- int min_index, int max_index);
-void r300_end_vertex_translate(struct r300_context *r300);
void r300_translate_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned *index_size, unsigned index_offset,
@@ -714,8 +665,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias);
/* r300_state.c */
enum r300_fb_state_change {
R300_CHANGED_FB_STATE = 0,
- R300_CHANGED_CBZB_FLAG,
- R300_CHANGED_ZCLEAR_FLAG,
+ R300_CHANGED_HYPERZ_FLAG,
R300_CHANGED_MULTIWRITE
};
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 6726f100e1b..2e52dfa43c6 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -35,12 +35,6 @@
* that they neatly hide away, and don't have the cost of function setup,so
* we're going to use them. */
-#ifdef DEBUG
-#define CS_DEBUG(x) x
-#else
-#define CS_DEBUG(x)
-#endif
-
/**
* Command submission setup.
*/
@@ -50,22 +44,29 @@
struct r300_winsys_screen *cs_winsys = (context)->rws; \
int cs_count = 0; (void) cs_count; (void) cs_winsys;
+#ifdef DEBUG
+
#define BEGIN_CS(size) do { \
assert(size <= (R300_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \
- CS_DEBUG(cs_count = size;) \
+ cs_count = size; \
} while (0)
-#ifdef DEBUG
#define END_CS do { \
if (cs_count != 0) \
debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
cs_count, __FUNCTION__, __FILE__, __LINE__); \
cs_count = 0; \
} while (0)
+
+#define CS_USED_DW(x) cs_count -= (x)
+
#else
+
+#define BEGIN_CS(size)
#define END_CS
-#endif
+#define CS_USED_DW(x)
+#endif
/**
* Writing pure DWORDs.
@@ -73,7 +74,7 @@
#define OUT_CS(value) do { \
cs_copy->buf[cs_copy->cdw++] = (value); \
- CS_DEBUG(cs_count--;) \
+ CS_USED_DW(1); \
} while (0)
#define OUT_CS_32F(value) \
@@ -98,7 +99,7 @@
#define OUT_CS_TABLE(values, count) do { \
memcpy(cs_copy->buf + cs_copy->cdw, values, count * 4); \
cs_copy->cdw += count; \
- CS_DEBUG(cs_count -= count;) \
+ CS_USED_DW(count); \
} while (0)
@@ -106,27 +107,11 @@
* Writing relocations.
*/
-#define OUT_CS_RELOC(bo, offset) do { \
- assert(bo); \
- OUT_CS(offset); \
- cs_winsys->cs_write_reloc(cs_copy, bo); \
- CS_DEBUG(cs_count -= 2;) \
-} while (0)
-
-#define OUT_CS_BUF_RELOC(bo, offset) do { \
- assert(bo); \
- OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset); \
-} while (0)
-
-#define OUT_CS_TEX_RELOC(tex, offset) do { \
- assert(tex); \
- OUT_CS_RELOC(tex->cs_buffer, offset); \
-} while (0)
-
-#define OUT_CS_BUF_RELOC_NO_OFFSET(bo) do { \
- assert(bo); \
- cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf); \
- CS_DEBUG(cs_count -= 2;) \
+#define OUT_CS_RELOC(r) do { \
+ assert((r)); \
+ assert((r)->cs_buf); \
+ cs_winsys->cs_write_reloc(cs_copy, (r)->cs_buf); \
+ CS_USED_DW(2); \
} while (0)
@@ -135,7 +120,7 @@
*/
#define WRITE_CS_TABLE(values, count) do { \
- CS_DEBUG(assert(cs_count == 0);) \
+ assert(cs_count == 0); \
memcpy(cs_copy->buf + cs_copy->cdw, (values), (count) * 4); \
cs_copy->cdw += (count); \
} while (0)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index d6aa90bd053..b60cfd1f248 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -27,6 +27,7 @@
#include <stdio.h>
static const struct debug_named_value debug_options[] = {
+ { "info", DBG_INFO, "Print hardware info"},
{ "fp", DBG_FP, "Log fragment program compilation" },
{ "vp", DBG_VP, "Log vertex program compilation" },
{ "pstat", DBG_P_STAT, "Log vertex/fragment program stats" },
@@ -49,6 +50,8 @@ static const struct debug_named_value debug_options[] = {
{ "noimmd", DBG_NO_IMMD, "Disable immediate mode" },
{ "noopt", DBG_NO_OPT, "Disable shader optimizations" },
{ "nocbzb", DBG_NO_CBZB, "Disable fast color clear" },
+ { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" },
+ { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
/* must be last */
DEBUG_NAMED_VALUE_END
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index d14cdcbbaf0..e2e4719ec82 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -89,7 +89,7 @@ static void get_rc_constant_state(
struct rc_constant * constant)
{
struct r300_textures_state* texstate = r300->textures_state.state;
- struct r300_texture *tex;
+ struct r300_resource *tex;
assert(constant->Type == RC_CONSTANT_STATE);
@@ -101,19 +101,19 @@ static void get_rc_constant_state(
/* Factor for converting rectangle coords to
* normalized coords. Should only show up on non-r500. */
case RC_STATE_R300_TEXRECT_FACTOR:
- tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture);
- vec[0] = 1.0 / tex->desc.width0;
- vec[1] = 1.0 / tex->desc.height0;
+ tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture);
+ vec[0] = 1.0 / tex->tex.width0;
+ vec[1] = 1.0 / tex->tex.height0;
vec[2] = 0;
vec[3] = 1;
break;
case RC_STATE_R300_TEXSCALE_FACTOR:
- tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture);
+ tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture);
/* Add a small number to the texture size to work around rounding errors in hw. */
- vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f);
- vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f);
- vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f);
+ vec[0] = tex->b.b.b.width0 / (tex->tex.width0 + 0.001f);
+ vec[1] = tex->b.b.b.height0 / (tex->tex.height0 + 0.001f);
+ vec[2] = tex->b.b.b.depth0 / (tex->tex.depth0 + 0.001f);
vec[3] = 1;
break;
@@ -352,11 +352,9 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state)
OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config);
if (aa->dest) {
- OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1);
- OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset);
-
- OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1);
- OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch);
+ OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset);
+ OUT_CS_RELOC(aa->dest);
+ OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch);
}
OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl);
@@ -391,11 +389,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
for (i = 0; i < fb->nr_cbufs; i++) {
surf = r300_surface(fb->cbufs[i]);
- OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->offset);
+ OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset);
+ OUT_CS_RELOC(surf);
- OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->pitch);
+ OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch);
+ OUT_CS_RELOC(surf);
}
/* Set up the ZB part of the CBZB clear. */
@@ -404,11 +402,11 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
- OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset);
+ OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset);
+ OUT_CS_RELOC(surf);
- OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch);
+ OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch);
+ OUT_CS_RELOC(surf);
DBG(r300, DBG_CBZB,
"CBZB clearing cbuf %08x %08x\n", surf->cbzb_format,
@@ -420,19 +418,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
- OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->offset);
+ OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset);
+ OUT_CS_RELOC(surf);
- OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(surf->cs_buffer, surf->pitch);
+ OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch);
+ OUT_CS_RELOC(surf);
if (can_hyperz) {
uint32_t surf_pitch;
- struct r300_texture *tex;
+ struct r300_resource *tex;
int level = surf->base.u.tex.level;
- tex = r300_texture(surf->base.texture);
+ tex = r300_resource(surf->base.texture);
surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK;
+
/* HiZ RAM. */
if (r300->screen->caps.hiz_ram) {
if (tex->hiz_mem[level]) {
@@ -443,14 +442,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_HIZ_PITCH, 0);
}
}
+
/* Z Mask RAM. (compressed zbuffer) */
- if (tex->zmask_mem[level]) {
- OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2);
- OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
- } else {
- OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
- OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0);
- }
+ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
+ OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
}
}
@@ -462,6 +457,7 @@ void r300_emit_hyperz_state(struct r300_context *r300,
{
struct r300_hyperz_state *z = state;
CS_LOCALS(r300);
+
if (z->flush)
WRITE_CS_TABLE(&z->cb_flush_begin, size);
else
@@ -563,14 +559,12 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state)
OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
END_CS;
query->begin_emitted = TRUE;
- query->flushed = FALSE;
}
static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
struct r300_query *query)
{
struct r300_capabilities* caps = &r300->screen->caps;
- struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
assert(caps->num_frag_pipes);
@@ -588,25 +582,25 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
case 4:
/* pipe 3 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 3) * 4);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 3) * 4);
+ OUT_CS_RELOC(r300->query_current);
case 3:
/* pipe 2 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 2) * 4);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 2) * 4);
+ OUT_CS_RELOC(r300->query_current);
case 2:
/* pipe 1 only */
/* As mentioned above, accomodate RV380 and older. */
OUT_CS_REG(R300_SU_REG_DEST,
1 << (caps->high_second_pipe ? 3 : 1));
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 1) * 4);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4);
+ OUT_CS_RELOC(r300->query_current);
case 1:
/* pipe 0 only */
OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 0) * 4);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4);
+ OUT_CS_RELOC(r300->query_current);
break;
default:
fprintf(stderr, "r300: Implementation error: Chipset reports %d"
@@ -622,13 +616,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
static void rv530_emit_query_end_single_z(struct r300_context *r300,
struct r300_query *query)
{
- struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
BEGIN_CS(8);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, query->num_results * 4);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, query->num_results * 4);
+ OUT_CS_RELOC(r300->query_current);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -636,16 +629,15 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300,
static void rv530_emit_query_end_double_z(struct r300_context *r300,
struct r300_query *query)
{
- struct r300_winsys_cs_buffer *buf = r300->query_current->cs_buffer;
CS_LOCALS(r300);
BEGIN_CS(14);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 0) * 4);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 0) * 4);
+ OUT_CS_RELOC(r300->query_current);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
- OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
- OUT_CS_RELOC(buf, (query->num_results + 1) * 4);
+ OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4);
+ OUT_CS_RELOC(r300->query_current);
OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
END_CS;
}
@@ -784,7 +776,7 @@ void r300_emit_textures_state(struct r300_context *r300,
{
struct r300_textures_state *allstate = (struct r300_textures_state*)state;
struct r300_texture_sampler_state *texstate;
- struct r300_texture *tex;
+ struct r300_resource *tex;
unsigned i;
CS_LOCALS(r300);
@@ -794,7 +786,7 @@ void r300_emit_textures_state(struct r300_context *r300,
for (i = 0; i < allstate->count; i++) {
if ((1 << i) & allstate->tx_enable) {
texstate = &allstate->regs[i];
- tex = r300_texture(allstate->sampler_views[i]->base.texture);
+ tex = r300_resource(allstate->sampler_views[i]->base.texture);
OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0);
OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1);
@@ -805,96 +797,56 @@ void r300_emit_textures_state(struct r300_context *r300,
OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1);
OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2);
- OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1);
- OUT_CS_TEX_RELOC(tex, texstate->format.tile_config);
+ OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config);
+ OUT_CS_RELOC(tex);
}
}
END_CS;
}
-static void r300_update_vertex_arrays_cb(struct r300_context *r300, unsigned packet_size)
+void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed)
{
- struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
+ struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer;
+ struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer;
struct pipe_vertex_element *velem = r300->velems->velem;
- unsigned *hw_format_size = r300->velems->hw_format_size;
- unsigned size1, size2, vertex_array_count = r300->velems->count;
+ struct r300_resource *buf;
int i;
- CB_LOCALS;
+ unsigned vertex_array_count = r300->velems->count;
+ unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
+ struct pipe_vertex_buffer *vb1, *vb2;
+ unsigned *hw_format_size;
+ unsigned size1, size2;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(2 + packet_size + vertex_array_count * 2);
+ OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
+ OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
+
+ hw_format_size = r300->velems->format_size;
- BEGIN_CB(r300->vertex_arrays_cb, packet_size);
for (i = 0; i < vertex_array_count - 1; i += 2) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
vb2 = &vbuf[velem[i+1].vertex_buffer_index];
size1 = hw_format_size[i];
size2 = hw_format_size[i+1];
- OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
- OUT_CB(vb1->buffer_offset + velem[i].src_offset);
- OUT_CB(vb2->buffer_offset + velem[i+1].src_offset);
+ OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+ OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
}
if (vertex_array_count & 1) {
vb1 = &vbuf[velem[i].vertex_buffer_index];
size1 = hw_format_size[i];
- OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
- OUT_CB(vb1->buffer_offset + velem[i].src_offset);
- }
- END_CB;
-
- r300->vertex_arrays_dirty = FALSE;
-}
-
-void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed)
-{
- struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
- struct pipe_resource **valid_vbuf = r300->valid_vertex_buffer;
- struct pipe_vertex_element *velem = r300->velems->velem;
- struct r300_buffer *buf;
- int i;
- unsigned vertex_array_count = r300->velems->count;
- unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
- CS_LOCALS(r300);
-
- BEGIN_CS(2 + packet_size + vertex_array_count * 2);
- OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
- OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
-
- if (!offset) {
- if (r300->vertex_arrays_dirty) {
- r300_update_vertex_arrays_cb(r300, packet_size);
- }
- OUT_CS_TABLE(r300->vertex_arrays_cb, packet_size);
- } else {
- struct pipe_vertex_buffer *vb1, *vb2;
- unsigned *hw_format_size = r300->velems->hw_format_size;
- unsigned size1, size2;
-
- for (i = 0; i < vertex_array_count - 1; i += 2) {
- vb1 = &vbuf[velem[i].vertex_buffer_index];
- vb2 = &vbuf[velem[i+1].vertex_buffer_index];
- size1 = hw_format_size[i];
- size2 = hw_format_size[i+1];
-
- OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
- R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
- OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
- OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
- }
-
- if (vertex_array_count & 1) {
- vb1 = &vbuf[velem[i].vertex_buffer_index];
- size1 = hw_format_size[i];
-
- OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
- OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
- }
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
+ OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
}
for (i = 0; i < vertex_array_count; i++) {
- buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]);
- OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b);
+ buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
+ OUT_CS_RELOC(buf);
}
END_CS;
}
@@ -919,7 +871,8 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
OUT_CS(r300->vertex_info.size |
(r300->vertex_info.size << 8));
OUT_CS(r300->draw_vbo_offset);
- OUT_CS_BUF_RELOC(r300->vbo, 0);
+ OUT_CS(0);
+ OUT_CS_RELOC(r300_resource(r300->vbo));
END_CS;
}
@@ -1097,17 +1050,6 @@ static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint1
END_CS;
}
-static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val)
-{
- CS_LOCALS(r300);
- BEGIN_CS(4);
- OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
- OUT_CS(start);
- OUT_CS(count);
- OUT_CS(val);
- END_CS;
-}
-
#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
@@ -1118,13 +1060,13 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
(struct r300_hyperz_state*)r300->hyperz_state.state;
struct r300_screen* r300screen = r300->screen;
uint32_t stride, offset = 0, height, offset_shift;
- struct r300_texture* tex;
+ struct r300_resource* tex;
int i;
- tex = r300_texture(fb->zsbuf->texture);
+ tex = r300_resource(fb->zsbuf->texture);
offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs;
- stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level];
+ stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level];
/* convert from pixels to 4x4 blocks */
stride = ALIGN_DIVUP(stride, 4);
@@ -1153,42 +1095,21 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
{
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- struct r300_screen* r300screen = r300->screen;
- uint32_t stride, offset = 0;
- struct r300_texture* tex;
- uint32_t i, height;
- int mult, offset_shift;
-
- tex = r300_texture(fb->zsbuf->texture);
- stride = tex->desc.stride_in_pixels[fb->zsbuf->u.tex.level];
-
- offset = tex->zmask_mem[fb->zsbuf->u.tex.level]->ofs;
-
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- mult = 8;
- else
- mult = 4;
-
- height = ALIGN_DIVUP(fb->zsbuf->height, mult);
-
- offset_shift = 4;
- offset_shift += (r300screen->caps.num_frag_pipes / 2);
- stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes);
+ struct r300_resource *tex;
+ CS_LOCALS(r300);
- /* okay have width in pixels - divide by block width */
- stride = ALIGN_DIVUP(stride, mult);
- /* have width in blocks - divide by number of fragment pipes screen width */
- /* 16 blocks per dword */
- stride = ALIGN_DIVUP(stride, 16);
+ tex = r300_resource(fb->zsbuf->texture);
- for (i = 0; i < height; i++) {
- offset = i * stride;
- offset <<= offset_shift;
- r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff);
- }
+ BEGIN_CS(size);
+ OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
+ OUT_CS(0);
+ OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]);
+ OUT_CS(0);
+ END_CS;
/* Mark the current zbuffer's zmask as in use. */
- tex->zmask_in_use[fb->zsbuf->u.tex.level] = TRUE;
+ r300->zmask_in_use = TRUE;
+ r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
void r300_emit_ztop_state(struct r300_context* r300,
@@ -1219,7 +1140,7 @@ boolean r300_emit_buffer_validate(struct r300_context *r300,
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_textures_state *texstate =
(struct r300_textures_state*)r300->textures_state.state;
- struct r300_texture *tex;
+ struct r300_resource *tex;
unsigned i;
boolean flushed = FALSE;
@@ -1227,16 +1148,16 @@ validate:
if (r300->fb_state.dirty) {
/* Color buffers... */
for (i = 0; i < fb->nr_cbufs; i++) {
- tex = r300_texture(fb->cbufs[i]->texture);
- assert(tex && tex->buffer && "cbuf is marked, but NULL!");
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0,
+ tex = r300_resource(fb->cbufs[i]->texture);
+ assert(tex && tex->buf && "cbuf is marked, but NULL!");
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0,
r300_surface(fb->cbufs[i])->domain);
}
/* ...depth buffer... */
if (fb->zsbuf) {
- tex = r300_texture(fb->zsbuf->texture);
- assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0,
+ tex = r300_resource(fb->zsbuf->texture);
+ assert(tex && tex->buf && "zsbuf is marked, but NULL!");
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0,
r300_surface(fb->zsbuf)->domain);
}
}
@@ -1247,35 +1168,35 @@ validate:
continue;
}
- tex = r300_texture(texstate->sampler_views[i]->base.texture);
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, tex->domain, 0);
+ tex = r300_resource(texstate->sampler_views[i]->base.texture);
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, tex->domain, 0);
}
}
/* ...occlusion query buffer... */
if (r300->query_current)
- r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buffer,
+ r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
0, r300->query_current->domain);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo)
- r300->rws->cs_add_reloc(r300->cs, r300_buffer(r300->vbo)->cs_buf,
- r300_buffer(r300->vbo)->domain, 0);
+ r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf,
+ r300_resource(r300->vbo)->domain, 0);
/* ...vertex buffers for HWTCL path... */
- if (do_validate_vertex_buffers) {
- struct pipe_resource **buf = r300->valid_vertex_buffer;
- struct pipe_resource **last = r300->valid_vertex_buffer +
- r300->vertex_buffer_count;
+ if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
+ struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer;
+ struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer +
+ r300->vbuf_mgr->nr_real_vertex_buffers;
for (; buf != last; buf++) {
if (!*buf)
continue;
- r300->rws->cs_add_reloc(r300->cs, r300_buffer(*buf)->cs_buf,
- r300_buffer(*buf)->domain, 0);
+ r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf,
+ r300_resource(*buf)->domain, 0);
}
}
/* ...and index buffer for HWTCL path. */
if (index_buffer)
- r300->rws->cs_add_reloc(r300->cs, r300_buffer(index_buffer)->cs_buf,
- r300_buffer(index_buffer)->domain, 0);
+ r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
+ r300_resource(index_buffer)->domain, 0);
/* Now do the validation. */
if (!r300->rws->cs_validate(r300->cs)) {
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index b250532ba92..c77cc08539d 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -36,16 +36,24 @@ static void r300_flush(struct pipe_context* pipe,
struct pipe_fence_handle** fence)
{
struct r300_context *r300 = r300_context(pipe);
- struct r300_query *query;
struct r300_atom *atom;
- struct r300_fence **rfence = (struct r300_fence**)fence;
-
- u_upload_flush(r300->upload_vb);
- u_upload_flush(r300->upload_ib);
+ struct r300_winsys_bo **rfence = (struct r300_winsys_bo**)fence;
if (r300->draw && !r300->draw_vbo_locked)
r300_draw_flush_vbuf(r300);
+ if (rfence) {
+ /* Create a fence, which is a dummy BO. */
+ *rfence = r300->rws->buffer_create(r300->rws, 1, 1,
+ PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STATIC,
+ R300_DOMAIN_GTT);
+ /* Add the fence as a dummy relocation. */
+ r300->rws->cs_add_reloc(r300->cs,
+ r300->rws->buffer_get_cs_handle(*rfence),
+ R300_DOMAIN_GTT, R300_DOMAIN_GTT);
+ }
+
if (r300->dirty_hw) {
r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
@@ -62,32 +70,29 @@ static void r300_flush(struct pipe_context* pipe,
r300_mark_atom_dirty(r300, atom);
}
}
+ r300->vertex_arrays_dirty = TRUE;
/* Unmark HWTCL state for SWTCL. */
if (!r300->screen->caps.has_tcl) {
r300->vs_state.dirty = FALSE;
r300->vs_constants.dirty = FALSE;
}
-
- r300->validate_buffers = TRUE;
- r300->upload_vb_validated = FALSE;
- r300->upload_ib_validated = FALSE;
} else {
- /* Even if hw is not dirty, we should at least reset the CS in case
- * the space checking failed for the first draw operation. */
- r300->rws->cs_flush(r300->cs);
- }
-
- /* reset flushed query */
- foreach(query, &r300->query_list) {
- query->flushed = TRUE;
+ if (rfence) {
+ /* We have to create a fence object, but the command stream is empty
+ * and we cannot emit an empty CS. We must write some regs then. */
+ CS_LOCALS(r300);
+ OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
+ r300->rws->cs_flush(r300->cs);
+ } else {
+ /* Even if hw is not dirty, we should at least reset the CS in case
+ * the space checking failed for the first draw operation. */
+ r300->rws->cs_flush(r300->cs);
+ }
}
- /* Create a new fence. */
- if (rfence) {
- *rfence = CALLOC_STRUCT(r300_fence);
- pipe_reference_init(&(*rfence)->reference, 1);
- (*rfence)->ctx = r300;
+ if (flags & PIPE_FLUSH_FRAME) {
+ r300->rws->cs_sync_flush(r300->cs);
}
}
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 6d4091dc87d..cec7473009a 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -152,13 +152,13 @@ static void get_external_state(
for (i = 0; i < texstate->sampler_state_count; i++) {
struct r300_sampler_state *s = texstate->sampler_states[i];
struct r300_sampler_view *v = texstate->sampler_views[i];
- struct r300_texture *t;
+ struct r300_resource *t;
if (!s || !v) {
continue;
}
- t = r300_texture(texstate->sampler_views[i]->base.texture);
+ t = r300_resource(texstate->sampler_views[i]->base.texture);
if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
state->unit[i].compare_mode_enabled = 1;
@@ -181,7 +181,7 @@ static void get_external_state(
state->unit[i].non_normalized_coords = !s->state.normalized_coords;
/* XXX this should probably take into account STR, not just S. */
- if (t->desc.is_npot) {
+ if (t->tex.is_npot) {
switch (s->state.wrap_s) {
case PIPE_TEX_WRAP_REPEAT:
state->unit[i].wrap_mode = RC_WRAP_REPEAT;
@@ -201,7 +201,7 @@ static void get_external_state(
state->unit[i].wrap_mode = RC_WRAP_NONE;
}
- if (t->desc.b.b.target == PIPE_TEXTURE_3D)
+ if (t->b.b.b.target == PIPE_TEXTURE_3D)
state->unit[i].clamp_and_scale_before_fetch = TRUE;
}
}
@@ -298,44 +298,98 @@ static void r300_emit_fs_code_to_buffer(
}
} else { /* r300 */
struct r300_fragment_program_code *code = &generic_code->code.r300;
-
- shader->cb_code_size = 19 +
- (r300->screen->caps.is_r400 ? 2 : 0) +
- code->alu.length * 4 +
- (code->tex.length ? (1 + code->tex.length) : 0) +
- imm_count * 5;
+ unsigned int alu_length = code->alu.length;
+ unsigned int alu_iterations = ((alu_length - 1) / 64) + 1;
+ unsigned int tex_length = code->tex.length;
+ unsigned int tex_iterations =
+ tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0;
+ unsigned int iterations =
+ alu_iterations > tex_iterations ? alu_iterations : tex_iterations;
+ unsigned int bank = 0;
+
+ shader->cb_code_size = 15 +
+ /* R400_US_CODE_BANK */
+ (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) +
+ /* R400_US_CODE_EXT */
+ (r300->screen->caps.is_r400 ? 2 : 0) +
+ /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */
+ (code->r390_mode ? (5 * alu_iterations) : 4) +
+ /* R400_US_ALU_EXT_ADDR_[0-63] */
+ (code->r390_mode ? (code->alu.length) : 0) +
+ /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */
+ code->alu.length * 4 +
+ /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */
+ (code->tex.length > 0 ? code->tex.length + tex_iterations : 0) +
+ imm_count * 5;
NEW_CB(shader->cb_code, shader->cb_code_size);
- if (r300->screen->caps.is_r400)
- OUT_CB_REG(R400_US_CODE_BANK, 0);
-
OUT_CB_REG(R300_US_CONFIG, code->config);
OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);
+ if (code->r390_mode) {
+ OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext);
+ } else if (r300->screen->caps.is_r400) {
+ /* This register appears to affect shaders even if r390_mode is
+ * disabled, so it needs to be set to 0 for shaders that
+ * don't use r390_mode. */
+ OUT_CB_REG(R400_US_CODE_EXT, 0);
+ }
+
OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
OUT_CB_TABLE(code->code_addr, 4);
- OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CB(code->alu.inst[i].rgb_inst);
+ do {
+ unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64);
+ unsigned int bank_alu_offset = bank * 64;
+ unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32);
+ unsigned int bank_tex_offset = bank * 32;
+
+ if (r300->screen->caps.is_r400) {
+ OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ?
+ (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2
+ }
+
+ if (bank_alu_length > 0) {
+ OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr);
- OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CB(code->alu.inst[i].rgb_addr);
+ OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst);
- OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CB(code->alu.inst[i].alpha_inst);
+ OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr);
- OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
- for (i = 0; i < code->alu.length; i++)
- OUT_CB(code->alu.inst[i].alpha_addr);
+ if (code->r390_mode) {
+ OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length);
+ for (i = 0; i < bank_alu_length; i++)
+ OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr);
+ }
+ }
+
+ if (bank_tex_length > 0) {
+ OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length);
+ OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length);
+ }
+
+ alu_length -= bank_alu_length;
+ tex_length -= bank_tex_length;
+ bank++;
+ } while(code->r390_mode && (alu_length > 0 || tex_length > 0));
- if (code->tex.length) {
- OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
- OUT_CB_TABLE(code->tex.inst, code->tex.length);
+ /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders
+ * will be rendered incorrectly. */
+ if (r300->screen->caps.is_r400) {
+ OUT_CB_REG(R400_US_CODE_BANK,
+ code->r390_mode ? R400_R390_MODE_ENABLE : 0);
}
/* Emit immediates. */
@@ -384,12 +438,17 @@ static void r300_translate_fragment_shader(
compiler.code = &shader->code;
compiler.state = shader->compare_state;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
+ compiler.Base.is_r400 = r300->screen->caps.is_r400;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = TRUE;
compiler.Base.has_presub = TRUE;
- compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
+ compiler.Base.max_temp_regs =
+ compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32);
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
- compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
+ compiler.Base.max_alu_insts =
+ (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64;
+ compiler.Base.max_tex_insts =
+ (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
compiler.UserData = &shader->inputs;
@@ -414,6 +473,13 @@ static void r300_translate_fragment_shader(
r300_tgsi_to_rc(&ttr, tokens);
+ if (ttr.error) {
+ fprintf(stderr, "r300 FP: Cannot translate a shader. "
+ "Using a dummy shader instead.\n");
+ r300_dummy_fragment_shader(r300, shader);
+ return;
+ }
+
if (!r300->screen->caps.is_r500 ||
compiler.Base.Program.Constants.Count > 200) {
compiler.Base.remove_unused_constants = TRUE;
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index c22e307c679..873e0209d42 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -127,7 +127,7 @@ static boolean r300_can_hiz(struct r300_context *r300)
z->current_func, dsa_state->z_stencil_control);
return FALSE;
}
- }
+ }
return TRUE;
}
@@ -137,9 +137,8 @@ static void r300_update_hyperz(struct r300_context* r300)
(struct r300_hyperz_state*)r300->hyperz_state.state;
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- struct r300_texture *zstex =
- fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
- boolean zmask_in_use = FALSE;
+ struct r300_resource *zstex =
+ fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
boolean hiz_in_use = FALSE;
z->gb_z_peq_config = 0;
@@ -158,42 +157,40 @@ static void r300_update_hyperz(struct r300_context* r300)
if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
return;
- zmask_in_use = zstex->zmask_in_use[fb->zsbuf->u.tex.level];
hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level];
- /* Z fastfill. */
- if (zmask_in_use) {
- z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
- }
-
/* Zbuffer compression. */
- if (zmask_in_use && r300->z_compression) {
- z->zb_bw_cntl |= R300_RD_COMP_ENABLE;
- if (r300->z_decomp_rd == false)
+ if (r300->zmask_in_use && !r300->zmask_locked) {
+ z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
+ /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/
+ R300_RD_COMP_ENABLE;
+
+ if (!r300->zmask_decompress) {
z->zb_bw_cntl |= R300_WR_COMP_ENABLE;
+ }
}
- /* RV350 and up optimizations. */
- /* The section 10.4.9 in the docs is a lie. */
- if (r300->z_compression == RV350_Z_COMPRESS_88)
+
+ if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) {
z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
+ }
+
+ if (hiz_in_use && r300_can_hiz(r300)) {
+ z->zb_bw_cntl |= R300_HIZ_ENABLE |
+ r300_get_hiz_min(r300);
- if (hiz_in_use) {
- bool can_hiz = r300_can_hiz(r300);
- if (can_hiz) {
- z->zb_bw_cntl |= R300_HIZ_ENABLE;
- z->sc_hyperz |= R300_SC_HYPERZ_ENABLE;
- z->sc_hyperz |= r300_get_sc_hz_max(r300);
- z->zb_bw_cntl |= r300_get_hiz_min(r300);
+ z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
+ r300_get_sc_hz_max(r300);
+
+ if (r300->screen->caps.is_r500) {
+ z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 |
+ R500_HIZ_EQUAL_REJECT_ENABLE;
}
}
/* R500-specific features and optimizations. */
if (r300->screen->caps.is_r500) {
- z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3;
- z->zb_bw_cntl |=
- R500_HIZ_EQUAL_REJECT_ENABLE |
- R500_PEQ_PACKING_ENABLE |
- R500_COVERED_PTR_MASKING_ENABLE;
+ z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE |
+ R500_COVERED_PTR_MASKING_ENABLE;
}
}
@@ -297,26 +294,10 @@ static void r300_update_hiz_clear(struct r300_context *r300)
r300->hiz_clear.size = height * 4;
}
-static void r300_update_zmask_clear(struct r300_context *r300)
-{
- struct pipe_framebuffer_state *fb =
- (struct pipe_framebuffer_state*)r300->fb_state.state;
- uint32_t height;
- int mult;
-
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- mult = 8;
- else
- mult = 4;
-
- height = ALIGN_DIVUP(fb->zsbuf->height, mult);
-
- r300->zmask_clear.size = height * 4;
-}
-
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300);
+
if (r300->hyperz_state.dirty) {
r300_update_hyperz(r300);
}
@@ -324,64 +305,24 @@ void r300_update_hyperz_state(struct r300_context* r300)
if (r300->hiz_clear.dirty) {
r300_update_hiz_clear(r300);
}
- if (r300->zmask_clear.dirty) {
- r300_update_zmask_clear(r300);
- }
}
void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf)
{
- struct r300_texture *tex;
+ struct r300_resource *tex;
uint32_t zsize, ndw;
int level = surf->base.u.tex.level;
- tex = r300_texture(surf->base.texture);
+ tex = r300_resource(surf->base.texture);
if (tex->hiz_mem[level])
return;
- zsize = tex->desc.layer_size_in_bytes[level];
- zsize /= util_format_get_blocksize(tex->desc.b.b.format);
+ zsize = tex->tex.layer_size_in_bytes[level];
+ zsize /= util_format_get_blocksize(tex->b.b.b.format);
ndw = ALIGN_DIVUP(zsize, 64);
tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0);
- return;
-}
-
-void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress)
-{
- int bsize = 256;
- uint32_t zsize, ndw;
- int level = surf->base.u.tex.level;
- struct r300_texture *tex;
-
- tex = r300_texture(surf->base.texture);
-
- /* We currently don't handle decompression for 3D textures and cubemaps
- * correctly. */
- if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
- tex->desc.b.b.target != PIPE_TEXTURE_2D &&
- tex->desc.b.b.target != PIPE_TEXTURE_RECT)
- return;
-
- /* Cannot flush zmask of 16-bit zbuffers. */
- if (util_format_get_blocksizebits(tex->desc.b.b.format) == 16)
- return;
-
- if (tex->zmask_mem[level])
- return;
-
- zsize = tex->desc.layer_size_in_bytes[level];
- zsize /= util_format_get_blocksize(tex->desc.b.b.format);
-
- /* each zmask dword represents 16 4x4 blocks - which is 256 pixels
- or 16 8x8 depending on the gb peq flag = 1024 pixels */
- if (compress == RV350_Z_COMPRESS_88)
- bsize = 1024;
-
- ndw = ALIGN_DIVUP(zsize, bsize);
- tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0);
- return;
}
boolean r300_hyperz_init_mm(struct r300_context *r300)
@@ -389,15 +330,9 @@ boolean r300_hyperz_init_mm(struct r300_context *r300)
struct r300_screen* r300screen = r300->screen;
int frag_pipes = r300screen->caps.num_frag_pipes;
- r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
- if (!r300->zmask_mm)
- return FALSE;
-
if (r300screen->caps.hiz_ram) {
r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
if (!r300->hiz_mm) {
- u_mmDestroy(r300->zmask_mm);
- r300->zmask_mm = NULL;
return FALSE;
}
}
@@ -413,7 +348,4 @@ void r300_hyperz_destroy_mm(struct r300_context *r300)
u_mmDestroy(r300->hiz_mm);
r300->hiz_mm = NULL;
}
-
- u_mmDestroy(r300->zmask_mm);
- r300->zmask_mm = NULL;
}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
index 30a23ec6493..d4c8e7c60a9 100644
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ b/src/gallium/drivers/r300/r300_hyperz.h
@@ -28,8 +28,8 @@ struct r300_context;
void r300_update_hyperz_state(struct r300_context* r300);
void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
-void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress);
boolean r300_hyperz_init_mm(struct r300_context *r300);
void r300_hyperz_destroy_mm(struct r300_context *r300);
+
#endif
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 6223e043210..717485f43cb 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -57,10 +57,10 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
insert_at_tail(&r300->query_list, q);
/* Open up the occlusion query buffer. */
- q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
+ q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM,
q->domain);
- q->cs_buffer = r300->rws->buffer_get_cs_handle(r300->rws, q->buffer);
+ q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf);
return (struct pipe_query*)q;
}
@@ -68,10 +68,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
static void r300_destroy_query(struct pipe_context* pipe,
struct pipe_query* query)
{
- struct r300_context *r300 = r300_context(pipe);
struct r300_query* q = r300_query(query);
- r300->rws->buffer_reference(r300->rws, &q->buffer, NULL);
+ r300_winsys_bo_reference(&q->buf, NULL);
remove_from_list(q);
FREE(query);
}
@@ -128,16 +127,12 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct r300_query *q = r300_query(query);
- unsigned flags, i;
+ unsigned i;
uint32_t temp, *map;
- uint64_t *result = (uint64_t*)vresult;
-
- if (!q->flushed)
- pipe->flush(pipe, 0, NULL);
-
- flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0);
- map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags);
+ map = r300->rws->buffer_map(q->buf, r300->cs,
+ PIPE_TRANSFER_READ |
+ (!wait ? PIPE_TRANSFER_DONTBLOCK : 0));
if (!map)
return FALSE;
@@ -148,9 +143,9 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
map++;
}
- r300->rws->buffer_unmap(r300->rws, q->buffer);
+ r300->rws->buffer_unmap(q->buf);
- *result = temp;
+ *((uint64_t*)vresult) = temp;
return TRUE;
}
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index d1154dee40a..1d93dab2ca2 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2162,14 +2162,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* R4xx extended fragment shader registers. */
#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */
-# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01
-# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02
-# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04
+# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x))
# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08
-# define R400_ADDR0_EXT_A_MSB_BIT 0x10
-# define R400_ADDR1_EXT_A_MSB_BIT 0x20
-# define R400_ADDR2_EXT_A_MSB_BIT 0x40
+# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4))
# define R400_ADDRD_EXT_A_MSB_BIT 0x80
+
#define R400_US_CODE_BANK 0x46b8
# define R400_BANK_SHIFT 0
# define R400_BANK_MASK 0xf
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b35822c82f8..2ead8667bda 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -127,6 +127,20 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias)
END_CS;
}
+static void r300_emit_draw_init(struct r300_context *r300, unsigned mode,
+ unsigned min_index, unsigned max_index)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(5);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(max_index);
+ OUT_CS(min_index);
+ END_CS;
+}
+
/* This function splits the index bias value into two parts:
* - buffer_offset: the value that can be safely added to buffer offsets
* in r300_emit_vertex_arrays (it must yield a positive offset when added to
@@ -136,7 +150,7 @@ void r500_emit_index_bias(struct r300_context *r300, int index_bias)
static void r300_split_index_bias(struct r300_context *r300, int index_bias,
int *buffer_offset, int *index_offset)
{
- struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer;
+ struct pipe_vertex_buffer *vb, *vbufs = r300->vbuf_mgr->vertex_buffer;
struct pipe_vertex_element *velem = r300->velems->velem;
unsigned i, size;
int max_neg_bias;
@@ -235,24 +249,11 @@ static boolean r300_emit_states(struct r300_context *r300,
/* Validate buffers and emit dirty state if needed. */
if (first_draw) {
- if (r300->validate_buffers) {
- if (!r300_emit_buffer_validate(r300, validate_vbos,
- index_buffer)) {
- fprintf(stderr, "r300: CS space validation failed. "
- "(not enough memory?) Skipping rendering.\n");
- return FALSE;
- }
-
- /* Consider the validation done only if everything was validated. */
- if (validate_vbos) {
- r300->validate_buffers = FALSE;
- if (r300->any_user_vbs)
- r300->upload_vb_validated = TRUE;
- if (r300->index_buffer.buffer &&
- r300_is_user_buffer(r300->index_buffer.buffer)) {
- r300->upload_ib_validated = TRUE;
- }
- }
+ if (!r300_emit_buffer_validate(r300, validate_vbos,
+ index_buffer)) {
+ fprintf(stderr, "r300: CS space validation failed. "
+ "(not enough memory?) Skipping rendering.\n");
+ return FALSE;
}
r300_emit_dirty_state(r300);
@@ -263,9 +264,17 @@ static boolean r300_emit_states(struct r300_context *r300,
r500_emit_index_bias(r300, 0);
}
- if (emit_vertex_arrays)
+ if (emit_vertex_arrays &&
+ (r300->vertex_arrays_dirty ||
+ r300->vertex_arrays_indexed != indexed ||
+ r300->vertex_arrays_offset != buffer_offset)) {
r300_emit_vertex_arrays(r300, buffer_offset, indexed);
+ r300->vertex_arrays_dirty = FALSE;
+ r300->vertex_arrays_indexed = indexed;
+ r300->vertex_arrays_offset = buffer_offset;
+ }
+
if (emit_vertex_arrays_swtcl)
r300_emit_vertex_arrays_swtcl(r300, indexed);
}
@@ -294,7 +303,8 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300,
if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
flags |= PREP_FIRST_DRAW;
- return r300_emit_states(r300, flags, index_buffer, buffer_offset, index_bias);
+ return r300_emit_states(r300, flags, index_buffer, buffer_offset,
+ index_bias);
}
static boolean immd_is_good_idea(struct r300_context *r300,
@@ -325,17 +335,12 @@ static boolean immd_is_good_idea(struct r300_context *r300,
vbi = velem->vertex_buffer_index;
if (!checked[vbi]) {
- buf = r300->valid_vertex_buffer[vbi];
+ buf = r300->vbuf_mgr->real_vertex_buffer[vbi];
- if (!(r300_buffer(buf)->domain & R300_DOMAIN_GTT)) {
+ if ((r300_resource(buf)->domain != R300_DOMAIN_GTT)) {
return FALSE;
}
- if (r300_buffer_is_referenced(&r300->context, buf,
- R300_REF_CS | R300_REF_HW)) {
- /* It's a very bad idea to map it... */
- return FALSE;
- }
checked[vbi] = TRUE;
}
}
@@ -346,10 +351,9 @@ static boolean immd_is_good_idea(struct r300_context *r300,
* The HWTCL draw functions. *
****************************************************************************/
-static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
- unsigned mode,
- unsigned start,
- unsigned count)
+static void r300_draw_arrays_immediate(struct r300_context *r300,
+ unsigned mode, unsigned start,
+ unsigned count)
{
struct pipe_vertex_element* velem;
struct pipe_vertex_buffer* vbuf;
@@ -360,7 +364,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
unsigned vertex_size = r300->velems->vertex_size_dwords;
/* The number of dwords for this draw operation. */
- unsigned dwords = 9 + count * vertex_size;
+ unsigned dwords = 4 + count * vertex_size;
/* Size of the vertex element, in dwords. */
unsigned size[PIPE_MAX_ATTRIBS];
@@ -370,9 +374,8 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
unsigned stride[PIPE_MAX_ATTRIBS];
/* Mapped vertex buffers. */
- uint32_t* map[PIPE_MAX_ATTRIBS];
+ uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
uint32_t* mapelem[PIPE_MAX_ATTRIBS];
- struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0};
CS_LOCALS(r300);
@@ -382,29 +385,25 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
/* Calculate the vertex size, offsets, strides etc. and map the buffers. */
for (i = 0; i < vertex_element_count; i++) {
velem = &r300->velems->velem[i];
- size[i] = r300->velems->hw_format_size[i] / 4;
+ size[i] = r300->velems->format_size[i] / 4;
vbi = velem->vertex_buffer_index;
- vbuf = &r300->vertex_buffer[vbi];
+ vbuf = &r300->vbuf_mgr->vertex_buffer[vbi];
stride[i] = vbuf->stride / 4;
/* Map the buffer. */
- if (!transfer[vbi]) {
- map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
- r300->valid_vertex_buffer[vbi],
- PIPE_TRANSFER_READ,
- &transfer[vbi]);
+ if (!map[vbi]) {
+ map[vbi] = (uint32_t*)r300->rws->buffer_map(
+ r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf,
+ r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED);
map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start;
}
mapelem[i] = map[vbi] + (velem->src_offset / 4);
}
+ r300_emit_draw_init(r300, mode, 0, count-1);
+
BEGIN_CS(dwords);
- OUT_CS_REG(R300_GA_COLOR_CONTROL,
- r300_provoking_vertex_fixes(r300, mode));
OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
- OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
- OUT_CS(count - 1);
- OUT_CS(0);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
r300_translate_primitive(mode));
@@ -421,9 +420,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
for (i = 0; i < vertex_element_count; i++) {
vbi = r300->velems->velem[i].vertex_buffer_index;
- if (transfer[vbi]) {
- pipe_buffer_unmap(&r300->context, transfer[vbi]);
- transfer[vbi] = NULL;
+ if (map[vbi]) {
+ r300->rws->buffer_unmap(r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf);
+ map[vbi] = NULL;
}
}
}
@@ -441,15 +440,12 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
return;
}
- BEGIN_CS(7 + (alt_num_verts ? 2 : 0));
+ r300_emit_draw_init(r300, mode, 0, count-1);
+
+ BEGIN_CS(2 + (alt_num_verts ? 2 : 0));
if (alt_num_verts) {
OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
}
- OUT_CS_REG(R300_GA_COLOR_CONTROL,
- r300_provoking_vertex_fixes(r300, mode));
- OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
- OUT_CS(count - 1);
- OUT_CS(0);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
r300_translate_primitive(mode) |
@@ -471,22 +467,16 @@ static void r300_emit_draw_elements(struct r300_context *r300,
boolean alt_num_verts = count > 65535;
CS_LOCALS(r300);
- if (count >= (1 << 24)) {
+ if (count >= (1 << 24) || maxIndex >= (1 << 24)) {
fprintf(stderr, "r300: Got a huge number of vertices: %i, "
- "refusing to render.\n", count);
+ "refusing to render (maxIndex: %i).\n", count, maxIndex);
return;
}
DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
count, minIndex, maxIndex);
- BEGIN_CS(5);
- OUT_CS_REG(R300_GA_COLOR_CONTROL,
- r300_provoking_vertex_fixes(r300, mode));
- OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
- OUT_CS(maxIndex);
- OUT_CS(minIndex);
- END_CS;
+ r300_emit_draw_init(r300, mode, minIndex, maxIndex);
/* If start is odd, render the first triangle with indices embedded
* in the command stream. This will increase start by 3 and make it
@@ -527,30 +517,105 @@ static void r300_emit_draw_elements(struct r300_context *r300,
(alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
}
- /* INDX_BUFFER is a truly special packet3.
- * Unlike most other packet3, where the offset is after the count,
- * the order is reversed, so the relocation ends up carrying the
- * size of the indexbuf instead of the offset.
- */
OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
(0 << R300_INDX_BUFFER_SKIP_SHIFT));
OUT_CS(offset_dwords << 2);
- OUT_CS_BUF_RELOC(indexBuffer, count_dwords);
+ OUT_CS(count_dwords);
+ OUT_CS_RELOC(r300_resource(indexBuffer));
+ END_CS;
+}
+
+static void r300_draw_elements_immediate(struct r300_context *r300,
+ int indexBias, unsigned minIndex,
+ unsigned maxIndex, unsigned mode,
+ unsigned start, unsigned count)
+{
+ uint8_t *ptr1;
+ uint16_t *ptr2;
+ uint32_t *ptr4;
+ unsigned index_size = r300->index_buffer.index_size;
+ unsigned i, count_dwords = index_size == 4 ? count : (count + 1) / 2;
+ CS_LOCALS(r300);
+
+ /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */
+ if (!r300_prepare_for_rendering(r300,
+ PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
+ PREP_INDEXED, NULL, 2+count_dwords, 0, indexBias))
+ return;
+ r300_emit_draw_init(r300, mode, minIndex, maxIndex);
+
+ BEGIN_CS(2 + count_dwords);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords);
+
+ switch (index_size) {
+ case 1:
+ ptr1 = r300_resource(r300->index_buffer.buffer)->b.user_ptr;
+ ptr1 += start;
+
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+ r300_translate_primitive(mode));
+
+ if (indexBias && !r300->screen->caps.index_bias_supported) {
+ for (i = 0; i < count-1; i += 2)
+ OUT_CS(((ptr1[i+1] + indexBias) << 16) |
+ (ptr1[i] + indexBias));
+
+ if (count & 1)
+ OUT_CS(ptr1[i] + indexBias);
+ } else {
+ for (i = 0; i < count-1; i += 2)
+ OUT_CS(((ptr1[i+1]) << 16) |
+ (ptr1[i] ));
+
+ if (count & 1)
+ OUT_CS(ptr1[i]);
+ }
+ break;
+
+ case 2:
+ ptr2 = (uint16_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr;
+ ptr2 += start;
+
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+ r300_translate_primitive(mode));
+
+ if (indexBias && !r300->screen->caps.index_bias_supported) {
+ for (i = 0; i < count-1; i += 2)
+ OUT_CS(((ptr2[i+1] + indexBias) << 16) |
+ (ptr2[i] + indexBias));
+
+ if (count & 1)
+ OUT_CS(ptr2[i] + indexBias);
+ } else {
+ OUT_CS_TABLE(ptr2, count_dwords);
+ }
+ break;
+
+ case 4:
+ ptr4 = (uint32_t*)r300_resource(r300->index_buffer.buffer)->b.user_ptr;
+ ptr4 += start;
+
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+ R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
+ r300_translate_primitive(mode));
+
+ if (indexBias && !r300->screen->caps.index_bias_supported) {
+ for (i = 0; i < count; i++)
+ OUT_CS(ptr4[i] + indexBias);
+ } else {
+ OUT_CS_TABLE(ptr4, count_dwords);
+ }
+ break;
+ }
END_CS;
}
-/* This is the fast-path drawing & emission for HW TCL. */
-static void r300_draw_range_elements(struct pipe_context* pipe,
- int indexBias,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
+static void r300_draw_elements(struct r300_context *r300, int indexBias,
+ unsigned minIndex, unsigned maxIndex,
+ unsigned mode, unsigned start, unsigned count)
{
- struct r300_context* r300 = r300_context(pipe);
struct pipe_resource *indexBuffer = r300->index_buffer.buffer;
unsigned indexSize = r300->index_buffer.index_size;
struct pipe_resource* orgIndexBuffer = indexBuffer;
@@ -570,30 +635,28 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
/* Fallback for misaligned ushort indices. */
if (indexSize == 2 && (start & 1) &&
- !r300_is_user_buffer(indexBuffer)) {
- struct pipe_transfer *transfer;
- struct pipe_resource *userbuf;
-
- uint16_t *ptr = pipe_buffer_map(pipe, indexBuffer,
- PIPE_TRANSFER_READ, &transfer);
+ !r300_resource(indexBuffer)->b.user_ptr) {
+ /* If we got here, then orgIndexBuffer == indexBuffer. */
+ uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->buf,
+ r300->cs,
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
if (mode == PIPE_PRIM_TRIANGLES) {
memcpy(indices3, ptr + start, 6);
} else {
/* Copy the mapped index buffer directly to the upload buffer.
* The start index will be aligned simply from the fact that
- * every sub-buffer in u_upload_mgr is aligned. */
- userbuf = pipe->screen->user_buffer_create(pipe->screen,
- ptr, 0,
- PIPE_BIND_INDEX_BUFFER);
- indexBuffer = userbuf;
- r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count);
- pipe_resource_reference(&userbuf, NULL);
+ * every sub-buffer in the upload buffer is aligned. */
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start,
+ count, (uint8_t*)ptr);
}
- pipe_buffer_unmap(pipe, transfer);
+ r300->rws->buffer_unmap(r300_resource(orgIndexBuffer)->buf);
} else {
- if (r300_is_user_buffer(indexBuffer))
- r300_upload_index_buffer(r300, &indexBuffer, indexSize, &start, count);
+ if (r300_resource(indexBuffer)->b.user_ptr)
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize,
+ &start, count,
+ r300_resource(indexBuffer)->b.user_ptr);
}
/* 19 dwords for emit_draw_elements. Give up if the function fails. */
@@ -607,7 +670,11 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
minIndex, maxIndex, mode, start, count, indices3);
} else {
do {
- short_count = MIN2(count, 65534);
+ if (indexSize == 2 && (start & 1))
+ short_count = MIN2(count, 65535);
+ else
+ short_count = MIN2(count, 65534);
+
r300_emit_draw_elements(r300, indexBuffer, indexSize,
minIndex, maxIndex,
mode, start, short_count, indices3);
@@ -631,43 +698,38 @@ done:
}
}
-static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+static void r300_draw_arrays(struct r300_context *r300, unsigned mode,
unsigned start, unsigned count)
{
- struct r300_context* r300 = r300_context(pipe);
boolean alt_num_verts = r300->screen->caps.is_r500 &&
count > 65536 &&
r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
unsigned short_count;
- if (immd_is_good_idea(r300, count)) {
- r300_emit_draw_arrays_immediate(r300, mode, start, count);
+ /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
+ if (!r300_prepare_for_rendering(r300,
+ PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
+ NULL, 9, start, 0))
+ return;
+
+ if (alt_num_verts || count <= 65535) {
+ r300_emit_draw_arrays(r300, mode, count);
} else {
- /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
- if (!r300_prepare_for_rendering(r300,
- PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
- NULL, 9, start, 0))
- return;
+ do {
+ short_count = MIN2(count, 65535);
+ r300_emit_draw_arrays(r300, mode, short_count);
- if (alt_num_verts || count <= 65535) {
- r300_emit_draw_arrays(r300, mode, count);
- } else {
- do {
- short_count = MIN2(count, 65535);
- r300_emit_draw_arrays(r300, mode, short_count);
-
- start += short_count;
- count -= short_count;
-
- /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
- if (count) {
- if (!r300_prepare_for_rendering(r300,
- PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
- start, 0))
- return;
- }
- } while (count);
- }
+ start += short_count;
+ count -= short_count;
+
+ /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
+ if (count) {
+ if (!r300_prepare_for_rendering(r300,
+ PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
+ start, 0))
+ return;
+ }
+ } while (count);
}
}
@@ -676,83 +738,45 @@ static void r300_draw_vbo(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
unsigned count = info->count;
- boolean translate = FALSE;
+ boolean buffers_updated, uploader_flushed;
boolean indexed = info->indexed && r300->index_buffer.buffer;
- unsigned min_index = 0;
- unsigned max_index = r300->vertex_buffer_max_index;
+ unsigned start_indexed = info->start + r300->index_buffer.offset;
+ int max_index = MIN2(r300->vbuf_mgr->max_index, info->max_index);
- if (r300->skip_rendering) {
+ if (r300->skip_rendering ||
+ !u_trim_pipe_prim(info->mode, &count)) {
return;
}
- if (!u_trim_pipe_prim(info->mode, &count)) {
- return;
+ /* Start the vbuf manager and update buffers if needed. */
+ u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info,
+ &buffers_updated, &uploader_flushed);
+ if (buffers_updated) {
+ r300->vertex_arrays_dirty = TRUE;
}
- if (indexed) {
- int real_min_index, real_max_index;
- /* Compute the start for draw_elements, taking the offset into account. */
- unsigned start_indexed =
- info->start +
- (r300->index_buffer.offset / r300->index_buffer.index_size);
-
- assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0);
-
- /* Index buffer range checking. */
- if ((start_indexed + count) * r300->index_buffer.index_size >
- r300->index_buffer.buffer->width0) {
- fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n");
- return;
- }
-
- min_index = MAX2(min_index, info->min_index);
- max_index = MIN2(max_index, info->max_index);
- real_min_index = (int)min_index - info->index_bias;
- real_max_index = (int)max_index - info->index_bias;
-
- if (max_index >= (1 << 24) - 1) {
- fprintf(stderr, "r300: Invalid max_index: %i. Skipping rendering...\n", max_index);
- return;
- }
-
- r300_update_derived_state(r300);
-
- /* Set up the fallback for an incompatible vertex layout if needed. */
- if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
- r300_begin_vertex_translate(r300, real_min_index, real_max_index);
- translate = TRUE;
- }
+ /* Draw. */
+ r300_update_derived_state(r300);
- /* Upload vertex buffers. */
- if (r300->any_user_vbs) {
- r300_upload_user_buffers(r300, real_min_index, real_max_index);
+ if (indexed) {
+ if (count <= 8 &&
+ r300_resource(r300->index_buffer.buffer)->b.user_ptr) {
+ r300_draw_elements_immediate(r300, info->index_bias,
+ info->min_index, max_index,
+ info->mode, start_indexed, count);
+ } else {
+ r300_draw_elements(r300, info->index_bias, info->min_index,
+ max_index, info->mode, start_indexed, count);
}
-
- r300_draw_range_elements(pipe, info->index_bias, min_index, max_index,
- info->mode, start_indexed, count);
} else {
- min_index = MAX2(min_index, info->start);
- max_index = MIN2(max_index, info->start + count - 1);
-
- r300_update_derived_state(r300);
-
- /* Set up the fallback for an incompatible vertex layout if needed. */
- if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
- r300_begin_vertex_translate(r300, min_index, max_index);
- translate = TRUE;
- }
-
- /* Upload vertex buffers. */
- if (r300->any_user_vbs) {
- r300_upload_user_buffers(r300, min_index, max_index);
+ if (immd_is_good_idea(r300, count)) {
+ r300_draw_arrays_immediate(r300, info->mode, info->start, count);
+ } else {
+ r300_draw_arrays(r300, info->mode, info->start, count);
}
-
- r300_draw_arrays(pipe, info->mode, info->start, count);
}
- if (translate) {
- r300_end_vertex_translate(r300);
- }
+ u_vbuf_mgr_draw_end(r300->vbuf_mgr);
}
/****************************************************************************
@@ -787,11 +811,12 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
(indexed ? PREP_INDEXED : 0),
indexed ? 256 : 6);
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- if (r300->vertex_buffer[i].buffer) {
+ for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) {
+ if (r300->vbuf_mgr->vertex_buffer[i].buffer) {
void *buf = pipe_buffer_map(pipe,
- r300->vertex_buffer[i].buffer,
- PIPE_TRANSFER_READ,
+ r300->vbuf_mgr->vertex_buffer[i].buffer,
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
&vb_transfer[i]);
draw_set_mapped_vertex_buffer(r300->draw, i, buf);
}
@@ -799,7 +824,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
if (indexed) {
indices = pipe_buffer_map(pipe, r300->index_buffer.buffer,
- PIPE_TRANSFER_READ, &ib_transfer);
+ PIPE_TRANSFER_READ |
+ PIPE_TRANSFER_UNSYNCHRONIZED, &ib_transfer);
}
draw_set_mapped_index_buffer(r300->draw, indices);
@@ -810,8 +836,8 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
draw_flush(r300->draw);
r300->draw_vbo_locked = FALSE;
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- if (r300->vertex_buffer[i].buffer) {
+ for (i = 0; i < r300->vbuf_mgr->nr_vertex_buffers; i++) {
+ if (r300->vbuf_mgr->vertex_buffer[i].buffer) {
pipe_buffer_unmap(pipe, vb_transfer[i]);
draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
}
@@ -874,10 +900,10 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
pipe_resource_reference(&r300->vbo, NULL);
r300->vbo = pipe_buffer_create(screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
R300_MAX_DRAW_VBO_SIZE);
r300->draw_vbo_offset = 0;
r300->draw_vbo_size = R300_MAX_DRAW_VBO_SIZE;
- r300->validate_buffers = TRUE;
}
r300render->vertex_size = vertex_size;
@@ -896,7 +922,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context,
r300->vbo,
- PIPE_TRANSFER_WRITE,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED,
&r300render->vbo_transfer);
assert(r300render->vbo_ptr);
@@ -963,7 +990,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
if (r300->draw_first_emitted) {
if (!r300_prepare_for_rendering(r300,
PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
- NULL, 6, 0, 0))
+ NULL, dwords, 0, 0))
return;
} else {
if (!r300_emit_states(r300,
@@ -972,23 +999,6 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
return;
}
- /* Uncomment to dump all VBOs rendered through this interface.
- * Slow and noisy!
- ptr = pipe_buffer_map(&r300render->r300->context,
- r300render->vbo, PIPE_TRANSFER_READ,
- &r300render->vbo_transfer);
-
- for (i = 0; i < count; i++) {
- printf("r300: Vertex %d\n", i);
- draw_dump_emitted_vertex(&r300->vertex_info, ptr);
- ptr += r300->vertex_info.size * 4;
- printf("\n");
- }
-
- pipe_buffer_unmap(&r300render->r300->context, r300render->vbo,
- r300render->vbo_transfer);
- */
-
BEGIN_CS(dwords);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
r300_provoking_vertex_fixes(r300, r300render->prim));
@@ -1250,7 +1260,7 @@ static void r300_resource_resolve(struct pipe_context* pipe,
aa->aaresolve_ctl =
R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE;
- r300->aa_state.size = 12;
+ r300->aa_state.size = 10;
r300_mark_atom_dirty(r300, &r300->aa_state);
/* Resolve the surface. */
diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c
index c48062c8084..f8c7558f4b4 100644
--- a/src/gallium/drivers/r300/r300_render_translate.c
+++ b/src/gallium/drivers/r300/r300_render_translate.c
@@ -20,207 +20,64 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-/**
- * The functions below translate vertex and index buffers to the layout
- * compatible with the hardware, so that all vertex and index fetches are
- * DWORD-aligned and all used vertex and index formats are supported.
- * For indices, an optional index offset is added to each index.
- */
-
#include "r300_context.h"
-#include "translate/translate.h"
#include "util/u_index_modify.h"
+#include "util/u_upload_mgr.h"
-/* XXX Optimization: use min_index and translate only that range. */
-/* XXX Use the uploader. */
-void r300_begin_vertex_translate(struct r300_context *r300,
- int min_index, int max_index)
-{
- struct pipe_context *pipe = &r300->context;
- struct translate_key key = {0};
- struct translate_element *te;
- unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0};
- struct translate *tr;
- struct r300_vertex_element_state *ve = r300->velems;
- boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
- void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
- struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
- struct pipe_resource *out_buffer;
- unsigned i, num_verts;
- unsigned slot;
-
- /* Initialize the translate key, i.e. the recipe how vertices should be
- * translated. */
- for (i = 0; i < ve->count; i++) {
- struct pipe_vertex_buffer *vb =
- &r300->vertex_buffer[ve->velem[i].vertex_buffer_index];
- enum pipe_format output_format = ve->hw_format[i];
- unsigned output_format_size = ve->hw_format_size[i];
-
- /* Check for support. */
- if (ve->velem[i].src_format == ve->hw_format[i] &&
- (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 &&
- vb->stride % 4 == 0) {
- continue;
- }
-
- /* Workaround for translate: output floats instead of halfs. */
- switch (output_format) {
- case PIPE_FORMAT_R16_FLOAT:
- output_format = PIPE_FORMAT_R32_FLOAT;
- output_format_size = 4;
- break;
- case PIPE_FORMAT_R16G16_FLOAT:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- output_format_size = 8;
- break;
- case PIPE_FORMAT_R16G16B16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- output_format_size = 12;
- break;
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- output_format_size = 16;
- break;
- default:;
- }
-
- /* Add this vertex element. */
- te = &key.element[key.nr_elements];
- /*te->type;
- te->instance_divisor;*/
- te->input_buffer = ve->velem[i].vertex_buffer_index;
- te->input_format = ve->velem[i].src_format;
- te->input_offset = vb->buffer_offset + ve->velem[i].src_offset;
- te->output_format = output_format;
- te->output_offset = key.output_stride;
-
- key.output_stride += output_format_size;
- vb_translated[ve->velem[i].vertex_buffer_index] = TRUE;
- tr_elem_index[i] = key.nr_elements;
- key.nr_elements++;
- }
-
- /* Get a translate object. */
- tr = translate_cache_find(r300->tran.translate_cache, &key);
-
- /* Map buffers we want to translate. */
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- if (vb_translated[i]) {
- struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
-
- vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
- PIPE_TRANSFER_READ, &vb_transfer[i]);
-
- tr->set_buffer(tr, i, vb_map[i], vb->stride, max_index);
- }
- }
-
- /* Create and map the output buffer. */
- num_verts = max_index + 1;
-
- out_buffer = pipe_buffer_create(&r300->screen->screen,
- PIPE_BIND_VERTEX_BUFFER,
- key.output_stride * num_verts);
-
- out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE,
- &out_transfer);
-
- /* Translate. */
- tr->run(tr, 0, num_verts, 0, out_map);
-
- /* Unmap all buffers. */
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- if (vb_translated[i]) {
- pipe_buffer_unmap(pipe, vb_transfer[i]);
- }
- }
-
- pipe_buffer_unmap(pipe, out_transfer);
-
- /* Setup the new vertex buffer in the first free slot. */
- slot = ~0;
- for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
- struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
-
- if (!vb->buffer) {
- pipe_resource_reference(&r300->valid_vertex_buffer[i], out_buffer);
- vb->buffer_offset = 0;
- vb->stride = key.output_stride;
- slot = i;
- /* XXX probably need to preserve the real count for u_blitter_save_*. */
- r300->vertex_buffer_count = MAX2(r300->vertex_buffer_count, i+1);
- r300->validate_buffers = TRUE;
- break;
- }
- }
- /* XXX This may fail. */
- assert(slot != ~0);
-
- /* Save and replace vertex elements. */
- {
- struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
-
- r300->tran.saved_velems = r300->velems;
-
- for (i = 0; i < ve->count; i++) {
- if (vb_translated[ve->velem[i].vertex_buffer_index]) {
- te = &key.element[tr_elem_index[i]];
- new_velems[i].instance_divisor = ve->velem[i].instance_divisor;
- new_velems[i].src_format = te->output_format;
- new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = slot;
- } else {
- memcpy(&new_velems[i], &ve->velem[i],
- sizeof(struct pipe_vertex_element));
- }
- }
-
- r300->tran.new_velems =
- pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
- pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems);
- }
-
- pipe_resource_reference(&out_buffer, NULL);
-}
-void r300_end_vertex_translate(struct r300_context *r300)
-{
- struct pipe_context *pipe = &r300->context;
-
- /* Restore vertex elements. */
- pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems);
- pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems);
-}
-
-/* XXX Use the uploader. */
void r300_translate_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned *index_size, unsigned index_offset,
unsigned *start, unsigned count)
{
+ struct pipe_resource *out_buffer = NULL;
+ unsigned out_offset;
+ void *ptr;
+ boolean flushed;
+
switch (*index_size) {
- case 1:
- util_shorten_ubyte_elts(&r300->context, index_buffer, index_offset, *start, count);
- *index_size = 2;
- *start = 0;
- r300->validate_buffers = TRUE;
- break;
+ case 1:
+ u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2,
+ &out_offset, &out_buffer, &flushed, &ptr);
+
+ util_shorten_ubyte_elts_to_userptr(
+ &r300->context, *index_buffer, index_offset,
+ *start, count, ptr);
+
+ *index_buffer = NULL;
+ pipe_resource_reference(index_buffer, out_buffer);
+ *index_size = 2;
+ *start = out_offset / 2;
+ break;
+
+ case 2:
+ if (index_offset) {
+ u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 2,
+ &out_offset, &out_buffer, &flushed, &ptr);
+
+ util_rebuild_ushort_elts_to_userptr(&r300->context, *index_buffer,
+ index_offset, *start,
+ count, ptr);
+
+ *index_buffer = NULL;
+ pipe_resource_reference(index_buffer, out_buffer);
+ *start = out_offset / 2;
+ }
+ break;
- case 2:
- if (index_offset) {
- util_rebuild_ushort_elts(&r300->context, index_buffer, index_offset, *start, count);
- *start = 0;
- r300->validate_buffers = TRUE;
- }
- break;
+ case 4:
+ if (index_offset) {
+ u_upload_alloc(r300->vbuf_mgr->uploader, 0, count * 4,
+ &out_offset, &out_buffer, &flushed, &ptr);
- case 4:
- if (index_offset) {
- util_rebuild_uint_elts(&r300->context, index_buffer, index_offset, *start, count);
- *start = 0;
- r300->validate_buffers = TRUE;
- }
- break;
+ util_rebuild_uint_elts_to_userptr(&r300->context, *index_buffer,
+ index_offset, *start,
+ count, ptr);
+
+ *index_buffer = NULL;
+ pipe_resource_reference(index_buffer, out_buffer);
+ *start = out_offset / 4;
+ }
+ break;
}
}
diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c
index dd1df970594..f3d8c5b889f 100644
--- a/src/gallium/drivers/r300/r300_resource.c
+++ b/src/gallium/drivers/r300/r300_resource.c
@@ -38,26 +38,22 @@ r300_resource_create(struct pipe_screen *screen,
}
-static struct pipe_resource *
-r300_resource_from_handle(struct pipe_screen * screen,
- const struct pipe_resource *templ,
- struct winsys_handle *whandle)
+static unsigned r300_resource_is_referenced_by_cs(struct pipe_context *context,
+ struct pipe_resource *buf,
+ unsigned level, int layer)
{
- if (templ->target == PIPE_BUFFER)
- return NULL;
- else
- return r300_texture_from_handle(screen, templ, whandle);
+ return r300_buffer_is_referenced(context, buf);
}
void r300_init_resource_functions(struct r300_context *r300)
{
r300->context.get_transfer = u_get_transfer_vtbl;
r300->context.transfer_map = u_transfer_map_vtbl;
- r300->context.transfer_flush_region = u_transfer_flush_region_vtbl;
+ r300->context.transfer_flush_region = u_default_transfer_flush_region;
r300->context.transfer_unmap = u_transfer_unmap_vtbl;
r300->context.transfer_destroy = u_transfer_destroy_vtbl;
r300->context.transfer_inline_write = u_transfer_inline_write_vtbl;
- r300->context.is_resource_referenced = u_is_resource_referenced_vtbl;
+ r300->context.is_resource_referenced = r300_resource_is_referenced_by_cs;
r300->context.create_surface = r300_create_surface;
r300->context.surface_destroy = r300_surface_destroy;
}
@@ -65,8 +61,8 @@ void r300_init_resource_functions(struct r300_context *r300)
void r300_init_screen_resource_functions(struct r300_screen *r300screen)
{
r300screen->screen.resource_create = r300_resource_create;
- r300screen->screen.resource_from_handle = r300_resource_from_handle;
- r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl;
+ r300screen->screen.resource_from_handle = r300_texture_from_handle;
+ r300screen->screen.resource_get_handle = r300_resource_get_handle;
r300screen->screen.resource_destroy = u_resource_destroy_vtbl;
r300screen->screen.user_buffer_create = r300_user_buffer_create;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index c75aeaa10a7..77a9c6ad86f 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -34,10 +34,6 @@
#include "draw/draw_context.h"
-#ifdef HAVE_LLVM
-#include "gallivm/lp_bld_init.h"
-#endif
-
/* Return the identifier behind whom the brave coders responsible for this
* amalgamation of code, sweat, and duct tape, routinely obscure their names.
*
@@ -87,12 +83,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen)
static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
struct r300_screen* r300screen = r300_screen(pscreen);
- boolean is_r400 = r300screen->caps.is_r400;
boolean is_r500 = r300screen->caps.is_r500;
- /* XXX extended shader capabilities of r400 unimplemented */
- is_r400 = FALSE;
-
switch (param) {
/* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
@@ -129,11 +121,13 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_DUAL_SOURCE_BLEND:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
- case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */
+ case PIPE_CAP_DEPTH_CLAMP:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_INSTANCED_DRAWING:
+ case PIPE_CAP_ARRAY_TEXTURES:
return 0;
/* Texturing. */
@@ -175,9 +169,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
boolean is_r400 = r300screen->caps.is_r400;
boolean is_r500 = r300screen->caps.is_r500;
- /* XXX extended shader capabilities of r400 unimplemented */
- is_r400 = FALSE;
-
switch (shader)
{
case PIPE_SHADER_FRAGMENT:
@@ -407,6 +398,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
struct r300_winsys_screen *rws = r300_winsys_screen(pscreen);
util_slab_destroy(&r300screen->pool_buffers);
+ pipe_mutex_destroy(r300screen->num_contexts_mutex);
if (rws)
rws->destroy(rws);
@@ -418,32 +410,28 @@ static void r300_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
- struct r300_fence **oldf = (struct r300_fence**)ptr;
- struct r300_fence *newf = (struct r300_fence*)fence;
-
- if (pipe_reference(&(*oldf)->reference, &newf->reference))
- FREE(*oldf);
-
- *ptr = fence;
+ r300_winsys_bo_reference((struct r300_winsys_bo**)ptr,
+ (struct r300_winsys_bo*)fence);
}
static int r300_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
unsigned flags)
{
- struct r300_fence *rfence = (struct r300_fence*)fence;
+ struct r300_winsys_screen *rws = r300_screen(screen)->rws;
+ struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence;
- return rfence->signalled ? 0 : 1; /* 0 == success */
+ return !rws->buffer_is_busy(rfence) ? 0 : 1; /* 0 == success */
}
static int r300_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
unsigned flags)
{
- struct r300_fence *rfence = (struct r300_fence*)fence;
+ struct r300_winsys_screen *rws = r300_screen(screen)->rws;
+ struct r300_winsys_bo *rfence = (struct r300_winsys_bo*)fence;
- r300_finish(rfence->ctx);
- rfence->signalled = TRUE;
+ rws->buffer_wait(rfence);
return 0; /* 0 == success */
}
@@ -463,12 +451,19 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
r300_init_debug(r300screen);
r300_parse_chipset(&r300screen->caps);
+ if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK))
+ r300screen->caps.zmask_ram = 0;
+ if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ))
+ r300screen->caps.hiz_ram = 0;
+
r300screen->caps.index_bias_supported =
r300screen->caps.is_r500 &&
rws->get_value(rws, R300_VID_DRM_2_3_0);
+ pipe_mutex_init(r300screen->num_contexts_mutex);
+
util_slab_create(&r300screen->pool_buffers,
- sizeof(struct r300_buffer), 64,
+ sizeof(struct r300_resource), 64,
UTIL_SLAB_SINGLETHREADED);
r300screen->rws = rws;
@@ -490,9 +485,5 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws)
util_format_s3tc_init();
-#ifdef HAVE_LLVM
- lp_build_init();
-#endif
-
return &r300screen->screen;
}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 752f53b7579..576f9c1f4a9 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -52,6 +52,7 @@ struct r300_screen {
/* The number of created contexts to know whether we have multiple
* contexts or not. */
int num_contexts;
+ pipe_mutex num_contexts_mutex;
};
@@ -94,6 +95,7 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_HYPERZ (1 << 12)
#define DBG_SCISSOR (1 << 13)
#define DBG_UPLOAD (1 << 14)
+#define DBG_INFO (1 << 15)
/* Features. */
#define DBG_ANISOHQ (1 << 16)
#define DBG_NO_TILING (1 << 17)
@@ -101,6 +103,8 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_FAKE_OCC (1 << 19)
#define DBG_NO_OPT (1 << 20)
#define DBG_NO_CBZB (1 << 21)
+#define DBG_NO_ZMASK (1 << 22)
+#define DBG_NO_HIZ (1 << 23)
/* Statistics. */
#define DBG_P_STAT (1 << 25)
/*@}*/
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index cc3c1d7687e..1045911f3ae 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -34,117 +34,50 @@
#include "r300_winsys.h"
unsigned r300_buffer_is_referenced(struct pipe_context *context,
- struct pipe_resource *buf,
- enum r300_reference_domain domain)
+ struct pipe_resource *buf)
{
struct r300_context *r300 = r300_context(context);
- struct r300_buffer *rbuf = r300_buffer(buf);
+ struct r300_resource *rbuf = r300_resource(buf);
- if (r300_is_user_buffer(buf))
+ if (rbuf->b.user_ptr || rbuf->constant_buffer)
return PIPE_UNREFERENCED;
- if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, domain))
+ if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf))
return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
return PIPE_UNREFERENCED;
}
-static unsigned r300_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned level, int layer)
-{
- return r300_buffer_is_referenced(context, buf, R300_REF_CS);
-}
-
void r300_upload_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned index_size, unsigned *start,
- unsigned count)
+ unsigned count, uint8_t *ptr)
{
unsigned index_offset;
- uint8_t *ptr = r300_buffer(*index_buffer)->user_buffer;
boolean flushed;
*index_buffer = NULL;
- u_upload_data(r300->upload_ib,
+ u_upload_data(r300->vbuf_mgr->uploader,
0, count * index_size,
ptr + (*start * index_size),
&index_offset,
index_buffer, &flushed);
*start = index_offset / index_size;
-
- if (flushed || !r300->upload_ib_validated) {
- r300->upload_ib_validated = FALSE;
- r300->validate_buffers = TRUE;
- }
-}
-
-void r300_upload_user_buffers(struct r300_context *r300,
- int min_index, int max_index)
-{
- int i, nr = r300->velems->count;
- unsigned count = max_index + 1 - min_index;
- boolean flushed;
- boolean uploaded[16] = {0};
-
- for (i = 0; i < nr; i++) {
- unsigned index = r300->velems->velem[i].vertex_buffer_index;
- struct pipe_vertex_buffer *vb = &r300->vertex_buffer[index];
- struct r300_buffer *userbuf = r300_buffer(vb->buffer);
-
- if (userbuf && userbuf->user_buffer && !uploaded[index]) {
- unsigned first, size;
-
- if (vb->stride) {
- first = vb->stride * min_index;
- size = vb->stride * count;
- } else {
- first = 0;
- size = r300->velems->hw_format_size[i];
- }
-
- DBG(r300, DBG_UPLOAD,
- "Uploading %i bytes, index: %i, buffer: %p, userptr: %p "
- "offset: %i, stride: %i.\n",
- size, index, userbuf, userbuf->user_buffer,
- vb->buffer_offset, vb->stride);
-
- u_upload_data(r300->upload_vb, first, size,
- userbuf->user_buffer + first,
- &vb->buffer_offset,
- &r300->valid_vertex_buffer[index],
- &flushed);
-
- vb->buffer_offset -= first;
-
- r300->vertex_arrays_dirty = TRUE;
-
- if (flushed || !r300->upload_vb_validated) {
- r300->upload_vb_validated = FALSE;
- r300->validate_buffers = TRUE;
- }
- uploaded[index] = TRUE;
- } else {
- assert(r300->valid_vertex_buffer[index]);
- }
- }
- DBG(r300, DBG_UPLOAD, "-------\n");
}
static void r300_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
struct r300_screen *r300screen = r300_screen(screen);
- struct r300_buffer *rbuf = r300_buffer(buf);
- struct r300_winsys_screen *rws = r300screen->rws;
+ struct r300_resource *rbuf = r300_resource(buf);
if (rbuf->constant_buffer)
FREE(rbuf->constant_buffer);
if (rbuf->buf)
- rws->buffer_reference(rws, &rbuf->buf, NULL);
+ r300_winsys_bo_reference(&rbuf->buf, NULL);
util_slab_free(&r300screen->pool_buffers, rbuf);
}
@@ -188,15 +121,15 @@ r300_buffer_transfer_map( struct pipe_context *pipe,
struct r300_context *r300 = r300_context(pipe);
struct r300_screen *r300screen = r300_screen(pipe->screen);
struct r300_winsys_screen *rws = r300screen->rws;
- struct r300_buffer *rbuf = r300_buffer(transfer->resource);
+ struct r300_resource *rbuf = r300_resource(transfer->resource);
uint8_t *map;
- if (rbuf->user_buffer)
- return (uint8_t *) rbuf->user_buffer + transfer->box.x;
+ if (rbuf->b.user_ptr)
+ return (uint8_t *) rbuf->b.user_ptr + transfer->box.x;
if (rbuf->constant_buffer)
return (uint8_t *) rbuf->constant_buffer + transfer->box.x;
- map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage);
+ map = rws->buffer_map(rbuf->buf, r300->cs, transfer->usage);
if (map == NULL)
return NULL;
@@ -204,22 +137,15 @@ r300_buffer_transfer_map( struct pipe_context *pipe,
return map + transfer->box.x;
}
-static void r300_buffer_transfer_flush_region( struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *box)
-{
- /* no-op */
-}
-
static void r300_buffer_transfer_unmap( struct pipe_context *pipe,
struct pipe_transfer *transfer )
{
struct r300_screen *r300screen = r300_screen(pipe->screen);
struct r300_winsys_screen *rws = r300screen->rws;
- struct r300_buffer *rbuf = r300_buffer(transfer->resource);
+ struct r300_resource *rbuf = r300_resource(transfer->resource);
if (rbuf->buf) {
- rws->buffer_unmap(rws, rbuf->buf);
+ rws->buffer_unmap(rbuf->buf);
}
}
@@ -234,32 +160,32 @@ static void r300_buffer_transfer_inline_write(struct pipe_context *pipe,
{
struct r300_context *r300 = r300_context(pipe);
struct r300_winsys_screen *rws = r300->screen->rws;
- struct r300_buffer *rbuf = r300_buffer(resource);
+ struct r300_resource *rbuf = r300_resource(resource);
uint8_t *map = NULL;
if (rbuf->constant_buffer) {
memcpy(rbuf->constant_buffer + box->x, data, box->width);
return;
}
- assert(rbuf->user_buffer == NULL);
+ assert(rbuf->b.user_ptr == NULL);
- map = rws->buffer_map(rws, rbuf->buf, r300->cs,
+ map = rws->buffer_map(rbuf->buf, r300->cs,
PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage);
memcpy(map + box->x, data, box->width);
- rws->buffer_unmap(rws, rbuf->buf);
+ rws->buffer_unmap(rbuf->buf);
}
-struct u_resource_vtbl r300_buffer_vtbl =
+static const struct u_resource_vtbl r300_buffer_vtbl =
{
- u_default_resource_get_handle, /* get_handle */
+ NULL, /* get_handle */
r300_buffer_destroy, /* resource_destroy */
- r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */
+ NULL, /* is_buffer_referenced */
r300_buffer_get_transfer, /* get_transfer */
r300_buffer_transfer_destroy, /* transfer_destroy */
r300_buffer_transfer_map, /* transfer_map */
- r300_buffer_transfer_flush_region, /* transfer_flush_region */
+ NULL, /* transfer_flush_region */
r300_buffer_transfer_unmap, /* transfer_unmap */
r300_buffer_transfer_inline_write /* transfer_inline_write */
};
@@ -268,42 +194,41 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct r300_screen *r300screen = r300_screen(screen);
- struct r300_buffer *rbuf;
+ struct r300_resource *rbuf;
unsigned alignment = 16;
rbuf = util_slab_alloc(&r300screen->pool_buffers);
- rbuf->magic = R300_BUFFER_MAGIC;
-
- rbuf->b.b = *templ;
- rbuf->b.vtbl = &r300_buffer_vtbl;
- pipe_reference_init(&rbuf->b.b.reference, 1);
- rbuf->b.b.screen = screen;
+ rbuf->b.b.b = *templ;
+ rbuf->b.b.vtbl = &r300_buffer_vtbl;
+ pipe_reference_init(&rbuf->b.b.b.reference, 1);
+ rbuf->b.b.b.screen = screen;
+ rbuf->b.user_ptr = NULL;
rbuf->domain = R300_DOMAIN_GTT;
rbuf->buf = NULL;
+ rbuf->buf_size = templ->width0;
rbuf->constant_buffer = NULL;
- rbuf->user_buffer = NULL;
/* Alloc constant buffers in RAM. */
if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) {
rbuf->constant_buffer = MALLOC(templ->width0);
- return &rbuf->b.b;
+ return &rbuf->b.b.b;
}
rbuf->buf =
r300screen->rws->buffer_create(r300screen->rws,
- rbuf->b.b.width0, alignment,
- rbuf->b.b.bind, rbuf->b.b.usage,
+ rbuf->b.b.b.width0, alignment,
+ rbuf->b.b.b.bind, rbuf->b.b.b.usage,
rbuf->domain);
- rbuf->cs_buf =
- r300screen->rws->buffer_get_cs_handle(r300screen->rws, rbuf->buf);
-
if (!rbuf->buf) {
util_slab_free(&r300screen->pool_buffers, rbuf);
return NULL;
}
- return &rbuf->b.b;
+ rbuf->cs_buf =
+ r300screen->rws->buffer_get_cs_handle(rbuf->buf);
+
+ return &rbuf->b.b.b;
}
struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
@@ -311,27 +236,26 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
unsigned bind)
{
struct r300_screen *r300screen = r300_screen(screen);
- struct r300_buffer *rbuf;
+ struct r300_resource *rbuf;
rbuf = util_slab_alloc(&r300screen->pool_buffers);
- rbuf->magic = R300_BUFFER_MAGIC;
-
- pipe_reference_init(&rbuf->b.b.reference, 1);
- rbuf->b.vtbl = &r300_buffer_vtbl;
- rbuf->b.b.screen = screen;
- rbuf->b.b.target = PIPE_BUFFER;
- rbuf->b.b.format = PIPE_FORMAT_R8_UNORM;
- rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE;
- rbuf->b.b.bind = bind;
- rbuf->b.b.width0 = ~0;
- rbuf->b.b.height0 = 1;
- rbuf->b.b.depth0 = 1;
- rbuf->b.b.array_size = 1;
- rbuf->b.b.flags = 0;
+ pipe_reference_init(&rbuf->b.b.b.reference, 1);
+ rbuf->b.b.b.screen = screen;
+ rbuf->b.b.b.target = PIPE_BUFFER;
+ rbuf->b.b.b.format = PIPE_FORMAT_R8_UNORM;
+ rbuf->b.b.b.usage = PIPE_USAGE_IMMUTABLE;
+ rbuf->b.b.b.bind = bind;
+ rbuf->b.b.b.width0 = ~0;
+ rbuf->b.b.b.height0 = 1;
+ rbuf->b.b.b.depth0 = 1;
+ rbuf->b.b.b.array_size = 1;
+ rbuf->b.b.b.flags = 0;
+ rbuf->b.b.vtbl = &r300_buffer_vtbl;
+ rbuf->b.user_ptr = ptr;
rbuf->domain = R300_DOMAIN_GTT;
rbuf->buf = NULL;
+ rbuf->buf_size = size;
rbuf->constant_buffer = NULL;
- rbuf->user_buffer = ptr;
- return &rbuf->b.b;
+ return &rbuf->b.b.b;
}
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h
index 58dec8539b6..14bee460d5b 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.h
+++ b/src/gallium/drivers/r300/r300_screen_buffer.h
@@ -35,39 +35,12 @@
#include "r300_winsys.h"
#include "r300_context.h"
-#define R300_BUFFER_MAGIC 0xabcd1234
-#define R300_BUFFER_MAX_RANGES 32
-
-struct r300_buffer_range {
- uint32_t start;
- uint32_t end;
-};
-
-/* Vertex buffer. */
-struct r300_buffer
-{
- struct u_resource b;
-
- uint32_t magic;
-
- struct r300_winsys_buffer *buf;
- struct r300_winsys_cs_buffer *cs_buf;
-
- enum r300_buffer_domain domain;
-
- uint8_t *user_buffer;
- uint8_t *constant_buffer;
-};
-
/* Functions. */
-void r300_upload_user_buffers(struct r300_context *r300,
- int min_index, int max_index);
-
void r300_upload_index_buffer(struct r300_context *r300,
struct pipe_resource **index_buffer,
unsigned index_size, unsigned *start,
- unsigned count);
+ unsigned count, uint8_t *ptr);
struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
@@ -77,8 +50,7 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
unsigned bind);
unsigned r300_buffer_is_referenced(struct pipe_context *context,
- struct pipe_resource *buf,
- enum r300_reference_domain domain);
+ struct pipe_resource *buf);
/* Inline functions. */
@@ -87,9 +59,4 @@ static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
return (struct r300_buffer *)buffer;
}
-static INLINE boolean r300_is_user_buffer(struct pipe_resource *buffer)
-{
- return r300_buffer(buffer)->user_buffer ? true : false;
-}
-
#endif
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 3a97b76a4c8..09f18b3e624 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -28,6 +28,7 @@
#include "util/u_mm.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
@@ -442,8 +443,7 @@ static void r300_set_clip_state(struct pipe_context* pipe,
OUT_CB_TABLE(state->ucp, state->nr * 4);
}
OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) |
- R300_PS_UCP_MODE_CLIP_AS_TRIFAN |
- (state->depth_clamp ? R300_CLIP_DISABLE : 0));
+ R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
END_CB;
r300_mark_atom_dirty(r300, &r300->clip_state);
@@ -617,21 +617,16 @@ static void r300_set_stencil_ref(struct pipe_context* pipe,
}
static void r300_tex_set_tiling_flags(struct r300_context *r300,
- struct r300_texture *tex, unsigned level)
+ struct r300_resource *tex,
+ unsigned level)
{
/* Check if the macrotile flag needs to be changed.
* Skip changing the flags otherwise. */
- if (tex->desc.macrotile[tex->surface_level] !=
- tex->desc.macrotile[level]) {
- /* Tiling determines how DRM treats the buffer data.
- * We must flush CS when changing it if the buffer is referenced. */
- if (r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->cs_buffer, R300_REF_CS))
- r300->context.flush(&r300->context, 0, NULL);
-
- r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
- tex->desc.microtile, tex->desc.macrotile[level],
- tex->desc.stride_in_bytes[0]);
+ if (tex->tex.macrotile[tex->surface_level] !=
+ tex->tex.macrotile[level]) {
+ r300->rws->buffer_set_tiling(tex->buf, r300->cs,
+ tex->tex.microtile, tex->tex.macrotile[level],
+ tex->tex.stride_in_bytes[0]);
tex->surface_level = level;
}
@@ -646,12 +641,12 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300,
/* Set tiling flags for new surfaces. */
for (i = 0; i < state->nr_cbufs; i++) {
r300_tex_set_tiling_flags(r300,
- r300_texture(state->cbufs[i]->texture),
+ r300_resource(state->cbufs[i]->texture),
state->cbufs[i]->u.tex.level);
}
if (state->zsbuf) {
r300_tex_set_tiling_flags(r300,
- r300_texture(state->zsbuf->texture),
+ r300_resource(state->zsbuf->texture),
state->zsbuf->u.tex.level);
}
}
@@ -660,7 +655,7 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
const char *binding)
{
struct pipe_resource *tex = surf->texture;
- struct r300_texture *rtex = r300_texture(tex);
+ struct r300_resource *rtex = r300_resource(tex);
fprintf(stderr,
"r300: %s[%i] Dim: %ix%i, Firstlayer: %i, "
@@ -673,9 +668,9 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level,
util_format_short_name(surf->format),
- rtex->desc.macrotile[0] ? "YES" : " NO",
- rtex->desc.microtile ? "YES" : " NO",
- rtex->desc.stride_in_pixels[0],
+ rtex->tex.macrotile[0] ? "YES" : " NO",
+ rtex->tex.microtile ? "YES" : " NO",
+ rtex->tex.stride_in_pixels[0],
tex->width0, tex->height0, tex->depth0,
tex->last_level, util_format_short_name(tex->format));
}
@@ -695,8 +690,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
}
if (change == R300_CHANGED_FB_STATE ||
- change == R300_CHANGED_CBZB_FLAG ||
- change == R300_CHANGED_ZCLEAR_FLAG) {
+ change == R300_CHANGED_HYPERZ_FLAG) {
r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
@@ -720,8 +714,8 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
}
static void
- r300_set_framebuffer_state(struct pipe_context* pipe,
- const struct pipe_framebuffer_state* state)
+r300_set_framebuffer_state(struct pipe_context* pipe,
+ const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
@@ -729,7 +723,6 @@ static void
boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
unsigned max_width, max_height, i;
uint32_t zbuffer_bpp = 0;
- int blocksize;
if (r300->screen->caps.is_r500) {
max_width = max_height = 4096;
@@ -745,6 +738,32 @@ static void
return;
}
+ if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) {
+ /* There is a zmask in use, what are we gonna do? */
+ if (state->zsbuf) {
+ if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
+ /* Decompress the currently bound zbuffer before we bind another one. */
+ r300_decompress_zmask(r300);
+ }
+ } else {
+ /* We don't bind another zbuffer, so lock the current one. */
+ r300->zmask_locked = TRUE;
+ pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
+ }
+ } else if (r300->zmask_locked && r300->locked_zbuffer) {
+ /* We have a locked zbuffer now, what are we gonna do? */
+ if (state->zsbuf) {
+ if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
+ /* We are binding some other zbuffer, so decompress the locked one,
+ * it gets unlocked automatically. */
+ r300_decompress_zmask_locked_unsafe(r300);
+ } else {
+ /* We are binding the locked zbuffer again, so unlock it. */
+ r300->zmask_locked = FALSE;
+ }
+ }
+ }
+
/* If nr_cbufs is changed from zero to non-zero or vice versa... */
if (!!old_state->nr_cbufs != !!state->nr_cbufs) {
r300_mark_atom_dirty(r300, &r300->blend_state);
@@ -759,14 +778,14 @@ static void
util_copy_framebuffer_state(r300->fb_state.state, state);
+ if (!r300->zmask_locked) {
+ pipe_surface_reference(&r300->locked_zbuffer, NULL);
+ }
+
r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
- r300->validate_buffers = TRUE;
- r300->z_compression = false;
-
if (state->zsbuf) {
- blocksize = util_format_get_blocksize(state->zsbuf->texture->format);
- switch (blocksize) {
+ switch (util_format_get_blocksize(state->zsbuf->texture->format)) {
case 2:
zbuffer_bpp = 16;
break;
@@ -774,30 +793,19 @@ static void
zbuffer_bpp = 24;
break;
}
+
+ /* Setup Hyper-Z. */
if (can_hyperz) {
struct r300_surface *zs_surf = r300_surface(state->zsbuf);
- struct r300_texture *tex;
- int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44;
+ struct r300_resource *tex = r300_resource(zs_surf->base.texture);
int level = zs_surf->base.u.tex.level;
- tex = r300_texture(zs_surf->base.texture);
-
/* work out whether we can support hiz on this buffer */
r300_hiz_alloc_block(r300, zs_surf);
-
- /* work out whether we can support zmask features on this buffer */
- r300_zmask_alloc_block(r300, zs_surf, compress);
-
- if (tex->zmask_mem[level]) {
- /* compression causes hangs on 16-bit */
- if (zbuffer_bpp == 24)
- r300->z_compression = compress;
- }
+
DBG(r300, DBG_HYPERZ,
- "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
- tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef,
- r300->z_compression, tex->zmask_mem[level] ? 1 : 0,
- tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef);
+ "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
+ tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef);
}
/* Polygon offset depends on the zbuffer bit depth. */
@@ -1300,7 +1308,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct r300_textures_state* state =
(struct r300_textures_state*)r300->textures_state.state;
- struct r300_texture *texture;
+ struct r300_resource *texture;
unsigned i, real_num_views = 0, view_index = 0;
unsigned tex_units = r300->screen->caps.num_tex_units;
boolean dirty_tex = FALSE;
@@ -1329,8 +1337,8 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
/* Set the texrect factor in the fragment shader.
* Needed for RECT and NPOT fallback. */
- texture = r300_texture(views[i]->texture);
- if (texture->desc.is_npot) {
+ texture = r300_resource(views[i]->texture);
+ if (texture->tex.is_npot) {
r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
}
@@ -1350,7 +1358,6 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
state->sampler_view_count = count;
r300_mark_atom_dirty(r300, &r300->textures_state);
- r300->validate_buffers = TRUE;
if (dirty_tex) {
r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
@@ -1363,7 +1370,7 @@ r300_create_sampler_view(struct pipe_context *pipe,
const struct pipe_sampler_view *templ)
{
struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
- struct r300_texture *tex = r300_texture(texture);
+ struct r300_resource *tex = r300_resource(texture);
boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle;
@@ -1465,10 +1472,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
const struct pipe_vertex_buffer* buffers)
{
struct r300_context* r300 = r300_context(pipe);
- const struct pipe_vertex_buffer *vbo;
- unsigned i, max_index = (1 << 24) - 1;
- boolean any_user_buffer = FALSE;
- boolean any_nonuser_buffer = FALSE;
+ unsigned i;
struct pipe_vertex_buffer dummy_vb = {0};
/* There must be at least one vertex buffer set, otherwise it locks up. */
@@ -1478,90 +1482,20 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
count = 1;
}
- if (count == r300->vertex_buffer_count &&
- memcmp(r300->vertex_buffer, buffers,
- sizeof(struct pipe_vertex_buffer) * count) == 0) {
- return;
- }
+ u_vbuf_mgr_set_vertex_buffers(r300->vbuf_mgr, count, buffers);
if (r300->screen->caps.has_tcl) {
/* HW TCL. */
- r300->incompatible_vb_layout = FALSE;
-
- /* Check if the strides and offsets are aligned to the size of DWORD. */
- for (i = 0; i < count; i++) {
- if (buffers[i].buffer) {
- if (buffers[i].stride % 4 != 0 ||
- buffers[i].buffer_offset % 4 != 0) {
- r300->incompatible_vb_layout = TRUE;
- break;
- }
- }
- }
-
for (i = 0; i < count; i++) {
- vbo = &buffers[i];
-
- /* Skip NULL buffers */
- if (!vbo->buffer) {
- continue;
- }
-
- /* User buffers have no info about maximum index,
- * we will have to compute it in draw_vbo. */
- if (r300_is_user_buffer(vbo->buffer)) {
- any_user_buffer = TRUE;
- continue;
- }
- any_nonuser_buffer = TRUE;
-
- /* The stride of zero means we will be fetching only the first
- * vertex, so don't care about max_index. */
- if (!vbo->stride)
- continue;
-
- /* Update the maximum index. */
- {
- unsigned vbo_max_index =
- (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
- max_index = MIN2(max_index, vbo_max_index);
+ if (buffers[i].buffer &&
+ !r300_resource(buffers[i].buffer)->b.user_ptr) {
}
}
-
- r300->any_user_vbs = any_user_buffer;
- r300->vertex_buffer_max_index = max_index;
r300->vertex_arrays_dirty = TRUE;
- if (any_nonuser_buffer)
- r300->validate_buffers = TRUE;
- if (!any_user_buffer)
- r300->upload_vb_validated = FALSE;
} else {
/* SW TCL. */
draw_set_vertex_buffers(r300->draw, count, buffers);
}
-
- /* Common code. */
- for (i = 0; i < count; i++) {
- vbo = &buffers[i];
-
- /* Reference our buffer. */
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer);
- if (vbo->buffer && r300_is_user_buffer(vbo->buffer)) {
- pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL);
- } else {
- pipe_resource_reference(&r300->valid_vertex_buffer[i], vbo->buffer);
- }
- }
- for (; i < r300->vertex_buffer_count; i++) {
- /* Dereference any old buffers. */
- pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
- pipe_resource_reference(&r300->valid_vertex_buffer[i], NULL);
- }
-
- memcpy(r300->vertex_buffer, buffers,
- sizeof(struct pipe_vertex_buffer) * count);
-
- r300->vertex_buffer_count = count;
}
static void r300_set_index_buffer(struct pipe_context* pipe,
@@ -1570,14 +1504,11 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
if (ib && ib->buffer) {
+ assert(ib->offset % ib->index_size == 0);
+
pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer);
memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer));
-
- if (r300->screen->caps.has_tcl &&
- !r300_is_user_buffer(ib->buffer)) {
- r300->validate_buffers = TRUE;
- r300->upload_ib_validated = FALSE;
- }
+ r300->index_buffer.offset /= r300->index_buffer.index_size;
}
else {
pipe_resource_reference(&r300->index_buffer.buffer, NULL);
@@ -1607,7 +1538,7 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
* so PSC should just route stuff based on the vertex elements,
* and not on attrib information. */
for (i = 0; i < velems->count; i++) {
- format = velems->hw_format[i];
+ format = velems->velem[i].src_format;
type = r300_translate_vertex_data_type(format);
if (type == R300_INVALID_FORMAT) {
@@ -1639,16 +1570,13 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
vstream->count = (i >> 1) + 1;
}
-#define FORMAT_REPLACE(what, withwhat) \
- case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break
-
static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
unsigned count,
const struct pipe_vertex_element* attribs)
{
+ struct r300_context *r300 = r300_context(pipe);
struct r300_vertex_element_state *velems;
unsigned i;
- enum pipe_format *format;
struct pipe_vertex_element dummy_attrib = {0};
/* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */
@@ -1660,77 +1588,26 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
assert(count <= PIPE_MAX_ATTRIBS);
velems = CALLOC_STRUCT(r300_vertex_element_state);
- if (velems != NULL) {
- velems->count = count;
- memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
-
- if (r300_screen(pipe->screen)->caps.has_tcl) {
- /* Set the best hw format in case the original format is not
- * supported by hw. */
- for (i = 0; i < count; i++) {
- velems->hw_format[i] = velems->velem[i].src_format;
- format = &velems->hw_format[i];
-
- /* This is basically the list of unsupported formats.
- * For now we don't care about the alignment, that's going to
- * be sorted out after the PSC setup. */
- switch (*format) {
- FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
- FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
- FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
- FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_UNORM, R32_FLOAT);
- FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_USCALED, R32_FLOAT);
- FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_SNORM, R32_FLOAT);
- FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_SSCALED, R32_FLOAT);
- FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT);
-
- FORMAT_REPLACE(R32_FIXED, R32_FLOAT);
- FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT);
- FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT);
- FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT);
-
- default:;
- }
+ if (!velems)
+ return NULL;
- velems->incompatible_layout =
- velems->incompatible_layout ||
- velems->velem[i].src_format != velems->hw_format[i] ||
- velems->velem[i].src_offset % 4 != 0;
- }
+ velems->count = count;
+ velems->vmgr_elements =
+ u_vbuf_mgr_create_vertex_elements(r300->vbuf_mgr, count, attribs,
+ velems->velem);
- /* Now setup PSC.
- * The unused components will be replaced by (..., 0, 1). */
- r300_vertex_psc(velems);
-
- /* Align the formats to the size of DWORD.
- * We only care about the blocksizes of the formats since
- * swizzles are already set up.
- * Also compute the vertex size. */
- for (i = 0; i < count; i++) {
- /* This is OK because we check for aligned strides too
- * elsewhere. */
- velems->hw_format_size[i] =
- align(util_format_get_blocksize(velems->hw_format[i]), 4);
- velems->vertex_size_dwords += velems->hw_format_size[i] / 4;
- }
+ if (r300_screen(pipe->screen)->caps.has_tcl) {
+ /* Setup PSC.
+ * The unused components will be replaced by (..., 0, 1). */
+ r300_vertex_psc(velems);
+
+ for (i = 0; i < count; i++) {
+ velems->format_size[i] =
+ align(util_format_get_blocksize(velems->velem[i].src_format), 4);
+ velems->vertex_size_dwords += velems->format_size[i] / 4;
}
}
+
return velems;
}
@@ -1746,6 +1623,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
r300->velems = velems;
+ u_vbuf_mgr_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements);
+
if (r300->draw) {
draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
return;
@@ -1758,7 +1637,11 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
{
- FREE(state);
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_vertex_element_state *velems = state;
+
+ u_vbuf_mgr_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements);
+ FREE(state);
}
static void* r300_create_vs_state(struct pipe_context* pipe,
@@ -1845,7 +1728,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct r300_constant_buffer *cbuf;
- struct r300_buffer *rbuf = r300_buffer(buf);
+ struct r300_resource *rbuf = r300_resource(buf);
uint32_t *mapped;
switch (shader) {
@@ -1862,8 +1745,8 @@ static void r300_set_constant_buffer(struct pipe_context *pipe,
if (buf == NULL || buf->width0 == 0)
return;
- if (rbuf->user_buffer)
- mapped = (uint32_t*)rbuf->user_buffer;
+ if (rbuf->b.user_ptr)
+ mapped = (uint32_t*)rbuf->b.user_ptr;
else if (rbuf->constant_buffer)
mapped = (uint32_t*)rbuf->constant_buffer;
else
@@ -1947,6 +1830,7 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.set_vertex_buffers = r300_set_vertex_buffers;
r300->context.set_index_buffer = r300_set_index_buffer;
+ r300->context.redefine_user_buffer = u_default_redefine_user_buffer;
r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 95be7849f8f..003fe9a58cd 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -489,7 +489,8 @@ static void r300_update_rs_block(struct r300_context *r300)
for (; i < ATTR_GENERIC_COUNT; i++) {
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
- "not enough hardware slots.\n", i);
+ "not enough hardware slots (it's not a bug, do not "
+ "report it).\n", i);
}
}
@@ -524,7 +525,8 @@ static void r300_update_rs_block(struct r300_context *r300)
DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n");
} else {
fprintf(stderr, "r300: ERROR: FS input fog unassigned, "
- "not enough hardware slots.\n");
+ "not enough hardware slots. (it's not a bug, "
+ "do not report it)\n");
}
}
}
@@ -551,7 +553,8 @@ static void r300_update_rs_block(struct r300_context *r300)
} else {
if (fs_inputs->wpos != ATTR_UNUSED && tex_count >= 8) {
fprintf(stderr, "r300: ERROR: FS input WPOS unassigned, "
- "not enough hardware slots.\n");
+ "not enough hardware slots. (it's not a bug, do not "
+ "report it)\n");
}
}
@@ -674,7 +677,20 @@ static uint32_t r300_get_border_color(enum pipe_format format,
case 16:
if (desc->nr_channels <= 2) {
border_swizzled[0] = border_swizzled[2];
- util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc);
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
+ util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_FLOAT, &uc);
+ } else {
+ util_pack_color(border_swizzled, PIPE_FORMAT_R16G16_UNORM, &uc);
+ }
+ } else {
+ util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ }
+ break;
+
+ case 32:
+ if (desc->nr_channels == 1) {
+ border_swizzled[0] = border_swizzled[2];
+ util_pack_color(border_swizzled, PIPE_FORMAT_R32_FLOAT, &uc);
} else {
util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
}
@@ -684,6 +700,25 @@ static uint32_t r300_get_border_color(enum pipe_format format,
return uc.ui;
}
+static boolean util_format_is_float(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+ unsigned i;
+
+ if (!format)
+ return FALSE;
+
+ /* Find the first non-void channel. */
+ for (i = 0; i < 4; i++)
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
+ break;
+
+ if (i == 4)
+ return FALSE;
+
+ return desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT ? TRUE : FALSE;
+}
+
static void r300_merge_textures_and_samplers(struct r300_context* r300)
{
struct r300_textures_state *state =
@@ -691,7 +726,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
struct r300_texture_sampler_state *texstate;
struct r300_sampler_state *sampler;
struct r300_sampler_view *view;
- struct r300_texture *tex;
+ struct r300_resource *tex;
unsigned min_level, max_level, i, j, size;
unsigned count = MIN2(state->sampler_view_count,
state->sampler_state_count);
@@ -709,7 +744,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
state->tx_enable |= 1 << i;
view = state->sampler_views[i];
- tex = r300_texture(view->base.texture);
+ tex = r300_resource(view->base.texture);
sampler = state->sampler_states[i];
texstate = &state->regs[i];
@@ -725,32 +760,37 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
/* determine min/max levels */
max_level = MIN3(sampler->max_lod + view->base.u.tex.first_level,
- tex->desc.b.b.last_level, view->base.u.tex.last_level);
+ tex->b.b.b.last_level, view->base.u.tex.last_level);
min_level = MIN2(sampler->min_lod + view->base.u.tex.first_level,
max_level);
- if (tex->desc.is_npot && min_level > 0) {
+ if (tex->tex.is_npot && min_level > 0) {
/* Even though we do not implement mipmapping for NPOT
* textures, we should at least honor the minimum level
* which is allowed to be displayed. We do this by setting up
- * an i-th mipmap level as the zero level. */
- r300_texture_setup_format_state(r300->screen, &tex->desc,
+ * the i-th mipmap level as the zero level. */
+ unsigned offset = tex->tex_offset +
+ tex->tex.offset_in_bytes[min_level];
+
+ r300_texture_setup_format_state(r300->screen, tex,
min_level,
&texstate->format);
- texstate->format.tile_config |=
- tex->desc.offset_in_bytes[min_level] & 0xffffffe0;
- assert((tex->desc.offset_in_bytes[min_level] & 0x1f) == 0);
+ texstate->format.tile_config |= offset & 0xffffffe0;
+ assert((offset & 0x1f) == 0);
+ } else {
+ texstate->format.tile_config |= tex->tex_offset & 0xffffffe0;
+ assert((tex->tex_offset & 0x1f) == 0);
}
/* Assign a texture cache region. */
texstate->format.format1 |= view->texcache_region;
/* Depth textures are kinda special. */
- if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) {
+ if (util_format_is_depth_or_stencil(tex->b.b.b.format)) {
unsigned char depth_swizzle[4];
if (!r300->screen->caps.is_r500 &&
- util_format_get_blocksizebits(tex->desc.b.b.format) == 32) {
+ util_format_get_blocksizebits(tex->b.b.b.format) == 32) {
/* X24x8 is sampled as Y16X16 on r3xx-r4xx.
* The depth here is at the Y component. */
for (j = 0; j < 4; j++)
@@ -775,17 +815,17 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
if (r300->screen->caps.dxtc_swizzle &&
- util_format_is_compressed(tex->desc.b.b.format)) {
+ util_format_is_compressed(tex->b.b.b.format)) {
texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE;
}
/* to emulate 1D textures through 2D ones correctly */
- if (tex->desc.b.b.target == PIPE_TEXTURE_1D) {
+ if (tex->b.b.b.target == PIPE_TEXTURE_1D) {
texstate->filter0 &= ~R300_TX_WRAP_T_MASK;
texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
}
- if (tex->desc.is_npot) {
+ if (tex->tex.is_npot) {
/* NPOT textures don't support mip filter, unfortunately.
* This prevents incorrect rendering. */
texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
@@ -814,6 +854,32 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
}
+ /* Float textures only support nearest and mip-nearest filtering. */
+ if (util_format_is_float(tex->b.b.b.format)) {
+ /* No MAG linear filtering. */
+ if ((texstate->filter0 & R300_TX_MAG_FILTER_MASK) ==
+ R300_TX_MAG_FILTER_LINEAR) {
+ texstate->filter0 &= ~R300_TX_MAG_FILTER_MASK;
+ texstate->filter0 |= R300_TX_MAG_FILTER_NEAREST;
+ }
+ /* No MIN linear filtering. */
+ if ((texstate->filter0 & R300_TX_MIN_FILTER_MASK) ==
+ R300_TX_MIN_FILTER_LINEAR) {
+ texstate->filter0 &= ~R300_TX_MIN_FILTER_MASK;
+ texstate->filter0 |= R300_TX_MIN_FILTER_NEAREST;
+ }
+ /* No mipmap linear filtering. */
+ if ((texstate->filter0 & R300_TX_MIN_FILTER_MIP_MASK) ==
+ R300_TX_MIN_FILTER_MIP_LINEAR) {
+ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
+ texstate->filter0 |= R300_TX_MIN_FILTER_MIP_NEAREST;
+ }
+ /* No anisotropic filtering. */
+ texstate->filter0 &= ~R300_TX_MAX_ANISO_MASK;
+ texstate->filter1 &= ~R500_TX_MAX_ANISO_MASK;
+ texstate->filter1 &= ~R500_TX_ANISO_HIGH_QUALITY;
+ }
+
texstate->filter0 |= i << 28;
size += 16;
@@ -862,11 +928,35 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
}
-void r300_update_derived_state(struct r300_context* r300)
+static void r300_decompress_depth_textures(struct r300_context *r300)
{
- r300_flush_depth_textures(r300);
+ struct r300_textures_state *state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ struct pipe_resource *tex;
+ unsigned count = MIN2(state->sampler_view_count,
+ state->sampler_state_count);
+ unsigned i;
+
+ if (!r300->zmask_locked || !r300->locked_zbuffer) {
+ return;
+ }
+ for (i = 0; i < count; i++) {
+ if (state->sampler_views[i] && state->sampler_states[i]) {
+ tex = state->sampler_views[i]->base.texture;
+
+ if (tex == r300->locked_zbuffer->texture) {
+ r300_decompress_zmask_locked(r300);
+ return;
+ }
+ }
+ }
+}
+
+void r300_update_derived_state(struct r300_context* r300)
+{
if (r300->textures_state.dirty) {
+ r300_decompress_depth_textures(r300);
r300_merge_textures_and_samplers(r300);
}
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 7e501221b1f..06da04c7ad7 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -25,13 +25,9 @@
#define R300_STATE_INLINES_H
#include "draw/draw_vertex.h"
-
#include "pipe/p_format.h"
-
#include "util/u_format.h"
-
#include "r300_reg.h"
-
#include <stdio.h>
/* Some maths. These should probably find their way to u_math, if needed. */
@@ -341,24 +337,6 @@ static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
R500_TX_ANISO_HIGH_QUALITY;
}
-/* Non-CSO state. (For now.) */
-
-static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
-{
- switch (pipe_count) {
- case 1:
- return R300_GB_TILE_PIPE_COUNT_RV300;
- case 2:
- return R300_GB_TILE_PIPE_COUNT_R300;
- case 3:
- return R300_GB_TILE_PIPE_COUNT_R420_3P;
- case 4:
- return R300_GB_TILE_PIPE_COUNT_R420;
- }
- return 0;
-}
-
-
/* Translate pipe_formats into PSC vertex types. */
static INLINE uint16_t
r300_translate_vertex_data_type(enum pipe_format format) {
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 6fdc504ed54..354144cac79 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -370,14 +370,18 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
switch (format) {
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
+ /*case PIPE_FORMAT_A8_SNORM:*/
case PIPE_FORMAT_I8_UNORM:
+ /*case PIPE_FORMAT_I8_SNORM:*/
case PIPE_FORMAT_L8_UNORM:
+ /*case PIPE_FORMAT_L8_SNORM:*/
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return R300_COLOR_FORMAT_I8;
/* 16-bit buffers. */
case PIPE_FORMAT_L8A8_UNORM:
+ /*case PIPE_FORMAT_L8A8_SNORM:*/
case PIPE_FORMAT_R8G8_UNORM:
case PIPE_FORMAT_R8G8_SNORM:
return R300_COLOR_FORMAT_UV88;
@@ -395,13 +399,21 @@ static uint32_t r300_translate_colorformat(enum pipe_format format)
/* 32-bit buffers. */
case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/
case PIPE_FORMAT_B8G8R8X8_UNORM:
+ /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/
case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/
case PIPE_FORMAT_X8R8G8B8_UNORM:
+ /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/
case PIPE_FORMAT_A8B8G8R8_UNORM:
+ /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
+ /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/
case PIPE_FORMAT_R8G8B8X8_UNORM:
+ /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/
case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
return R300_COLOR_FORMAT_ARGB8888;
@@ -506,9 +518,12 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
/* 8-bit outputs, one channel.
* COLORFORMAT_I8 stores the C2 component. */
case PIPE_FORMAT_A8_UNORM:
+ /*case PIPE_FORMAT_A8_SNORM:*/
return modifier | R300_C2_SEL_A;
case PIPE_FORMAT_I8_UNORM:
+ /*case PIPE_FORMAT_I8_SNORM:*/
case PIPE_FORMAT_L8_UNORM:
+ /*case PIPE_FORMAT_L8_SNORM:*/
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return modifier | R300_C2_SEL_R;
@@ -516,6 +531,7 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
/* 16-bit outputs, two channels.
* COLORFORMAT_UV88 stores C2 and C0. */
case PIPE_FORMAT_L8A8_UNORM:
+ /*case PIPE_FORMAT_L8A8_SNORM:*/
return modifier | R300_C0_SEL_A | R300_C2_SEL_R;
case PIPE_FORMAT_R8G8_UNORM:
case PIPE_FORMAT_R8G8_SNORM:
@@ -528,7 +544,9 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
+ /*case PIPE_FORMAT_B8G8R8A8_SNORM:*/
case PIPE_FORMAT_B8G8R8X8_UNORM:
+ /*case PIPE_FORMAT_B8G8R8X8_SNORM:*/
case PIPE_FORMAT_B10G10R10A2_UNORM:
return modifier |
R300_C0_SEL_B | R300_C1_SEL_G |
@@ -536,20 +554,26 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format)
/* ARGB outputs. */
case PIPE_FORMAT_A8R8G8B8_UNORM:
+ /*case PIPE_FORMAT_A8R8G8B8_SNORM:*/
case PIPE_FORMAT_X8R8G8B8_UNORM:
+ /*case PIPE_FORMAT_X8R8G8B8_SNORM:*/
return modifier |
R300_C0_SEL_A | R300_C1_SEL_R |
R300_C2_SEL_G | R300_C3_SEL_B;
/* ABGR outputs. */
case PIPE_FORMAT_A8B8G8R8_UNORM:
+ /*case PIPE_FORMAT_A8B8G8R8_SNORM:*/
case PIPE_FORMAT_X8B8G8R8_UNORM:
+ /*case PIPE_FORMAT_X8B8G8R8_SNORM:*/
return modifier |
R300_C0_SEL_A | R300_C1_SEL_B |
R300_C2_SEL_G | R300_C3_SEL_R;
/* RGBA outputs. */
case PIPE_FORMAT_R8G8B8X8_UNORM:
+ /*case PIPE_FORMAT_R8G8B8X8_SNORM:*/
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
case PIPE_FORMAT_R10G10B10A2_UNORM:
@@ -585,11 +609,12 @@ boolean r300_is_sampler_format_supported(enum pipe_format format)
}
void r300_texture_setup_format_state(struct r300_screen *screen,
- struct r300_texture_desc *desc,
+ struct r300_resource *tex,
unsigned level,
struct r300_texture_format_state *out)
{
- struct pipe_resource *pt = &desc->b.b;
+ struct pipe_resource *pt = &tex->b.b.b;
+ struct r300_texture_desc *desc = &tex->tex;
boolean is_r500 = screen->caps.is_r500;
/* Mask out all the fields we change. */
@@ -632,163 +657,152 @@ void r300_texture_setup_format_state(struct r300_screen *screen,
R300_TXO_MICRO_TILE(desc->microtile);
}
-static void r300_texture_setup_fb_state(struct r300_screen* screen,
- struct r300_texture* tex)
+static void r300_texture_setup_fb_state(struct r300_surface *surf)
{
- unsigned i;
+ struct r300_resource *tex = r300_resource(surf->base.texture);
+ unsigned level = surf->base.u.tex.level;
/* Set framebuffer state. */
- if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) {
- for (i = 0; i <= tex->desc.b.b.last_level; i++) {
- tex->fb_state.pitch[i] =
- tex->desc.stride_in_pixels[i] |
- R300_DEPTHMACROTILE(tex->desc.macrotile[i]) |
- R300_DEPTHMICROTILE(tex->desc.microtile);
- }
- tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format);
+ if (util_format_is_depth_or_stencil(surf->base.format)) {
+ surf->pitch =
+ tex->tex.stride_in_pixels[level] |
+ R300_DEPTHMACROTILE(tex->tex.macrotile[level]) |
+ R300_DEPTHMICROTILE(tex->tex.microtile);
+ surf->format = r300_translate_zsformat(surf->base.format);
} else {
- for (i = 0; i <= tex->desc.b.b.last_level; i++) {
- tex->fb_state.pitch[i] =
- tex->desc.stride_in_pixels[i] |
- r300_translate_colorformat(tex->desc.b.b.format) |
- R300_COLOR_TILE(tex->desc.macrotile[i]) |
- R300_COLOR_MICROTILE(tex->desc.microtile);
- }
- tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format);
+ surf->pitch =
+ tex->tex.stride_in_pixels[level] |
+ r300_translate_colorformat(surf->base.format) |
+ R300_COLOR_TILE(tex->tex.macrotile[level]) |
+ R300_COLOR_MICROTILE(tex->tex.microtile);
+ surf->format = r300_translate_out_fmt(surf->base.format);
}
}
-void r300_texture_reinterpret_format(struct pipe_screen *screen,
+boolean r300_resource_set_properties(struct pipe_screen *screen,
struct pipe_resource *tex,
- enum pipe_format new_format)
+ unsigned offset,
+ const struct pipe_resource *new_properties)
{
- struct r300_screen *r300screen = r300_screen(screen);
+ struct r300_screen *rscreen = r300_screen(screen);
+ struct r300_resource *res = r300_resource(tex);
- SCREEN_DBG(r300screen, DBG_TEX,
- "r300: texture_reinterpret_format: %s -> %s\n",
+ SCREEN_DBG(rscreen, DBG_TEX,
+ "r300: texture_set_properties: %s -> %s\n",
util_format_short_name(tex->format),
- util_format_short_name(new_format));
-
- tex->format = new_format;
+ util_format_short_name(new_properties->format));
- r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex));
-}
-
-static unsigned r300_texture_is_referenced(struct pipe_context *context,
- struct pipe_resource *texture,
- unsigned level, int layer)
-{
- struct r300_context *r300 = r300_context(context);
- struct r300_texture *rtex = (struct r300_texture *)texture;
-
- if (r300->rws->cs_is_buffer_referenced(r300->cs,
- rtex->cs_buffer, R300_REF_CS))
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ if (!r300_texture_desc_init(rscreen, res, new_properties)) {
+ fprintf(stderr, "r300: ERROR: Cannot set texture properties.\n");
+ return FALSE;
+ }
+ res->tex_offset = offset;
+ r300_texture_setup_format_state(rscreen, res, 0, &res->tx_format);
- return PIPE_UNREFERENCED;
+ return TRUE;
}
static void r300_texture_destroy(struct pipe_screen *screen,
struct pipe_resource* texture)
{
- struct r300_texture* tex = (struct r300_texture*)texture;
- struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys;
+ struct r300_resource* tex = (struct r300_resource*)texture;
int i;
- rws->buffer_reference(rws, &tex->buffer, NULL);
+ r300_winsys_bo_reference(&tex->buf, NULL);
for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) {
if (tex->hiz_mem[i])
u_mmFreeMem(tex->hiz_mem[i]);
- if (tex->zmask_mem[i])
- u_mmFreeMem(tex->zmask_mem[i]);
}
FREE(tex);
}
-static boolean r300_texture_get_handle(struct pipe_screen* screen,
- struct pipe_resource *texture,
- struct winsys_handle *whandle)
+boolean r300_resource_get_handle(struct pipe_screen* screen,
+ struct pipe_resource *texture,
+ struct winsys_handle *whandle)
{
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
- struct r300_texture* tex = (struct r300_texture*)texture;
+ struct r300_resource* tex = (struct r300_resource*)texture;
if (!tex) {
return FALSE;
}
- return rws->buffer_get_handle(rws, tex->buffer,
- tex->desc.stride_in_bytes[0], whandle);
+ return rws->buffer_get_handle(tex->buf,
+ tex->tex.stride_in_bytes[0], whandle);
}
-struct u_resource_vtbl r300_texture_vtbl =
+static const struct u_resource_vtbl r300_texture_vtbl =
{
- r300_texture_get_handle, /* get_handle */
- r300_texture_destroy, /* resource_destroy */
- r300_texture_is_referenced, /* is_resource_referenced */
- r300_texture_get_transfer, /* get_transfer */
- r300_texture_transfer_destroy, /* transfer_destroy */
- r300_texture_transfer_map, /* transfer_map */
- u_default_transfer_flush_region, /* transfer_flush_region */
- r300_texture_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
+ NULL, /* get_handle */
+ r300_texture_destroy, /* resource_destroy */
+ NULL, /* is_resource_referenced */
+ r300_texture_get_transfer, /* get_transfer */
+ r300_texture_transfer_destroy, /* transfer_destroy */
+ r300_texture_transfer_map, /* transfer_map */
+ NULL, /* transfer_flush_region */
+ r300_texture_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
};
/* The common texture constructor. */
-static struct r300_texture*
+static struct r300_resource*
r300_texture_create_object(struct r300_screen *rscreen,
const struct pipe_resource *base,
enum r300_buffer_tiling microtile,
enum r300_buffer_tiling macrotile,
unsigned stride_in_bytes_override,
unsigned max_buffer_size,
- struct r300_winsys_buffer *buffer)
+ struct r300_winsys_bo *buffer)
{
struct r300_winsys_screen *rws = rscreen->rws;
- struct r300_texture *tex = CALLOC_STRUCT(r300_texture);
+ struct r300_resource *tex = CALLOC_STRUCT(r300_resource);
if (!tex) {
if (buffer)
- rws->buffer_reference(rws, &buffer, NULL);
+ r300_winsys_bo_reference(&buffer, NULL);
return NULL;
}
- /* Initialize the descriptor. */
- if (!r300_texture_desc_init(rscreen, &tex->desc, base,
- microtile, macrotile,
- stride_in_bytes_override,
- max_buffer_size)) {
+ pipe_reference_init(&tex->b.b.b.reference, 1);
+ tex->b.b.b.screen = &rscreen->screen;
+ tex->b.b.b.usage = base->usage;
+ tex->b.b.b.bind = base->bind;
+ tex->b.b.b.flags = base->flags;
+ tex->b.b.vtbl = &r300_texture_vtbl;
+ tex->tex.microtile = microtile;
+ tex->tex.macrotile[0] = macrotile;
+ tex->tex.stride_in_bytes_override = stride_in_bytes_override;
+ tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
+ R300_DOMAIN_GTT :
+ R300_DOMAIN_VRAM | R300_DOMAIN_GTT;
+ tex->buf_size = max_buffer_size;
+
+ if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) {
if (buffer)
- rws->buffer_reference(rws, &buffer, NULL);
+ r300_winsys_bo_reference(&buffer, NULL);
FREE(tex);
return NULL;
}
- /* Initialize the hardware state. */
- r300_texture_setup_format_state(rscreen, &tex->desc, 0, &tex->tx_format);
- r300_texture_setup_fb_state(rscreen, tex);
-
- tex->desc.b.vtbl = &r300_texture_vtbl;
- pipe_reference_init(&tex->desc.b.b.reference, 1);
- tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
- R300_DOMAIN_GTT :
- R300_DOMAIN_VRAM | R300_DOMAIN_GTT;
- tex->buffer = buffer;
/* Create the backing buffer if needed. */
- if (!tex->buffer) {
- tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048,
+ if (!buffer) {
+ tex->buf_size = tex->tex.size_in_bytes;
+ tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048,
base->bind, base->usage, tex->domain);
- if (!tex->buffer) {
+ if (!tex->buf) {
FREE(tex);
return NULL;
}
+ } else {
+ tex->buf = buffer;
}
- tex->cs_buffer = rws->buffer_get_cs_handle(rws, tex->buffer);
+ tex->cs_buf = rws->buffer_get_cs_handle(tex->buf);
- rws->buffer_set_tiling(rws, tex->buffer,
- tex->desc.microtile, tex->desc.macrotile[0],
- tex->desc.stride_in_bytes[0]);
+ rws->buffer_set_tiling(tex->buf, NULL,
+ tex->tex.microtile, tex->tex.macrotile[0],
+ tex->tex.stride_in_bytes[0]);
return tex;
}
@@ -820,7 +834,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
{
struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys;
struct r300_screen *rscreen = r300_screen(screen);
- struct r300_winsys_buffer *buffer;
+ struct r300_winsys_bo *buffer;
enum r300_buffer_tiling microtile, macrotile;
unsigned stride, size;
@@ -836,7 +850,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
if (!buffer)
return NULL;
- rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile);
+ rws->buffer_get_tiling(buffer, &microtile, &macrotile);
/* Enforce a microtiled zbuffer. */
if (util_format_is_depth_or_stencil(base->format) &&
@@ -847,7 +861,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
break;
case 2:
- if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT))
+ if (rws->get_value(rws, R300_VID_DRM_2_1_0))
microtile = R300_BUFFER_SQUARETILED;
break;
}
@@ -864,7 +878,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
struct pipe_resource* texture,
const struct pipe_surface *surf_tmpl)
{
- struct r300_texture* tex = r300_texture(texture);
+ struct r300_resource* tex = r300_resource(texture);
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
unsigned level = surf_tmpl->u.tex.level;
@@ -884,28 +898,27 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
surface->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- surface->buffer = tex->buffer;
- surface->cs_buffer = tex->cs_buffer;
+ surface->buf = tex->buf;
+ surface->cs_buf = tex->cs_buf;
/* Prefer VRAM if there are multiple domains to choose from. */
surface->domain = tex->domain;
if (surface->domain & R300_DOMAIN_VRAM)
surface->domain &= ~R300_DOMAIN_GTT;
- surface->offset = r300_texture_get_offset(&tex->desc, level,
+ surface->offset = r300_texture_get_offset(tex, level,
surf_tmpl->u.tex.first_layer);
- surface->pitch = tex->fb_state.pitch[level];
- surface->format = tex->fb_state.format;
+ r300_texture_setup_fb_state(surface);
/* Parameters for the CBZB clear. */
- surface->cbzb_allowed = tex->desc.cbzb_allowed[level];
+ surface->cbzb_allowed = tex->tex.cbzb_allowed[level];
surface->cbzb_width = align(surface->base.width, 64);
/* Height must be aligned to the size of a tile. */
- tile_height = r300_get_pixel_alignment(tex->desc.b.b.format,
- tex->desc.b.b.nr_samples,
- tex->desc.microtile,
- tex->desc.macrotile[level],
+ tile_height = r300_get_pixel_alignment(tex->b.b.b.format,
+ tex->b.b.b.nr_samples,
+ tex->tex.microtile,
+ tex->tex.macrotile[level],
DIM_HEIGHT, 0);
surface->cbzb_height = align((surface->base.height + 1) / 2,
@@ -914,7 +927,7 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
/* Offset must be aligned to 2K and must point at the beginning
* of a scanline. */
offset = surface->offset +
- tex->desc.stride_in_bytes[level] * surface->cbzb_height;
+ tex->tex.stride_in_bytes[level] * surface->cbzb_height;
surface->cbzb_midpoint_offset = offset & ~2047;
surface->cbzb_pitch = surface->pitch & 0x1ffffc;
@@ -929,8 +942,8 @@ struct pipe_surface* r300_create_surface(struct pipe_context * ctx,
surface->cbzb_allowed ? "YES" : " NO",
surface->cbzb_width, surface->cbzb_height,
offset & 2047,
- tex->desc.microtile ? "YES" : " NO",
- tex->desc.macrotile[level] ? "YES" : " NO");
+ tex->tex.microtile ? "YES" : " NO",
+ tex->tex.macrotile[level] ? "YES" : " NO");
}
return &surface->base;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 0ab22f747e4..158a387478f 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -32,7 +32,7 @@ struct pipe_resource;
struct winsys_handle;
struct r300_texture_format_state;
struct r300_texture_desc;
-struct r300_texture;
+struct r300_resource;
struct r300_screen;
unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
@@ -46,9 +46,10 @@ uint32_t r300_translate_texformat(enum pipe_format format,
uint32_t r500_tx_format_msb_bit(enum pipe_format format);
-void r300_texture_reinterpret_format(struct pipe_screen *screen,
+boolean r300_resource_set_properties(struct pipe_screen *screen,
struct pipe_resource *tex,
- enum pipe_format new_format);
+ unsigned offset,
+ const struct pipe_resource *new_properties);
boolean r300_is_colorbuffer_format_supported(enum pipe_format format);
@@ -57,10 +58,14 @@ boolean r300_is_zs_format_supported(enum pipe_format format);
boolean r300_is_sampler_format_supported(enum pipe_format format);
void r300_texture_setup_format_state(struct r300_screen *screen,
- struct r300_texture_desc *desc,
+ struct r300_resource *tex,
unsigned level,
struct r300_texture_format_state *out);
+boolean r300_resource_get_handle(struct pipe_screen* screen,
+ struct pipe_resource *texture,
+ struct winsys_handle *whandle);
+
struct pipe_resource*
r300_texture_from_handle(struct pipe_screen* screen,
const struct pipe_resource* base,
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index 7b1739142d4..2cfeec7d751 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -90,19 +90,19 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
}
/* Return true if macrotiling should be enabled on the miplevel. */
-static boolean r300_texture_macro_switch(struct r300_texture_desc *desc,
+static boolean r300_texture_macro_switch(struct r300_resource *tex,
unsigned level,
boolean rv350_mode,
enum r300_dim dim)
{
unsigned tile, texdim;
- tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples,
- desc->microtile, R300_BUFFER_TILED, dim, 0);
+ tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples,
+ tex->tex.microtile, R300_BUFFER_TILED, dim, 0);
if (dim == DIM_WIDTH) {
- texdim = u_minify(desc->width0, level);
+ texdim = u_minify(tex->tex.width0, level);
} else {
- texdim = u_minify(desc->height0, level);
+ texdim = u_minify(tex->tex.height0, level);
}
/* See TX_FILTER1_n.MACRO_SWITCH. */
@@ -118,7 +118,7 @@ static boolean r300_texture_macro_switch(struct r300_texture_desc *desc,
* at the given level.
*/
static unsigned r300_texture_get_stride(struct r300_screen *screen,
- struct r300_texture_desc *desc,
+ struct r300_resource *tex,
unsigned level)
{
unsigned tile_width, width, stride;
@@ -126,62 +126,62 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen,
screen->caps.family == CHIP_FAMILY_RS690 ||
screen->caps.family == CHIP_FAMILY_RS740);
- if (desc->stride_in_bytes_override)
- return desc->stride_in_bytes_override;
+ if (tex->tex.stride_in_bytes_override)
+ return tex->tex.stride_in_bytes_override;
/* Check the level. */
- if (level > desc->b.b.last_level) {
+ if (level > tex->b.b.b.last_level) {
SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
- __FUNCTION__, level, desc->b.b.last_level);
+ __FUNCTION__, level, tex->b.b.b.last_level);
return 0;
}
- width = u_minify(desc->width0, level);
+ width = u_minify(tex->tex.width0, level);
- if (util_format_is_plain(desc->b.b.format)) {
- tile_width = r300_get_pixel_alignment(desc->b.b.format,
- desc->b.b.nr_samples,
- desc->microtile,
- desc->macrotile[level],
+ if (util_format_is_plain(tex->b.b.b.format)) {
+ tile_width = r300_get_pixel_alignment(tex->b.b.b.format,
+ tex->b.b.b.nr_samples,
+ tex->tex.microtile,
+ tex->tex.macrotile[level],
DIM_WIDTH, is_rs690);
width = align(width, tile_width);
- stride = util_format_get_stride(desc->b.b.format, width);
+ stride = util_format_get_stride(tex->b.b.b.format, width);
/* The alignment to 32 bytes is sort of implied by the layout... */
return stride;
} else {
- return align(util_format_get_stride(desc->b.b.format, width), is_rs690 ? 64 : 32);
+ return align(util_format_get_stride(tex->b.b.b.format, width), is_rs690 ? 64 : 32);
}
}
-static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
+static unsigned r300_texture_get_nblocksy(struct r300_resource *tex,
unsigned level,
boolean *out_aligned_for_cbzb)
{
unsigned height, tile_height;
- height = u_minify(desc->height0, level);
+ height = u_minify(tex->tex.height0, level);
- if (util_format_is_plain(desc->b.b.format)) {
- tile_height = r300_get_pixel_alignment(desc->b.b.format,
- desc->b.b.nr_samples,
- desc->microtile,
- desc->macrotile[level],
+ /* Mipmapped and 3D textures must have their height aligned to POT. */
+ if ((tex->b.b.b.target != PIPE_TEXTURE_1D &&
+ tex->b.b.b.target != PIPE_TEXTURE_2D &&
+ tex->b.b.b.target != PIPE_TEXTURE_RECT) ||
+ tex->b.b.b.last_level != 0) {
+ height = util_next_power_of_two(height);
+ }
+
+ if (util_format_is_plain(tex->b.b.b.format)) {
+ tile_height = r300_get_pixel_alignment(tex->b.b.b.format,
+ tex->b.b.b.nr_samples,
+ tex->tex.microtile,
+ tex->tex.macrotile[level],
DIM_HEIGHT, 0);
height = align(height, tile_height);
- /* This is needed for the kernel checker, unfortunately. */
- if ((desc->b.b.target != PIPE_TEXTURE_1D &&
- desc->b.b.target != PIPE_TEXTURE_2D &&
- desc->b.b.target != PIPE_TEXTURE_RECT) ||
- desc->b.b.last_level != 0) {
- height = util_next_power_of_two(height);
- }
-
/* See if the CBZB clear can be used on the buffer,
* taking the texture size into account. */
if (out_aligned_for_cbzb) {
- if (desc->macrotile[level]) {
+ if (tex->tex.macrotile[level]) {
/* When clearing, the layer (width*height) is horizontally split
* into two, and the upper and lower halves are cleared by the CB
* and ZB units, respectively. Therefore, the number of macrotiles
@@ -189,10 +189,10 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
/* Align the height so that there is an even number of macrotiles.
* Do so for 3 or more macrotiles in the Y direction. */
- if (level == 0 && desc->b.b.last_level == 0 &&
- (desc->b.b.target == PIPE_TEXTURE_1D ||
- desc->b.b.target == PIPE_TEXTURE_2D ||
- desc->b.b.target == PIPE_TEXTURE_RECT) &&
+ if (level == 0 && tex->b.b.b.last_level == 0 &&
+ (tex->b.b.b.target == PIPE_TEXTURE_1D ||
+ tex->b.b.b.target == PIPE_TEXTURE_2D ||
+ tex->b.b.b.target == PIPE_TEXTURE_RECT) &&
height >= tile_height * 3) {
height = align(height, tile_height * 2);
}
@@ -204,11 +204,11 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc,
}
}
- return util_format_get_nblocksy(desc->b.b.format, height);
+ return util_format_get_nblocksy(tex->b.b.b.format, height);
}
static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
- struct r300_texture_desc *desc)
+ struct r300_resource *tex)
{
/* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
* incorrectly. This is a workaround to prevent CS from being rejected. */
@@ -216,17 +216,17 @@ static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
unsigned i, size;
if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) &&
- desc->b.b.target == PIPE_TEXTURE_3D &&
- desc->b.b.last_level > 0) {
+ tex->b.b.b.target == PIPE_TEXTURE_3D &&
+ tex->b.b.b.last_level > 0) {
size = 0;
- for (i = 0; i <= desc->b.b.last_level; i++) {
- size += desc->stride_in_bytes[i] *
- r300_texture_get_nblocksy(desc, i, FALSE);
+ for (i = 0; i <= tex->b.b.b.last_level; i++) {
+ size += tex->tex.stride_in_bytes[i] *
+ r300_texture_get_nblocksy(tex, i, FALSE);
}
- size *= desc->depth0;
- desc->size_in_bytes = size;
+ size *= tex->tex.depth0;
+ tex->tex.size_in_bytes = size;
}
}
@@ -239,15 +239,15 @@ static unsigned stride_to_width(enum pipe_format format,
}
static void r300_setup_miptree(struct r300_screen *screen,
- struct r300_texture_desc *desc,
+ struct r300_resource *tex,
boolean align_for_cbzb)
{
- struct pipe_resource *base = &desc->b.b;
+ struct pipe_resource *base = &tex->b.b.b;
unsigned stride, size, layer_size, nblocksy, i;
boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
boolean aligned_for_cbzb;
- desc->size_in_bytes = 0;
+ tex->tex.size_in_bytes = 0;
SCREEN_DBG(screen, DBG_TEXALLOC,
"r300: Making miptree for texture, format %s\n",
@@ -255,21 +255,21 @@ static void r300_setup_miptree(struct r300_screen *screen,
for (i = 0; i <= base->last_level; i++) {
/* Let's see if this miplevel can be macrotiled. */
- desc->macrotile[i] =
- (desc->macrotile[0] == R300_BUFFER_TILED &&
- r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) &&
- r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ?
+ tex->tex.macrotile[i] =
+ (tex->tex.macrotile[0] == R300_BUFFER_TILED &&
+ r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ?
R300_BUFFER_TILED : R300_BUFFER_LINEAR;
- stride = r300_texture_get_stride(screen, desc, i);
+ stride = r300_texture_get_stride(screen, tex, i);
/* Compute the number of blocks in Y, see if the CBZB clear can be
* used on the texture. */
aligned_for_cbzb = FALSE;
- if (align_for_cbzb && desc->cbzb_allowed[i])
- nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb);
+ if (align_for_cbzb && tex->tex.cbzb_allowed[i])
+ nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb);
else
- nblocksy = r300_texture_get_nblocksy(desc, i, NULL);
+ nblocksy = r300_texture_get_nblocksy(tex, i, NULL);
layer_size = stride * nblocksy;
@@ -280,75 +280,136 @@ static void r300_setup_miptree(struct r300_screen *screen,
if (base->target == PIPE_TEXTURE_CUBE)
size = layer_size * 6;
else
- size = layer_size * u_minify(desc->depth0, i);
+ size = layer_size * u_minify(tex->tex.depth0, i);
- desc->offset_in_bytes[i] = desc->size_in_bytes;
- desc->size_in_bytes = desc->offset_in_bytes[i] + size;
- desc->layer_size_in_bytes[i] = layer_size;
- desc->stride_in_bytes[i] = stride;
- desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride);
- desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb;
+ tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes;
+ tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size;
+ tex->tex.layer_size_in_bytes[i] = layer_size;
+ tex->tex.stride_in_bytes[i] = stride;
+ tex->tex.stride_in_pixels[i] = stride_to_width(tex->b.b.b.format, stride);
+ tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb;
SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d "
"(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
- i, u_minify(desc->width0, i), u_minify(desc->height0, i),
- u_minify(desc->depth0, i), stride, desc->size_in_bytes,
- desc->macrotile[i] ? "TRUE" : "FALSE");
+ i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i),
+ u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes,
+ tex->tex.macrotile[i] ? "TRUE" : "FALSE");
}
}
-static void r300_setup_flags(struct r300_texture_desc *desc)
+static void r300_setup_flags(struct r300_resource *tex)
{
- desc->uses_stride_addressing =
- !util_is_power_of_two(desc->b.b.width0) ||
- (desc->stride_in_bytes_override &&
- stride_to_width(desc->b.b.format,
- desc->stride_in_bytes_override) != desc->b.b.width0);
-
- desc->is_npot =
- desc->uses_stride_addressing ||
- !util_is_power_of_two(desc->b.b.height0) ||
- !util_is_power_of_two(desc->b.b.depth0);
+ tex->tex.uses_stride_addressing =
+ !util_is_power_of_two(tex->b.b.b.width0) ||
+ (tex->tex.stride_in_bytes_override &&
+ stride_to_width(tex->b.b.b.format,
+ tex->tex.stride_in_bytes_override) != tex->b.b.b.width0);
+
+ tex->tex.is_npot =
+ tex->tex.uses_stride_addressing ||
+ !util_is_power_of_two(tex->b.b.b.height0) ||
+ !util_is_power_of_two(tex->b.b.b.depth0);
}
static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
- struct r300_texture_desc *desc)
+ struct r300_resource *tex)
{
unsigned i, bpp;
boolean first_level_valid;
- bpp = util_format_get_blocksizebits(desc->b.b.format);
+ bpp = util_format_get_blocksizebits(tex->b.b.b.format);
/* 1) The texture must be point-sampled,
* 2) The depth must be 16 or 32 bits.
* 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage
* with certain texture sizes. Macrotiling ensures the alignment. */
- first_level_valid = desc->b.b.nr_samples <= 1 &&
+ first_level_valid = tex->b.b.b.nr_samples <= 1 &&
(bpp == 16 || bpp == 32) &&
- desc->macrotile[0];
+ tex->tex.macrotile[0];
if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB))
first_level_valid = FALSE;
- for (i = 0; i <= desc->b.b.last_level; i++)
- desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i];
+ for (i = 0; i <= tex->b.b.b.last_level; i++)
+ tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i];
+}
+
+#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
+
+static void r300_setup_zmask_flags(struct r300_screen *screen,
+ struct r300_resource *tex)
+{
+ /* The tile size of 1 DWORD is:
+ *
+ * GPU Pipes 4x4 mode 8x8 mode
+ * ------------------------------------------
+ * R580 4P/1Z 32x32 64x64
+ * RV570 3P/1Z 48x16 96x32
+ * RV530 1P/2Z 32x16 64x32
+ * 1P/1Z 16x16 32x32
+ */
+ static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8};
+ static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8};
+
+ if (util_format_is_depth_or_stencil(tex->b.b.b.format) &&
+ util_format_get_blocksizebits(tex->b.b.b.format) == 32 &&
+ tex->tex.microtile) {
+ unsigned i, pipes;
+
+ if (screen->caps.family == CHIP_FAMILY_RV530) {
+ pipes = screen->caps.num_z_pipes;
+ } else {
+ pipes = screen->caps.num_frag_pipes;
+ }
+
+ for (i = 0; i <= tex->b.b.b.last_level; i++) {
+ unsigned numdw, compsize;
+
+ /* The 8x8 compression mode needs macrotiling. */
+ compsize = screen->caps.z_compress == R300_ZCOMP_8X8 &&
+ tex->tex.macrotile[i] &&
+ tex->b.b.b.nr_samples <= 1 ? 8 : 4;
+
+ /* Get the zbuffer size (with the aligned width and height). */
+ numdw = align(tex->tex.stride_in_pixels[i],
+ num_blocks_x_per_dw[pipes-1] * compsize) *
+ align(u_minify(tex->b.b.b.height0, i),
+ num_blocks_y_per_dw[pipes-1] * compsize);
+
+ /* Convert pixels -> dwords. */
+ numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize *
+ num_blocks_y_per_dw[pipes-1] * compsize);
+
+ /* Check that we have enough ZMASK memory. */
+ if (numdw <= screen->caps.zmask_ram * pipes) {
+ tex->tex.zmask_dwords[i] = numdw;
+ tex->tex.zcomp8x8[i] = compsize == 8;
+ } else {
+ tex->tex.zmask_dwords[i] = 0;
+ tex->tex.zcomp8x8[i] = FALSE;
+ }
+ }
+ }
}
static void r300_setup_tiling(struct r300_screen *screen,
- struct r300_texture_desc *desc)
+ struct r300_resource *tex)
{
struct r300_winsys_screen *rws = screen->rws;
- enum pipe_format format = desc->b.b.format;
+ enum pipe_format format = tex->b.b.b.format;
boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350;
boolean is_zb = util_format_is_depth_or_stencil(format);
boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING);
+ tex->tex.microtile = R300_BUFFER_LINEAR;
+ tex->tex.macrotile[0] = R300_BUFFER_LINEAR;
+
if (!util_format_is_plain(format)) {
return;
}
/* If height == 1, disable microtiling except for zbuffer. */
- if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) {
+ if (!is_zb && (tex->b.b.b.height0 == 1 || dbg_no_tiling)) {
return;
}
@@ -357,12 +418,12 @@ static void r300_setup_tiling(struct r300_screen *screen,
case 1:
case 4:
case 8:
- desc->microtile = R300_BUFFER_TILED;
+ tex->tex.microtile = R300_BUFFER_TILED;
break;
case 2:
- if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
- desc->microtile = R300_BUFFER_SQUARETILED;
+ if (rws->get_value(rws, R300_VID_DRM_2_1_0)) {
+ tex->tex.microtile = R300_BUFFER_SQUARETILED;
}
break;
}
@@ -372,104 +433,100 @@ static void r300_setup_tiling(struct r300_screen *screen,
}
/* Set macrotiling. */
- if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) &&
- r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) {
- desc->macrotile[0] = R300_BUFFER_TILED;
+ if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) {
+ tex->tex.macrotile[0] = R300_BUFFER_TILED;
}
}
-static void r300_tex_print_info(struct r300_screen *rscreen,
- struct r300_texture_desc *desc,
+static void r300_tex_print_info(struct r300_resource *tex,
const char *func)
{
fprintf(stderr,
"r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, "
"LastLevel: %i, Size: %i, Format: %s\n",
func,
- desc->macrotile[0] ? "YES" : " NO",
- desc->microtile ? "YES" : " NO",
- desc->stride_in_pixels[0],
- desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0,
- desc->b.b.last_level, desc->size_in_bytes,
- util_format_short_name(desc->b.b.format));
+ tex->tex.macrotile[0] ? "YES" : " NO",
+ tex->tex.microtile ? "YES" : " NO",
+ tex->tex.stride_in_pixels[0],
+ tex->b.b.b.width0, tex->b.b.b.height0, tex->b.b.b.depth0,
+ tex->b.b.b.last_level, tex->tex.size_in_bytes,
+ util_format_short_name(tex->b.b.b.format));
}
boolean r300_texture_desc_init(struct r300_screen *rscreen,
- struct r300_texture_desc *desc,
- const struct pipe_resource *base,
- enum r300_buffer_tiling microtile,
- enum r300_buffer_tiling macrotile,
- unsigned stride_in_bytes_override,
- unsigned max_buffer_size)
+ struct r300_resource *tex,
+ const struct pipe_resource *base)
{
- desc->b.b = *base;
- desc->b.b.screen = &rscreen->screen;
- desc->stride_in_bytes_override = stride_in_bytes_override;
- desc->width0 = base->width0;
- desc->height0 = base->height0;
- desc->depth0 = base->depth0;
-
- r300_setup_flags(desc);
+ tex->b.b.b.target = base->target;
+ tex->b.b.b.format = base->format;
+ tex->b.b.b.width0 = base->width0;
+ tex->b.b.b.height0 = base->height0;
+ tex->b.b.b.depth0 = base->depth0;
+ tex->b.b.b.array_size = base->array_size;
+ tex->b.b.b.last_level = base->last_level;
+ tex->b.b.b.nr_samples = base->nr_samples;
+ tex->tex.width0 = base->width0;
+ tex->tex.height0 = base->height0;
+ tex->tex.depth0 = base->depth0;
+
+ r300_setup_flags(tex);
/* Align a 3D NPOT texture to POT. */
- if (base->target == PIPE_TEXTURE_3D && desc->is_npot) {
- desc->width0 = util_next_power_of_two(desc->width0);
- desc->height0 = util_next_power_of_two(desc->height0);
- desc->depth0 = util_next_power_of_two(desc->depth0);
+ if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) {
+ tex->tex.width0 = util_next_power_of_two(tex->tex.width0);
+ tex->tex.height0 = util_next_power_of_two(tex->tex.height0);
+ tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0);
}
/* Setup tiling. */
- if (microtile == R300_BUFFER_SELECT_LAYOUT ||
- macrotile == R300_BUFFER_SELECT_LAYOUT) {
- r300_setup_tiling(rscreen, desc);
- } else {
- desc->microtile = microtile;
- desc->macrotile[0] = macrotile;
- assert(desc->b.b.last_level == 0);
+ if (tex->tex.microtile == R300_BUFFER_SELECT_LAYOUT) {
+ r300_setup_tiling(rscreen, tex);
}
- r300_setup_cbzb_flags(rscreen, desc);
+ r300_setup_cbzb_flags(rscreen, tex);
/* Setup the miptree description. */
- r300_setup_miptree(rscreen, desc, TRUE);
+ r300_setup_miptree(rscreen, tex, TRUE);
/* If the required buffer size is larger the given max size,
* try again without the alignment for the CBZB clear. */
- if (max_buffer_size && desc->size_in_bytes > max_buffer_size) {
- r300_setup_miptree(rscreen, desc, FALSE);
+ if (tex->buf_size && tex->tex.size_in_bytes > tex->buf_size) {
+ r300_setup_miptree(rscreen, tex, FALSE);
}
- r300_texture_3d_fix_mipmapping(rscreen, desc);
+ r300_texture_3d_fix_mipmapping(rscreen, tex);
+ r300_setup_zmask_flags(rscreen, tex);
- if (max_buffer_size) {
+ if (tex->buf_size) {
/* Make sure the buffer we got is large enough. */
- if (desc->size_in_bytes > max_buffer_size) {
+ if (tex->tex.size_in_bytes > tex->buf_size) {
fprintf(stderr, "r300: texture_desc_init: The buffer is not "
"large enough. Got: %i, Need: %i, Info:\n",
- max_buffer_size, desc->size_in_bytes);
- r300_tex_print_info(rscreen, desc, "texture_desc_init");
+ tex->buf_size, tex->tex.size_in_bytes);
+ r300_tex_print_info(tex, "texture_desc_init");
return FALSE;
}
- desc->buffer_size_in_bytes = max_buffer_size;
+ tex->tex.buffer_size_in_bytes = tex->buf_size;
} else {
- desc->buffer_size_in_bytes = desc->size_in_bytes;
+ tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes;
}
if (SCREEN_DBG_ON(rscreen, DBG_TEX))
- r300_tex_print_info(rscreen, desc, "texture_desc_init");
+ r300_tex_print_info(tex, "texture_desc_init");
return TRUE;
}
-unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
+unsigned r300_texture_get_offset(struct r300_resource *tex,
unsigned level, unsigned layer)
{
- unsigned offset = desc->offset_in_bytes[level];
+ unsigned offset = tex->tex.offset_in_bytes[level];
- switch (desc->b.b.target) {
+ switch (tex->b.b.b.target) {
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
- return offset + layer * desc->layer_size_in_bytes[level];
+ return offset + layer * tex->tex.layer_size_in_bytes[level];
default:
assert(layer == 0);
diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h
index 121d215b4cb..ce6e9643ec6 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.h
+++ b/src/gallium/drivers/r300/r300_texture_desc.h
@@ -30,7 +30,7 @@
struct pipe_resource;
struct r300_screen;
struct r300_texture_desc;
-struct r300_texture;
+struct r300_resource;
enum r300_dim {
DIM_WIDTH = 0,
@@ -44,14 +44,10 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
enum r300_dim dim, boolean is_rs690);
boolean r300_texture_desc_init(struct r300_screen *rscreen,
- struct r300_texture_desc *desc,
- const struct pipe_resource *base,
- enum r300_buffer_tiling microtile,
- enum r300_buffer_tiling macrotile,
- unsigned stride_in_bytes_override,
- unsigned max_buffer_size);
-
-unsigned r300_texture_get_offset(struct r300_texture_desc *desc,
+ struct r300_resource *tex,
+ const struct pipe_resource *base);
+
+unsigned r300_texture_get_offset(struct r300_resource *tex,
unsigned level, unsigned layer);
#endif
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 15a323989b2..97ec0a1a1f2 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -191,7 +191,12 @@ static void transform_dstreg(
dst->File = translate_register_file(src->Register.File);
dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
dst->WriteMask = src->Register.WriteMask;
- dst->RelAddr = src->Register.Indirect;
+
+ if (src->Register.Indirect) {
+ ttr->error = TRUE;
+ fprintf(stderr, "r300: Relative addressing of destination operands "
+ "is unsupported.\n");
+ }
}
static void transform_srcreg(
@@ -332,6 +337,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
unsigned imm_index = 0;
int i;
+ ttr->error = FALSE;
+
/* Allocate constants placeholders.
*
* Note: What if declared constants are not contiguous? */
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
index 97641a954b9..adb044cfe56 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
@@ -47,6 +47,9 @@ struct tgsi_to_rc {
/* Vertex shaders have no half swizzles, and no way to handle them, so
* until rc grows proper support, indicate if they're safe to use. */
boolean use_half_swizzles;
+
+ /* If an error occured. */
+ boolean error;
};
void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens);
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index ae93fab554e..30de9ec1e32 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -37,7 +37,7 @@ struct r300_transfer {
unsigned offset;
/* Linear texture. */
- struct r300_texture *linear_texture;
+ struct r300_resource *linear_texture;
};
/* Convenience cast wrapper. */
@@ -54,7 +54,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx,
struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
struct pipe_resource *tex = transfer->resource;
- ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, 0,
+ ctx->resource_copy_region(ctx, &r300transfer->linear_texture->b.b.b, 0,
0, 0, 0,
tex, transfer->level, &transfer->box);
}
@@ -70,7 +70,7 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx,
ctx->resource_copy_region(ctx, tex, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
- &r300transfer->linear_texture->desc.b.b, 0, &src_box);
+ &r300transfer->linear_texture->b.b.b, 0, &src_box);
ctx->flush(ctx, 0, NULL);
}
@@ -83,25 +83,24 @@ r300_texture_get_transfer(struct pipe_context *ctx,
const struct pipe_box *box)
{
struct r300_context *r300 = r300_context(ctx);
- struct r300_texture *tex = r300_texture(texture);
+ struct r300_resource *tex = r300_resource(texture);
struct r300_transfer *trans;
struct pipe_resource base;
boolean referenced_cs, referenced_hw, blittable;
+ const struct util_format_description *desc =
+ util_format_description(texture->format);
referenced_cs =
- r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->cs_buffer, R300_REF_CS);
+ r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf);
if (referenced_cs) {
referenced_hw = TRUE;
} else {
referenced_hw =
- r300->rws->cs_is_buffer_referenced(r300->cs,
- tex->cs_buffer, R300_REF_HW);
+ r300->rws->buffer_is_busy(tex->buf);
}
- blittable = ctx->screen->is_format_supported(
- ctx->screen, texture->format, texture->target, 0,
- PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0);
+ blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
+ desc->layout == UTIL_FORMAT_LAYOUT_S3TC;
trans = CALLOC_STRUCT(r300_transfer);
if (trans) {
@@ -114,8 +113,13 @@ r300_texture_get_transfer(struct pipe_context *ctx,
/* If the texture is tiled, we must create a temporary detiled texture
* for this transfer.
* Also make write transfers pipelined. */
- if (tex->desc.microtile || tex->desc.macrotile[level] ||
- ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) {
+ if (tex->tex.microtile || tex->tex.macrotile[level] ||
+ (referenced_hw && blittable && !(usage & PIPE_TRANSFER_READ))) {
+ if (r300->blitter->running) {
+ fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n");
+ os_break();
+ }
+
base.target = PIPE_TEXTURE_2D;
base.format = texture->format;
base.width0 = box->width;
@@ -140,7 +144,7 @@ r300_texture_get_transfer(struct pipe_context *ctx,
}
/* Create the temporary texture. */
- trans->linear_texture = r300_texture(
+ trans->linear_texture = r300_resource(
ctx->screen->resource_create(ctx->screen,
&base));
@@ -149,14 +153,14 @@ r300_texture_get_transfer(struct pipe_context *ctx,
* Let's flush and try again. */
ctx->flush(ctx, 0, NULL);
- trans->linear_texture = r300_texture(
+ trans->linear_texture = r300_resource(
ctx->screen->resource_create(ctx->screen,
&base));
if (!trans->linear_texture) {
/* For linear textures, it's safe to fallback to
* an unpipelined transfer. */
- if (!tex->desc.microtile && !tex->desc.macrotile[level]) {
+ if (!tex->tex.microtile && !tex->tex.macrotile[level]) {
goto unpipelined;
}
@@ -168,8 +172,8 @@ r300_texture_get_transfer(struct pipe_context *ctx,
}
}
- assert(!trans->linear_texture->desc.microtile &&
- !trans->linear_texture->desc.macrotile[0]);
+ assert(!trans->linear_texture->tex.microtile &&
+ !trans->linear_texture->tex.macrotile[0]);
/* Set the stride.
*
@@ -179,7 +183,7 @@ r300_texture_get_transfer(struct pipe_context *ctx,
* right thing internally.
*/
trans->transfer.stride =
- trans->linear_texture->desc.stride_in_bytes[0];
+ trans->linear_texture->tex.stride_in_bytes[0];
if (usage & PIPE_TRANSFER_READ) {
/* We cannot map a tiled texture directly because the data is
@@ -194,8 +198,8 @@ r300_texture_get_transfer(struct pipe_context *ctx,
unpipelined:
/* Unpipelined transfer. */
- trans->transfer.stride = tex->desc.stride_in_bytes[level];
- trans->offset = r300_texture_get_offset(&tex->desc, level, box->z);
+ trans->transfer.stride = tex->tex.stride_in_bytes[level];
+ trans->offset = r300_texture_get_offset(tex, level, box->z);
if (referenced_cs)
ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
@@ -227,20 +231,19 @@ void* r300_texture_transfer_map(struct pipe_context *ctx,
struct r300_context *r300 = r300_context(ctx);
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys;
struct r300_transfer *r300transfer = r300_transfer(transfer);
- struct r300_texture *tex = r300_texture(transfer->resource);
+ struct r300_resource *tex = r300_resource(transfer->resource);
char *map;
- enum pipe_format format = tex->desc.b.b.format;
+ enum pipe_format format = tex->b.b.b.format;
if (r300transfer->linear_texture) {
/* The detiled texture is of the same size as the region being mapped
* (no offset needed). */
- return rws->buffer_map(rws,
- r300transfer->linear_texture->buffer,
+ return rws->buffer_map(r300transfer->linear_texture->buf,
r300->cs,
transfer->usage);
} else {
/* Tiling is disabled. */
- map = rws->buffer_map(rws, tex->buffer, r300->cs,
+ map = rws->buffer_map(tex->buf, r300->cs,
transfer->usage);
if (!map) {
@@ -258,11 +261,11 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx,
{
struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys;
struct r300_transfer *r300transfer = r300_transfer(transfer);
- struct r300_texture *tex = r300_texture(transfer->resource);
+ struct r300_resource *tex = r300_resource(transfer->resource);
if (r300transfer->linear_texture) {
- rws->buffer_unmap(rws, r300transfer->linear_texture->buffer);
+ rws->buffer_unmap(r300transfer->linear_texture->buf);
} else {
- rws->buffer_unmap(rws, tex->buffer);
+ rws->buffer_unmap(tex->buf);
}
}
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 78021e2c5d4..b319890157f 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -226,6 +226,13 @@ void r300_translate_vertex_shader(struct r300_context *r300,
r300_tgsi_to_rc(&ttr, vs->state.tokens);
+ if (ttr.error) {
+ fprintf(stderr, "r300 VP: Cannot translate a shader. "
+ "Using a dummy shader instead.\n");
+ r300_dummy_vertex_shader(r300, vs);
+ return;
+ }
+
if (compiler.Base.Program.Constants.Count > 200) {
compiler.Base.remove_unused_constants = TRUE;
}
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 460da77a4fb..d5c73585c81 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -28,38 +28,45 @@
* Any winsys hosting this pipe needs to implement r300_winsys_screen and then
* call r300_screen_create to start things. */
+#include "r300_defines.h"
+
+#include "pipebuffer/pb_bufmgr.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
-#include "r300_defines.h"
-
#define R300_MAX_CMDBUF_DWORDS (16 * 1024)
struct winsys_handle;
struct r300_winsys_screen;
-struct r300_winsys_buffer; /* for map/unmap etc. */
-struct r300_winsys_cs_buffer; /* for write_reloc etc. */
+#define r300_winsys_bo pb_buffer
+#define r300_winsys_bo_reference(pdst, src) pb_reference(pdst, src)
+
+struct r300_winsys_cs_handle; /* for write_reloc etc. */
struct r300_winsys_cs {
- unsigned cdw; /* Number of used dwords. */
- uint32_t buf[R300_MAX_CMDBUF_DWORDS]; /* The command buffer. */
+ unsigned cdw; /* Number of used dwords. */
+ uint32_t *buf; /* The command buffer. */
};
enum r300_value_id {
R300_VID_PCI_ID,
R300_VID_GB_PIPES,
R300_VID_Z_PIPES,
- R300_VID_SQUARE_TILING_SUPPORT,
- R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */
- R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */
- R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer */
- R300_CAN_HYPERZ,
-};
-
-enum r300_reference_domain { /* bitfield */
- R300_REF_CS = 1,
- R300_REF_HW = 2
+ R300_VID_GART_SIZE,
+ R300_VID_VRAM_SIZE,
+ R300_VID_DRM_MAJOR,
+ R300_VID_DRM_MINOR,
+ R300_VID_DRM_PATCHLEVEL,
+
+ /* These should probably go away: */
+ R300_VID_DRM_2_1_0, /* Square tiling. */
+ R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */
+ R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */
+ R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask */
+
+ R300_CAN_HYPERZ, /* ZMask + HiZ */
+ R300_CAN_AACOMPRESS, /* CMask */
};
struct r300_winsys_screen {
@@ -98,91 +105,78 @@ struct r300_winsys_screen {
* \param domain A bitmask of the R300_DOMAIN_* flags.
* \return The created buffer object.
*/
- struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws,
+ struct r300_winsys_bo *(*buffer_create)(struct r300_winsys_screen *ws,
unsigned size,
unsigned alignment,
unsigned bind,
unsigned usage,
enum r300_buffer_domain domain);
- struct r300_winsys_cs_buffer *(*buffer_get_cs_handle)(
- struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf);
-
- /**
- * Reference a buffer object (assign with reference counting).
- *
- * \param ws The winsys this function is called from.
- * \param pdst A destination pointer to set the source buffer to.
- * \param src A source buffer object.
- */
- void (*buffer_reference)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer **pdst,
- struct r300_winsys_buffer *src);
+ struct r300_winsys_cs_handle *(*buffer_get_cs_handle)(
+ struct r300_winsys_bo *buf);
/**
* Map the entire data store of a buffer object into the client's address
* space.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to map.
* \param cs A command stream to flush if the buffer is referenced by it.
* \param usage A bitmask of the PIPE_TRANSFER_* flags.
* \return The pointer at the beginning of the buffer.
*/
- void *(*buffer_map)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
+ void *(*buffer_map)(struct r300_winsys_bo *buf,
struct r300_winsys_cs *cs,
enum pipe_transfer_usage usage);
/**
* Unmap a buffer object from the client's address space.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to unmap.
*/
- void (*buffer_unmap)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf);
+ void (*buffer_unmap)(struct r300_winsys_bo *buf);
+
+ /**
+ * Return TRUE if a buffer object is being used by the GPU.
+ *
+ * \param buf A winsys buffer object.
+ */
+ boolean (*buffer_is_busy)(struct r300_winsys_bo *buf);
/**
* Wait for a buffer object until it is not used by a GPU. This is
* equivalent to a fence placed after the last command using the buffer,
* and synchronizing to the fence.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to wait for.
*/
- void (*buffer_wait)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf);
+ void (*buffer_wait)(struct r300_winsys_bo *buf);
/**
* Return tiling flags describing a memory layout of a buffer object.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to get the flags from.
* \param macrotile A pointer to the return value of the microtile flag.
* \param microtile A pointer to the return value of the macrotile flag.
*
* \note microtile and macrotile are not bitmasks!
*/
- void (*buffer_get_tiling)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
+ void (*buffer_get_tiling)(struct r300_winsys_bo *buf,
enum r300_buffer_tiling *microtile,
enum r300_buffer_tiling *macrotile);
/**
* Set tiling flags describing a memory layout of a buffer object.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to set the flags for.
+ * \param cs A command stream to flush if the buffer is referenced by it.
* \param macrotile A macrotile flag.
* \param microtile A microtile flag.
* \param stride A stride of the buffer in bytes, for texturing.
*
* \note microtile and macrotile are not bitmasks!
*/
- void (*buffer_set_tiling)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
+ void (*buffer_set_tiling)(struct r300_winsys_bo *buf,
+ struct r300_winsys_cs *cs,
enum r300_buffer_tiling microtile,
enum r300_buffer_tiling macrotile,
unsigned stride);
@@ -197,7 +191,7 @@ struct r300_winsys_screen {
* \param stride The returned buffer stride in bytes.
* \param size The returned buffer size.
*/
- struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws,
+ struct r300_winsys_bo *(*buffer_from_handle)(struct r300_winsys_screen *ws,
struct winsys_handle *whandle,
unsigned *stride,
unsigned *size);
@@ -206,14 +200,12 @@ struct r300_winsys_screen {
* Get a winsys handle from a winsys buffer. The internal structure
* of the handle is platform-specific and only a winsys should access it.
*
- * \param ws The winsys this function is called from.
* \param buf A winsys buffer object to get the handle from.
* \param whandle A winsys handle pointer.
* \param stride A stride of the buffer in bytes, for texturing.
* \return TRUE on success.
*/
- boolean (*buffer_get_handle)(struct r300_winsys_screen *ws,
- struct r300_winsys_buffer *buf,
+ boolean (*buffer_get_handle)(struct r300_winsys_bo *buf,
unsigned stride,
struct winsys_handle *whandle);
@@ -248,7 +240,7 @@ struct r300_winsys_screen {
* \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags.
*/
void (*cs_add_reloc)(struct r300_winsys_cs *cs,
- struct r300_winsys_cs_buffer *buf,
+ struct r300_winsys_cs_handle *buf,
enum r300_buffer_domain rd,
enum r300_buffer_domain wd);
@@ -269,7 +261,7 @@ struct r300_winsys_screen {
* \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags.
*/
void (*cs_write_reloc)(struct r300_winsys_cs *cs,
- struct r300_winsys_cs_buffer *buf);
+ struct r300_winsys_cs_handle *buf);
/**
* Flush a command stream.
@@ -279,6 +271,13 @@ struct r300_winsys_screen {
void (*cs_flush)(struct r300_winsys_cs *cs);
/**
+ * Wait until the last flush is completed.
+ *
+ * \param cs A command stream.
+ */
+ void (*cs_sync_flush)(struct r300_winsys_cs *cs);
+
+ /**
* Set a flush callback which is called from winsys when flush is
* required.
*
@@ -291,16 +290,13 @@ struct r300_winsys_screen {
void *user);
/**
- * Return TRUE if a buffer is referenced by a command stream or by hardware
- * (i.e. is busy), based on the domain parameter.
+ * Return TRUE if a buffer is referenced by a command stream.
*
* \param cs A command stream.
* \param buf A winsys buffer.
- * \param domain A bitmask of the R300_REF_* enums.
*/
boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs,
- struct r300_winsys_cs_buffer *buf,
- enum r300_reference_domain domain);
+ struct r300_winsys_cs_handle *buf);
};
#endif /* R300_WINSYS_H */
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index a690b671e49..436de9c4dbd 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -22,7 +22,6 @@ C_SOURCES = \
evergreen_state.c \
eg_asm.c \
r600_translate.c \
- r600_state_common.c \
- r600_upload.c
+ r600_state_common.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index e51f50c5df5..5a5fa6d65fd 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -28,7 +28,6 @@ r600 = env.ConvenienceLibrary(
'r600_state_common.c',
'r600_texture.c',
'r600_translate.c',
- 'r600_upload.c',
'r700_asm.c',
'evergreen_state.c',
'eg_asm.c',
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index 5a39d7cdeec..b5fcc7106fe 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim)
default:
case PIPE_TEXTURE_1D:
return V_030000_SQ_TEX_DIM_1D;
+ case PIPE_TEXTURE_1D_ARRAY:
+ return V_030000_SQ_TEX_DIM_1D_ARRAY;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
return V_030000_SQ_TEX_DIM_2D;
+ case PIPE_TEXTURE_2D_ARRAY:
+ return V_030000_SQ_TEX_DIM_2D_ARRAY;
case PIPE_TEXTURE_3D:
return V_030000_SQ_TEX_DIM_3D;
case PIPE_TEXTURE_CUBE:
@@ -289,10 +293,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_028C70_SWAP_ALT;
+
case PIPE_FORMAT_A8_UNORM:
return V_028C70_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_028C70_SWAP_STD;
@@ -313,6 +321,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_028C70_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
return V_028C70_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_028C70_SWAP_STD;
@@ -352,9 +361,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_028C70_SWAP_STD_REV;
+ return V_028C70_SWAP_STD;
+
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return V_028C70_SWAP_ALT;
case PIPE_FORMAT_R16G16_UNORM:
return V_028C70_SWAP_STD;
@@ -362,14 +373,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- // return V_028C70_COLOR_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- // return V_028C70_COLOR_16_16_16_16_FLOAT;
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return V_028C70_COLOR_32_32_32_32_FLOAT;
- return 0;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_028C70_SWAP_STD;
default:
R600_ERR("unsupported colorswap format %d\n", format);
return ~0;
@@ -381,9 +391,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_028C70_COLOR_4_4;
+
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_028C70_COLOR_8;
@@ -404,6 +418,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_028C70_COLOR_16;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
case PIPE_FORMAT_R8G8_UNORM:
return V_028C70_COLOR_8_8;
@@ -430,7 +445,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_028C70_COLOR_10_10_10_2;
+ return V_028C70_COLOR_2_10_10_10;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
@@ -471,6 +486,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_028C70_COLOR_32_32;
/* 128-bit buffers. */
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_028C70_COLOR_32_32_32_32;
case PIPE_FORMAT_R32G32B32_FLOAT:
return V_028C70_COLOR_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
@@ -501,9 +519,4 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format)
return r600_translate_dbformat(format) != ~0;
}
-static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format)
-{
- return r600_translate_colorformat(format) != ~0;
-}
-
#endif
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 306ca03234f..3efdbaba0c3 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -103,7 +103,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
}
blend->cb_target_mask = target_mask;
r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
- color_control, 0xFFFFFFFF, NULL);
+ color_control, 0xFFFFFFFD, NULL);
r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
for (int i = 0; i < 8; i++) {
@@ -351,7 +351,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
struct r600_resource *rbuffer;
unsigned format;
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
- unsigned char swizzle[4];
+ unsigned char swizzle[4], array_mode = 0, tile_type = 0;
struct r600_bo *bo[2];
if (resource == NULL)
@@ -380,35 +380,42 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
if (desc == NULL) {
R600_ERR("unknow format %d\n", state->format);
}
- tmp = (struct r600_resource_texture*)texture;
+ tmp = (struct r600_resource_texture *)texture;
+ if (tmp->depth && !tmp->is_flushing_texture) {
+ r600_texture_depth_flush(ctx, texture, TRUE);
+ tmp = tmp->flushed_depth_texture;
+ }
+
+ if (tmp->force_int_type) {
+ word4 &= C_030010_NUM_FORMAT_ALL;
+ word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT);
+ }
+
rbuffer = &tmp->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
- /* FIXME depth texture decompression */
- if (tmp->depth) {
- r600_texture_depth_flush(ctx, texture);
- tmp = (struct r600_resource_texture*)texture;
- rbuffer = &tmp->flushed_depth_texture->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- }
- pitch = align(tmp->pitch_in_pixels[0], 8);
+
+ pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8);
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
/* FIXME properly handle first level != 0 */
r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
S_030000_DIM(r600_tex_dim(texture->target)) |
S_030000_PITCH((pitch / 8) - 1) |
+ S_030000_NON_DISP_TILING_ORDER(tile_type) |
S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
S_030004_TEX_HEIGHT(texture->height0 - 1) |
- S_030004_TEX_DEPTH(texture->depth0 - 1),
+ S_030004_TEX_DEPTH(texture->depth0 - 1) |
+ S_030004_ARRAY_MODE(array_mode),
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
(tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
(tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
- word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) |
+ word4 |
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) |
S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
@@ -431,7 +438,8 @@ static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned cou
for (int i = 0; i < count; i++) {
if (resource[i]) {
- evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i);
+ evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
}
}
}
@@ -446,9 +454,11 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
for (i = 0; i < count; i++) {
if (&rctx->ps_samplers.views[i]->base != views[i]) {
if (resource[i])
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
else
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -457,7 +467,8 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
}
for (i = count; i < NUM_TEX_UNITS; i++) {
if (rctx->ps_samplers.views[i]) {
- evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
@@ -638,11 +649,19 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
unsigned color_info;
unsigned format, swap, ntype;
unsigned offset;
+ unsigned tile_type;
const struct util_format_description *desc;
struct r600_bo *bo[3];
+ int i;
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE);
+ rtex = rtex->flushed_depth_texture;
+ }
+
rbuffer = &rtex->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
@@ -651,21 +670,43 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
/* XXX quite sure for dx10+ hw don't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
level, state->cbufs[cb]->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
+ desc = util_format_description(surf->base.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_028C70_NUMBER_SRGB;
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
+ format = r600_translate_colorformat(surf->base.format);
+ swap = r600_translate_colorswap(surf->base.format);
+
+ /* disable when gallium grows int textures */
+ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type)
+ ntype = 4;
+
color_info = S_028C70_FORMAT(format) |
S_028C70_COMP_SWAP(swap) |
+ S_028C70_ARRAY_MODE(rtex->array_mode[level]) |
S_028C70_BLEND_CLAMP(1) |
S_028C70_NUMBER_TYPE(ntype);
- if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- color_info |= S_028C70_SOURCE_FORMAT(1);
+
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ /* we can only set the export size if any thing is snorm/unorm component is > 11 bits,
+ if we aren't a float, sint or uint */
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
+ desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT &&
+ ntype != 4 && ntype != 5)
+ color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC);
+
+ if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) {
+ tile_type = rtex->tile_type;
+ } else /* workaround for linear buffers */
+ tile_type = 1;
/* FIXME handle enabling of CB beyond BASE8 which has different offset */
r600_pipe_state_add_reg(rstate,
@@ -690,7 +731,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C,
- S_028C74_NON_DISP_TILING_ORDER(1),
+ S_028C74_NON_DISP_TILING_ORDER(tile_type),
0xFFFFFFFF, bo[0]);
}
@@ -711,17 +752,14 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode[level] = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
+
rbuffer = &rtex->resource;
/* XXX quite sure for dx10+ hw don't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
level, state->zsbuf->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
@@ -837,51 +875,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
}
}
-static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
- uint32_t offset;
-
- /* Note that the state tracker can unbind constant buffers by
- * passing NULL here.
- */
- if (buffer == NULL) {
- return;
- }
-
- r600_upload_const_buffer(rctx, buffer, &offset);
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- rctx->vs_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028980_ALU_CONST_CACHE_VS_0,
- (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
- break;
- case PIPE_SHADER_FRAGMENT:
- rctx->ps_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028940_ALU_CONST_CACHE_PS_0,
- (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
-}
-
void evergreen_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = evergreen_create_blend_state;
@@ -909,7 +902,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.delete_vs_state = r600_delete_vs_shader;
rctx->context.set_blend_color = evergreen_set_blend_color;
rctx->context.set_clip_state = evergreen_set_clip_state;
- rctx->context.set_constant_buffer = evergreen_set_constant_buffer;
+ rctx->context.set_constant_buffer = r600_set_constant_buffer;
rctx->context.set_fragment_sampler_views = evergreen_set_ps_sampler_view;
rctx->context.set_framebuffer_state = evergreen_set_framebuffer_state;
rctx->context.set_polygon_stipple = evergreen_set_polygon_stipple;
@@ -921,6 +914,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view;
rctx->context.set_viewport_state = evergreen_set_viewport_state;
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
+ rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
}
void evergreen_init_config(struct r600_pipe_context *rctx)
@@ -1325,216 +1319,6 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
}
}
-static void evergreen_spi_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_shader *shader = rctx->ps_shader;
- struct r600_pipe_state rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp;
-
- rstate.nregs = 0;
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
- }
- r600_context_pipe_state_set(&rctx->ctx, &rstate);
-}
-
-void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_state *rstate;
- struct r600_resource *rbuffer;
- struct pipe_vertex_buffer *vertex_buffer;
- unsigned i, offset;
-
- /* we don't update until we know vertex elements */
- if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
- return;
-
- if (rctx->vertex_elements->incompatible_layout) {
- /* translate rebind new vertex elements so
- * return once translated
- */
- r600_begin_vertex_translate(rctx);
- return;
- }
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- rctx->nvs_resource = rctx->vertex_elements->count;
- } else {
- /* bind vertex buffer once */
- rctx->nvs_resource = rctx->nvertex_buffer;
- }
-
- for (i = 0 ; i < rctx->nvs_resource; i++) {
- rstate = &rctx->vs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- unsigned vbuffer_index;
- vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->vbuffer_offset[i];
- } else {
- /* bind vertex buffer once */
- vertex_buffer = &rctx->vertex_buffer[i];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = 0;
- }
- if (vertex_buffer == NULL || rbuffer == NULL)
- continue;
- offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
-
- r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
- offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
- rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
- S_030008_STRIDE(vertex_buffer->stride),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
- S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
- S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
- S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
- S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
- 0xC0000000, 0xFFFFFFFF, NULL);
- evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
- }
-}
-
-int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
- struct r600_drawl draw;
- unsigned prim;
-
- memset(&draw, 0, sizeof(struct r600_drawl));
- draw.ctx = ctx;
- draw.mode = info->mode;
- draw.start = info->start;
- draw.count = info->count;
- if (info->indexed && rctx->index_buffer.buffer) {
- draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->index_bias;
-
- r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
- &rctx->index_buffer.index_size,
- &draw.start,
- info->count);
-
- draw.index_size = rctx->index_buffer.index_size;
- pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
- draw.index_buffer_offset = draw.start * draw.index_size;
- draw.start = 0;
- r600_upload_index_buffer(rctx, &draw);
- } else {
- draw.index_size = 0;
- draw.index_buffer = NULL;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->start;
- }
-
- switch (draw.index_size) {
- case 2:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 0;
- break;
- case 4:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 1;
- break;
- case 0:
- vgt_draw_initiator = 2;
- vgt_dma_index_type = 0;
- break;
- default:
- R600_ERR("unsupported index size %d\n", draw.index_size);
- return;
- }
- if (r600_conv_pipe_prim(draw.mode, &prim))
- return;
- if (unlikely(rctx->ps_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- if (unlikely(rctx->vs_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- /* there should be enough input */
- if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
- return;
- }
-
- evergreen_spi_update(rctx);
-
- mask = 0;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- mask |= (0xF << (i * 4));
- }
-
- vgt.id = R600_PIPE_STATE_VGT;
- vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.min_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, &vgt);
-
- rdraw.vgt_num_indices = draw.count;
- rdraw.vgt_num_instances = 1;
- rdraw.vgt_index_type = vgt_dma_index_type;
- rdraw.vgt_draw_initiator = vgt_draw_initiator;
- rdraw.indices = NULL;
- if (draw.index_buffer) {
- rbuffer = (struct r600_resource*)draw.index_buffer;
- rdraw.indices = rbuffer->bo;
- rdraw.indices_bo_offset = draw.index_buffer_offset;
- }
- evergreen_context_draw(&rctx->ctx, &rdraw);
-
- pipe_resource_reference(&draw.index_buffer, NULL);
-}
-
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_state *rstate = &shader->rstate;
@@ -1733,3 +1517,31 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
S_028000_COPY_CENTROID(1), NULL);
return rstate;
}
+
+void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
+{
+ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
+ S_030008_STRIDE(stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
+ S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+ S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+ S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+ S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+}
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index e09e02ca000..f0a1ee0cd02 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -327,6 +327,9 @@
#define S_028C70_SOURCE_FORMAT(x) (((x) & 0x3) << 24)
#define G_028C70_SOURCE_FORMAT(x) (((x) >> 24) & 0x3)
#define C_028C70_SOURCE_FORMAT 0xFCFFFFFF
+#define V_028C70_EXPORT_4C_32BPC 0x0
+#define V_028C70_EXPORT_4C_16BPC 0x1
+#define V_028C70_EXPORT_2C_32BPC 0x2 /* Do not use */
#define S_028C70_RAT(x) (((x) & 0x1) << 26)
#define G_028C70_RAT(x) (((x) >> 26) & 0x1)
#define C_028C70_RAT 0xFBFFFFFF
@@ -427,15 +430,6 @@
#define C_028800_STENCILZFAIL_BF 0x1FFFFFFF
#define R_028808_CB_COLOR_CONTROL 0x028808
-#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0)
-#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1)
-#define C_028808_FOG_ENABLE 0xFFFFFFFE
-#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1)
-#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1)
-#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD
-#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2)
-#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1)
-#define C_028808_DITHER_ENABLE 0xFFFFFFFB
#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7
@@ -939,6 +933,9 @@
#define V_030000_SQ_TEX_DIM_2D_ARRAY 0x00000005
#define V_030000_SQ_TEX_DIM_2D_MSAA 0x00000006
#define V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA 0x00000007
+#define S_030000_NON_DISP_TILING_ORDER(x) (((x) & 0x1) << 5)
+#define G_030000_NON_DISP_TILING_ORDER(x) (((x) >> 5) & 0x1)
+#define C_030000_NON_DISP_TILING_ORDER 0xFFFFFFDF
#define S_030000_PITCH(x) (((x) & 0xFFF) << 6)
#define G_030000_PITCH(x) (((x) >> 6) & 0xFFF)
#define C_030000_PITCH 0xFFFC003F
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index a852bef6156..64c52bca795 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -113,6 +113,7 @@ struct r600_tiling_info {
enum radeon_family r600_get_family(struct radeon *rw);
enum chip_class r600_get_family_class(struct radeon *radeon);
struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon);
+unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
/* r600_bo.c */
struct r600_bo;
@@ -249,6 +250,7 @@ struct r600_context {
struct list_head query_list;
unsigned num_query_running;
struct list_head fenced_bo;
+ unsigned max_db; /* for OQ */
};
struct r600_draw {
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index f4ff2fc3d43..1393df88757 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -50,6 +50,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
return 0;
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
@@ -97,6 +98,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
return 0;
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+ case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
@@ -288,6 +290,31 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
{
int r;
+ if (bc->cf_last && bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
+ output->type == bc->cf_last->output.type &&
+ output->elem_size == bc->cf_last->output.elem_size &&
+ output->swizzle_x == bc->cf_last->output.swizzle_x &&
+ output->swizzle_y == bc->cf_last->output.swizzle_y &&
+ output->swizzle_z == bc->cf_last->output.swizzle_z &&
+ output->swizzle_w == bc->cf_last->output.swizzle_w &&
+ (output->burst_count + bc->cf_last->output.burst_count) <= 16) {
+
+ if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
+ (output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
+
+ bc->cf_last->output.gpr = output->gpr;
+ bc->cf_last->output.array_base = output->array_base;
+ bc->cf_last->output.burst_count += output->burst_count;
+ return 0;
+
+ } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
+ output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
+
+ bc->cf_last->output.burst_count += output->burst_count;
+ return 0;
+ }
+ }
+
r = r600_bc_add_cf(bc);
if (r)
return r;
@@ -418,6 +445,20 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
}
}
+static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+ switch (bc->chiprev) {
+ case CHIPREV_R600:
+ case CHIPREV_R700:
+ return !alu->is_op3 &&
+ alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+ case CHIPREV_EVERGREEN:
+ default:
+ return !alu->is_op3 &&
+ alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+ }
+}
+
static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
{
switch (bc->chiprev) {
@@ -480,9 +521,9 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
case CHIPREV_EVERGREEN:
default:
if (!alu->is_op3)
+ /* Note that FLT_TO_INT* instructions are vector instructions
+ * on Evergreen, despite what the documentation says. */
return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
- alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR ||
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
@@ -563,7 +604,7 @@ struct alu_bank_swizzle {
int hw_cfile_elem[4];
};
-const unsigned cycle_for_bank_swizzle_vec[][3] = {
+static const unsigned cycle_for_bank_swizzle_vec[][3] = {
[SQ_ALU_VEC_012] = { 0, 1, 2 },
[SQ_ALU_VEC_021] = { 0, 2, 1 },
[SQ_ALU_VEC_120] = { 1, 2, 0 },
@@ -572,7 +613,7 @@ const unsigned cycle_for_bank_swizzle_vec[][3] = {
[SQ_ALU_VEC_210] = { 2, 1, 0 }
};
-const unsigned cycle_for_bank_swizzle_scl[][3] = {
+static const unsigned cycle_for_bank_swizzle_scl[][3] = {
[SQ_ALU_SCL_210] = { 2, 1, 0 },
[SQ_ALU_SCL_122] = { 1, 2, 2 },
[SQ_ALU_SCL_212] = { 2, 1, 2 },
@@ -785,7 +826,8 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
for (i = 0; i < 5; ++i) {
if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
gpr[i] = prev[i]->dst.sel;
- if (is_alu_reduction_inst(bc, prev[i]))
+ /* cube writes more than PV.X */
+ if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i]))
chan[i] = 0;
else
chan[i] = prev[i]->dst.chan;
@@ -865,7 +907,7 @@ static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
for (i = 0; i < num_src; ++i) {
if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
- uint32_t value = alu->src[i].value[alu->src[i].chan];
+ uint32_t value = alu->src[i].value;
unsigned found = 0;
for (j = 0; j < *nliteral; ++j) {
if (literal[j] == value) {
@@ -892,7 +934,7 @@ static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
for (i = 0; i < num_src; ++i) {
if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
- uint32_t value = alu->src[i].value[alu->src[i].chan];
+ uint32_t value = alu->src[i].value;
for (j = 0; j < nliteral; ++j) {
if (literal[j] == value) {
alu->src[i].chan = j;
@@ -1195,8 +1237,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
bc->ngpr = nalu->src[i].sel + 1;
}
if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
- r600_bc_special_constants(
- nalu->src[i].value[nalu->src[i].chan],
+ r600_bc_special_constants(nalu->src[i].value,
&nalu->src[i].sel, &nalu->src[i].neg);
}
if (nalu->dst.sel >= bc->ngpr) {
@@ -1308,6 +1349,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
return -ENOMEM;
memcpy(ntex, tex, sizeof(struct r600_bc_tex));
+ /* we can't fetch data und use it as texture lookup address in the same TEX clause */
+ if (bc->cf_last != NULL &&
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
+ struct r600_bc_tex *ttex;
+ LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
+ if (ttex->dst_gpr == ntex->src_gpr) {
+ bc->force_add_cf = 1;
+ break;
+ }
+ }
+ }
+
/* cf can contains only alu or only vtx or only tex */
if (bc->cf_last == NULL ||
bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX ||
@@ -1374,6 +1427,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
}
}
bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) |
+ S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
@@ -2674,18 +2728,73 @@ void r600_bc_dump(struct r600_bc *bc)
}
LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
- //TODO
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", tex->inst);
+ fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id);
+ fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr);
+ fprintf(stderr, "REL:%d)\n", tex->src_rel);
+ id++;
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr);
+ fprintf(stderr, "REL:%d ", tex->dst_rel);
+ fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", tex->dst_sel_z);
+ fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w);
+ fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias);
+ fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x);
+ fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y);
+ fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z);
+ fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w);
+ id++;
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "OFFSET_X:%d ", tex->offset_x);
+ fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y);
+ fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z);
+ fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id);
+ fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z);
+ fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w);
+ id++;
+ fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
+ id++;
}
LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "INST:%d ", vtx->inst);
+ fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type);
+ fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id);
+ id++;
+ /* This assumes that no semantic fetches exist */
+ fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
+ fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr);
+ fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x);
+ fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count);
+ fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr);
+ fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x);
+ fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y);
+ fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z);
+ fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w);
+ fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields);
+ fprintf(stderr, "DATA_FORMAT:%d ", vtx->data_format);
+ fprintf(stderr, "NUM_FORMAT_ALL:%d ", vtx->num_format_all);
+ fprintf(stderr, "FORMAT_COMP_ALL:%d ", vtx->format_comp_all);
+ fprintf(stderr, "SRF_MODE_ALL:%d\n", vtx->srf_mode_all);
+ id++;
+ fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
//TODO
+ id++;
+ fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]);
+ id++;
}
}
fprintf(stderr, "--------------------------------------\n");
}
-void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+static void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
{
struct r600_pipe_state *rstate;
unsigned i = 0;
@@ -2721,42 +2830,6 @@ void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
0xFFFFFFFF, ve->fetch_shader);
}
-void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
-{
- struct r600_pipe_state *rstate;
- unsigned i = 0;
-
- if (count > 8) {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(8 - 1);
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((count - 8) - 1);
- } else {
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
- S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT(count - 1);
- }
- bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
- bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
- S_SQ_CF_WORD1_BARRIER(1);
-
- rstate = &ve->rstate;
- rstate->id = R600_PIPE_STATE_FETCH_SHADER;
- rstate->nregs = 0;
- r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
- r600_bo_offset(ve->fetch_shader) >> 8,
- 0xFFFFFFFF, ve->fetch_shader);
-}
-
static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
unsigned *num_format, unsigned *format_comp)
{
@@ -2780,7 +2853,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
}
switch (desc->channel[i].type) {
- /* Half-floats, floats, doubles */
+ /* Half-floats, floats, ints */
case UTIL_FORMAT_TYPE_FLOAT:
switch (desc->channel[i].size) {
case 16:
@@ -2792,8 +2865,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_16_16_FLOAT;
break;
case 3:
- *format = FMT_16_16_16_FLOAT;
- break;
case 4:
*format = FMT_16_16_16_16_FLOAT;
break;
@@ -2833,8 +2904,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_8_8;
break;
case 3:
- // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
- // break;
case 4:
*format = FMT_8_8_8_8;
break;
@@ -2849,8 +2918,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
*format = FMT_16_16;
break;
case 3:
- // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
- // break;
case 4:
*format = FMT_16_16_16_16;
break;
@@ -2938,10 +3005,10 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
for (i = 0; i < ve->count; i++) {
unsigned vbuffer_index;
- r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
- desc = util_format_description(ve->hw_format[i]);
+ r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp);
+ desc = util_format_description(ve->elements[i].src_format);
if (desc == NULL) {
- R600_ERR("unknown format %d\n", ve->hw_format[i]);
+ R600_ERR("unknown format %d\n", ve->elements[i].src_format);
r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
return -EINVAL;
}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 519245f3af2..453c29790c1 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -34,7 +34,7 @@ struct r600_bc_alu_src {
unsigned neg;
unsigned abs;
unsigned rel;
- u32 *value;
+ uint32_t value;
};
struct r600_bc_alu_dst {
@@ -201,8 +201,6 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
void r600_bc_dump(struct r600_bc *bc);
-void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
-void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index b9ec9592e35..9865ea17ae5 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -36,6 +36,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ rctx->blit = true;
r600_context_queries_suspend(&rctx->ctx);
util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]);
@@ -53,9 +54,9 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
if (rctx->states[R600_PIPE_STATE_CLIP]) {
util_blitter_save_clip(rctx->blitter, &rctx->clip);
}
- util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer);
-
- rctx->vertex_elements = NULL;
+ util_blitter_save_vertex_buffers(rctx->blitter,
+ rctx->vbuf_mgr->nr_vertex_buffers,
+ rctx->vbuf_mgr->vertex_buffer);
if (op & (R600_CLEAR_SURFACE | R600_COPY))
util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);
@@ -76,6 +77,7 @@ static void r600_blitter_end(struct pipe_context *ctx)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
r600_context_queries_resume(&rctx->ctx);
+ rctx->blit = false;
}
void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
@@ -84,13 +86,17 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t
struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
int level = 0;
float depth = 1.0f;
- surf_tmpl.format = texture->resource.base.b.format;
+
+ if (!texture->dirty_db)
+ return;
+
+ surf_tmpl.format = texture->resource.b.b.b.format;
surf_tmpl.u.tex.level = level;
surf_tmpl.u.tex.first_layer = 0;
surf_tmpl.u.tex.last_layer = 0;
surf_tmpl.usage = PIPE_BIND_DEPTH_STENCIL;
- zsurf = ctx->create_surface(ctx, &texture->resource.base.b, &surf_tmpl);
+ zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl);
surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format;
surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
@@ -107,6 +113,48 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t
pipe_surface_reference(&zsurf, NULL);
pipe_surface_reference(&cbsurf, NULL);
+
+ texture->dirty_db = FALSE;
+}
+
+void r600_flush_depth_textures(struct r600_pipe_context *rctx)
+{
+ unsigned int i;
+
+ if (rctx->blit) return;
+
+ /* FIXME: This handles fragment shader textures only. */
+
+ for (i = 0; i < rctx->ps_samplers.n_views; ++i) {
+ struct r600_pipe_sampler_view *view;
+ struct r600_resource_texture *tex;
+
+ view = rctx->ps_samplers.views[i];
+ if (!view) continue;
+
+ tex = (struct r600_resource_texture *)view->base.texture;
+ if (!tex->depth)
+ continue;
+
+ if (tex->is_flushing_texture)
+ continue;
+
+ r600_blit_uncompress_depth(&rctx->context, tex);
+ }
+
+ /* also check CB here */
+ for (i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
+ struct r600_resource_texture *tex;
+ tex = (struct r600_resource_texture *)rctx->framebuffer.cbufs[i]->texture;
+
+ if (!tex->depth)
+ continue;
+
+ if (tex->is_flushing_texture)
+ continue;
+
+ r600_blit_uncompress_depth(&rctx->context, tex);
+ }
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
@@ -171,6 +219,52 @@ static void r600_hw_copy_region(struct pipe_context *ctx,
r600_blitter_end(ctx);
}
+struct texture_orig_info {
+ unsigned format;
+ unsigned width0;
+ unsigned height0;
+};
+
+static void r600_s3tc_to_blittable(struct pipe_resource *tex,
+ unsigned level,
+ struct texture_orig_info *orig)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex;
+ unsigned pixsize = util_format_get_blocksize(tex->format);
+ int new_format;
+ int new_height, new_width;
+
+ orig->format = tex->format;
+ orig->width0 = tex->width0;
+ orig->height0 = tex->height0;
+
+ if (pixsize == 8)
+ new_format = PIPE_FORMAT_R16G16B16A16_UNORM; /* 64-bit block */
+ else
+ new_format = PIPE_FORMAT_R32G32B32A32_UNORM; /* 128-bit block */
+
+ new_width = util_format_get_nblocksx(tex->format, orig->width0);
+ new_height = util_format_get_nblocksy(tex->format, orig->height0);
+
+ rtex->force_int_type = true;
+ tex->width0 = new_width;
+ tex->height0 = new_height;
+ tex->format = new_format;
+
+}
+
+static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex,
+ unsigned level,
+ struct texture_orig_info *orig)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex;
+ rtex->force_int_type = false;
+
+ tex->format = orig->format;
+ tex->width0 = orig->width0;
+ tex->height0 = orig->height0;
+}
+
static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
@@ -179,15 +273,36 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
unsigned src_level,
const struct pipe_box *src_box)
{
- boolean is_depth;
- /* there is something wrong with depth resource copies at the moment so avoid them for now */
- is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
- if (is_depth)
- util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
- else
- r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
+ struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src;
+ struct texture_orig_info orig_info[2];
+ boolean restore_orig[2];
+
+ if (rsrc->depth && !rsrc->is_flushing_texture)
+ r600_texture_depth_flush(ctx, src, FALSE);
+
+ restore_orig[0] = restore_orig[1] = FALSE;
+
+ if (util_format_is_s3tc(src->format)) {
+ r600_s3tc_to_blittable(src, src_level, &orig_info[0]);
+ restore_orig[0] = TRUE;
+ }
+
+ if (util_format_is_s3tc(dst->format)) {
+ r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]);
+ restore_orig[1] = TRUE;
+ /* translate the dst box as well */
+ dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
+ dsty = util_format_get_nblocksy(orig_info[1].format, dsty);
+ }
+
+ r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+
+ if (restore_orig[0])
+ r600_reset_blittable_to_s3tc(src, src_level, &orig_info[0]);
+
+ if (restore_orig[1])
+ r600_reset_blittable_to_s3tc(dst, dst_level, &orig_info[1]);
}
void r600_init_blit_functions(struct r600_pipe_context *rctx)
@@ -197,3 +312,19 @@ void r600_init_blit_functions(struct r600_pipe_context *rctx)
rctx->context.clear_depth_stencil = r600_clear_depth_stencil;
rctx->context.resource_copy_region = r600_resource_copy_region;
}
+
+void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture)
+{
+ struct pipe_box sbox;
+
+ sbox.x = sbox.y = sbox.z = 0;
+ sbox.width = texture->resource.b.b.b.width0;
+ sbox.height = texture->resource.b.b.b.height0;
+ /* XXX that might be wrong */
+ sbox.depth = 1;
+
+ r600_hw_copy_region(ctx, (struct pipe_resource *)texture, 0,
+ 0, 0, 0,
+ (struct pipe_resource *)texture->flushed_depth_texture, 0,
+ &sbox);
+}
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 469c8195fe9..0c5d7133c7a 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -29,85 +29,58 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
+#include "util/u_upload_mgr.h"
+
#include "state_tracker/drm_driver.h"
+
#include <xf86drm.h>
#include "radeon_drm.h"
+
#include "r600.h"
#include "r600_pipe.h"
-extern struct u_resource_vtbl r600_buffer_vtbl;
-
-
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ)
-{
- struct r600_resource_buffer *rbuffer;
- struct r600_bo *bo;
- /* XXX We probably want a different alignment for buffers and textures. */
- unsigned alignment = 4096;
-
- rbuffer = CALLOC_STRUCT(r600_resource_buffer);
- if (rbuffer == NULL)
- return NULL;
-
- rbuffer->magic = R600_BUFFER_MAGIC;
- rbuffer->user_buffer = NULL;
- rbuffer->r.base.b = *templ;
- pipe_reference_init(&rbuffer->r.base.b.reference, 1);
- rbuffer->r.base.b.screen = screen;
- rbuffer->r.base.vtbl = &r600_buffer_vtbl;
- rbuffer->r.size = rbuffer->r.base.b.width0;
- rbuffer->r.bo_size = rbuffer->r.size;
- rbuffer->uploaded = FALSE;
- bo = r600_bo((struct radeon*)screen->winsys, rbuffer->r.base.b.width0, alignment, rbuffer->r.base.b.bind, rbuffer->r.base.b.usage);
- if (bo == NULL) {
- FREE(rbuffer);
- return NULL;
- }
- rbuffer->r.bo = bo;
- return &rbuffer->r.base.b;
-}
-
-struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
- void *ptr, unsigned bytes,
- unsigned bind)
-{
- struct r600_resource_buffer *rbuffer;
-
- rbuffer = CALLOC_STRUCT(r600_resource_buffer);
- if (rbuffer == NULL)
- return NULL;
-
- rbuffer->magic = R600_BUFFER_MAGIC;
- pipe_reference_init(&rbuffer->r.base.b.reference, 1);
- rbuffer->r.base.vtbl = &r600_buffer_vtbl;
- rbuffer->r.base.b.screen = screen;
- rbuffer->r.base.b.target = PIPE_BUFFER;
- rbuffer->r.base.b.format = PIPE_FORMAT_R8_UNORM;
- rbuffer->r.base.b.usage = PIPE_USAGE_IMMUTABLE;
- rbuffer->r.base.b.bind = bind;
- rbuffer->r.base.b.width0 = bytes;
- rbuffer->r.base.b.height0 = 1;
- rbuffer->r.base.b.depth0 = 1;
- rbuffer->r.base.b.array_size = 1;
- rbuffer->r.base.b.flags = 0;
- rbuffer->r.bo = NULL;
- rbuffer->r.bo_size = 0;
- rbuffer->user_buffer = ptr;
- rbuffer->uploaded = FALSE;
- return &rbuffer->r.base.b;
-}
-
static void r600_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
struct r600_resource_buffer *rbuffer = r600_buffer(buf);
if (rbuffer->r.bo) {
r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL);
}
rbuffer->r.bo = NULL;
- FREE(rbuffer);
+ util_slab_free(&rscreen->pool_buffers, rbuffer);
+}
+
+static unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
+ struct pipe_resource *buf,
+ unsigned level, int layer)
+{
+ /* FIXME */
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
+static struct pipe_transfer *r600_get_transfer(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx;
+ struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
+
+ transfer->resource = resource;
+ transfer->level = level;
+ transfer->usage = usage;
+ transfer->box = *box;
+ transfer->stride = 0;
+ transfer->layer_stride = 0;
+ transfer->data = NULL;
+
+ /* Note strides are zero, this is ok for buffers, but not for
+ * textures 2d & higher at least.
+ */
+ return transfer;
}
static void *r600_buffer_transfer_map(struct pipe_context *pipe,
@@ -117,8 +90,8 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
int write = 0;
uint8_t *data;
- if (rbuffer->user_buffer)
- return (uint8_t*)rbuffer->user_buffer + transfer->box.x;
+ if (rbuffer->r.b.user_ptr)
+ return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x;
if (transfer->usage & PIPE_TRANSFER_DONTBLOCK) {
/* FIXME */
@@ -138,7 +111,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
{
struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
- if (rbuffer->user_buffer)
+ if (rbuffer->r.b.user_ptr)
return;
if (rbuffer->r.bo)
@@ -151,128 +124,163 @@ static void r600_buffer_transfer_flush_region(struct pipe_context *pipe,
{
}
-unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned level, int layer)
+static void r600_transfer_destroy(struct pipe_context *ctx,
+ struct pipe_transfer *transfer)
{
- /* FIXME */
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ struct r600_pipe_context *rctx = (struct r600_pipe_context*)ctx;
+ util_slab_free(&rctx->pool_transfers, transfer);
}
-struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
- struct winsys_handle *whandle)
+static void r600_buffer_transfer_inline_write(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
{
- struct radeon *rw = (struct radeon*)screen->winsys;
- struct r600_resource *rbuffer;
- struct r600_bo *bo = NULL;
+ struct radeon *ws = (struct radeon*)pipe->winsys;
+ struct r600_resource_buffer *rbuffer = r600_buffer(resource);
+ uint8_t *map = NULL;
- bo = r600_bo_handle(rw, whandle->handle, NULL);
- if (bo == NULL) {
- return NULL;
- }
+ assert(rbuffer->r.b.user_ptr == NULL);
- rbuffer = CALLOC_STRUCT(r600_resource);
- if (rbuffer == NULL) {
- r600_bo_reference(rw, &bo, NULL);
- return NULL;
- }
+ map = r600_bo_map(ws, rbuffer->r.bo,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage,
+ pipe);
- pipe_reference_init(&rbuffer->base.b.reference, 1);
- rbuffer->base.b.target = PIPE_BUFFER;
- rbuffer->base.b.screen = screen;
- rbuffer->base.vtbl = &r600_buffer_vtbl;
- rbuffer->bo = bo;
- return &rbuffer->base.b;
+ memcpy(map + box->x, data, box->width);
+
+ if (rbuffer->r.bo)
+ r600_bo_unmap(ws, rbuffer->r.bo);
}
-struct u_resource_vtbl r600_buffer_vtbl =
+static const struct u_resource_vtbl r600_buffer_vtbl =
{
u_default_resource_get_handle, /* get_handle */
r600_buffer_destroy, /* resource_destroy */
r600_buffer_is_referenced_by_cs, /* is_buffer_referenced */
- u_default_get_transfer, /* get_transfer */
- u_default_transfer_destroy, /* transfer_destroy */
+ r600_get_transfer, /* get_transfer */
+ r600_transfer_destroy, /* transfer_destroy */
r600_buffer_transfer_map, /* transfer_map */
r600_buffer_transfer_flush_region, /* transfer_flush_region */
r600_buffer_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
+ r600_buffer_transfer_inline_write /* transfer_inline_write */
};
-int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
+struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
{
- if (r600_buffer_is_user_buffer(draw->index_buffer)) {
- struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer);
- unsigned upload_offset;
- int ret = 0;
-
- ret = r600_upload_buffer(rctx->rupload_vb,
- draw->index_buffer_offset,
- draw->count * draw->index_size,
- rbuffer,
- &upload_offset,
- &rbuffer->r.bo_size,
- &rbuffer->r.bo);
- if (ret)
- return ret;
- rbuffer->uploaded = TRUE;
- draw->index_buffer_offset = upload_offset;
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
+ struct r600_resource_buffer *rbuffer;
+ struct r600_bo *bo;
+ /* XXX We probably want a different alignment for buffers and textures. */
+ unsigned alignment = 4096;
+
+ rbuffer = util_slab_alloc(&rscreen->pool_buffers);
+
+ rbuffer->magic = R600_BUFFER_MAGIC;
+ rbuffer->r.b.b.b = *templ;
+ pipe_reference_init(&rbuffer->r.b.b.b.reference, 1);
+ rbuffer->r.b.b.b.screen = screen;
+ rbuffer->r.b.b.vtbl = &r600_buffer_vtbl;
+ rbuffer->r.b.user_ptr = NULL;
+ rbuffer->r.size = rbuffer->r.b.b.b.width0;
+ rbuffer->r.bo_size = rbuffer->r.size;
+
+ bo = r600_bo((struct radeon*)screen->winsys,
+ rbuffer->r.b.b.b.width0,
+ alignment, rbuffer->r.b.b.b.bind,
+ rbuffer->r.b.b.b.usage);
+
+ if (bo == NULL) {
+ FREE(rbuffer);
+ return NULL;
}
+ rbuffer->r.bo = bo;
+ return &rbuffer->r.b.b.b;
+}
+
+struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
+ void *ptr, unsigned bytes,
+ unsigned bind)
+{
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
+ struct r600_resource_buffer *rbuffer;
- return 0;
+ rbuffer = util_slab_alloc(&rscreen->pool_buffers);
+
+ rbuffer->magic = R600_BUFFER_MAGIC;
+ pipe_reference_init(&rbuffer->r.b.b.b.reference, 1);
+ rbuffer->r.b.b.vtbl = &r600_buffer_vtbl;
+ rbuffer->r.b.b.b.screen = screen;
+ rbuffer->r.b.b.b.target = PIPE_BUFFER;
+ rbuffer->r.b.b.b.format = PIPE_FORMAT_R8_UNORM;
+ rbuffer->r.b.b.b.usage = PIPE_USAGE_IMMUTABLE;
+ rbuffer->r.b.b.b.bind = bind;
+ rbuffer->r.b.b.b.width0 = bytes;
+ rbuffer->r.b.b.b.height0 = 1;
+ rbuffer->r.b.b.b.depth0 = 1;
+ rbuffer->r.b.b.b.array_size = 1;
+ rbuffer->r.b.b.b.flags = 0;
+ rbuffer->r.b.user_ptr = ptr;
+ rbuffer->r.bo = NULL;
+ rbuffer->r.bo_size = 0;
+ return &rbuffer->r.b.b.b;
}
-int r600_upload_user_buffers(struct r600_pipe_context *rctx)
+struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
+ struct winsys_handle *whandle)
{
- enum pipe_error ret = PIPE_OK;
- int i, nr;
-
- nr = rctx->vertex_elements->count;
- nr = rctx->nvertex_buffer;
-
- for (i = 0; i < nr; i++) {
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- if (r600_buffer_is_user_buffer(vb->buffer)) {
- struct r600_resource_buffer *rbuffer = r600_buffer(vb->buffer);
- unsigned upload_offset;
-
- ret = r600_upload_buffer(rctx->rupload_vb,
- 0, vb->buffer->width0,
- rbuffer,
- &upload_offset,
- &rbuffer->r.bo_size,
- &rbuffer->r.bo);
- if (ret)
- return ret;
- rbuffer->uploaded = TRUE;
- vb->buffer_offset = upload_offset;
- }
+ struct radeon *rw = (struct radeon*)screen->winsys;
+ struct r600_resource *rbuffer;
+ struct r600_bo *bo = NULL;
+
+ bo = r600_bo_handle(rw, whandle->handle, NULL);
+ if (bo == NULL) {
+ return NULL;
+ }
+
+ rbuffer = CALLOC_STRUCT(r600_resource);
+ if (rbuffer == NULL) {
+ r600_bo_reference(rw, &bo, NULL);
+ return NULL;
}
- return ret;
+
+ pipe_reference_init(&rbuffer->b.b.b.reference, 1);
+ rbuffer->b.b.b.target = PIPE_BUFFER;
+ rbuffer->b.b.b.screen = screen;
+ rbuffer->b.b.vtbl = &r600_buffer_vtbl;
+ rbuffer->bo = bo;
+ return &rbuffer->b.b.b;
}
+void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw)
+{
+ struct r600_resource_buffer *rbuffer = r600_buffer(draw->index_buffer);
+ boolean flushed;
+
+ u_upload_data(rctx->vbuf_mgr->uploader, 0,
+ draw->info.count * draw->index_size,
+ rbuffer->r.b.user_ptr,
+ &draw->index_buffer_offset,
+ &draw->index_buffer, &flushed);
+}
-int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer,
+void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer,
uint32_t *const_offset)
{
- if (r600_buffer_is_user_buffer(cbuffer)) {
- struct r600_resource_buffer *rbuffer = r600_buffer(cbuffer);
- unsigned upload_offset;
- int ret = 0;
-
- ret = r600_upload_buffer(rctx->rupload_const,
- 0, cbuffer->width0,
- rbuffer,
- &upload_offset,
- &rbuffer->r.bo_size,
- &rbuffer->r.bo);
- if (ret)
- return ret;
- rbuffer->uploaded = TRUE;
- *const_offset = upload_offset;
- return 0;
- }
+ if ((*rbuffer)->r.b.user_ptr) {
+ uint8_t *ptr = (*rbuffer)->r.b.user_ptr;
+ unsigned size = (*rbuffer)->r.b.b.b.width0;
+ boolean flushed;
+
+ *rbuffer = NULL;
- *const_offset = 0;
- return 0;
+ u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset,
+ (struct pipe_resource**)rbuffer, &flushed);
+ } else {
+ *const_offset = 0;
+ }
}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 68b625cc3b4..34094001b75 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -30,11 +30,13 @@
#include <tgsi/tgsi_util.h>
#include <util/u_blitter.h>
#include <util/u_double_list.h>
+#include <util/u_format_s3tc.h>
#include <util/u_transfer.h>
#include <util/u_surface.h>
#include <util/u_pack_color.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
+#include "util/u_upload_mgr.h"
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -69,8 +71,30 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags,
#endif
r600_context_flush(&rctx->ctx);
- r600_upload_flush(rctx->rupload_vb);
- r600_upload_flush(rctx->rupload_const);
+ /* XXX This shouldn't be really necessary, but removing it breaks some tests.
+ * Needless buffer reallocations may significantly increase memory consumption,
+ * so getting rid of this call is important. */
+ u_upload_flush(rctx->vbuf_mgr->uploader);
+}
+
+static void r600_update_num_contexts(struct r600_screen *rscreen,
+ int diff)
+{
+ pipe_mutex_lock(rscreen->mutex_num_contexts);
+ if (diff > 0) {
+ rscreen->num_contexts++;
+
+ if (rscreen->num_contexts > 1)
+ util_slab_set_thread_safety(&rscreen->pool_buffers,
+ UTIL_SLAB_MULTITHREADED);
+ } else {
+ rscreen->num_contexts--;
+
+ if (rscreen->num_contexts <= 1)
+ util_slab_set_thread_safety(&rscreen->pool_buffers,
+ UTIL_SLAB_SINGLETHREADED);
+ }
+ pipe_mutex_unlock(rscreen->mutex_num_contexts);
}
static void r600_destroy_context(struct pipe_context *context)
@@ -79,8 +103,6 @@ static void r600_destroy_context(struct pipe_context *context)
rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush);
- r600_end_vertex_translate(rctx);
-
r600_context_fini(&rctx->ctx);
util_blitter_destroy(rctx->blitter);
@@ -89,14 +111,11 @@ static void r600_destroy_context(struct pipe_context *context)
free(rctx->states[i]);
}
- r600_upload_destroy(rctx->rupload_vb);
- r600_upload_destroy(rctx->rupload_const);
+ u_vbuf_mgr_destroy(rctx->vbuf_mgr);
+ util_slab_destroy(&rctx->pool_transfers);
- if (rctx->tran.translate_cache)
- translate_cache_destroy(rctx->tran.translate_cache);
+ r600_update_num_contexts(rctx->screen, -1);
- FREE(rctx->ps_resource);
- FREE(rctx->vs_resource);
FREE(rctx);
}
@@ -108,6 +127,9 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
if (rctx == NULL)
return NULL;
+
+ r600_update_num_contexts(rscreen, 1);
+
rctx->context.winsys = rscreen->screen.winsys;
rctx->context.screen = screen;
rctx->context.priv = priv;
@@ -123,6 +145,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
r600_init_query_functions(rctx);
r600_init_context_resource_functions(rctx);
r600_init_surface_functions(rctx);
+ rctx->context.draw_vbo = r600_draw_vbo;
switch (r600_get_family(rctx->radeon)) {
case CHIP_R600:
@@ -137,7 +160,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_RV730:
case CHIP_RV710:
case CHIP_RV740:
- rctx->context.draw_vbo = r600_draw_vbo;
r600_init_state_functions(rctx);
if (r600_context_init(&rctx->ctx, rctx->radeon)) {
r600_destroy_context(&rctx->context);
@@ -154,7 +176,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
case CHIP_BARTS:
case CHIP_TURKS:
case CHIP_CAICOS:
- rctx->context.draw_vbo = evergreen_draw;
evergreen_init_state_functions(rctx);
if (evergreen_context_init(&rctx->ctx, rctx->radeon)) {
r600_destroy_context(&rctx->context);
@@ -168,39 +189,23 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
return NULL;
}
- rctx->rupload_vb = r600_upload_create(rctx, 128 * 1024, 16);
- if (rctx->rupload_vb == NULL) {
- r600_destroy_context(&rctx->context);
- return NULL;
- }
+ util_slab_create(&rctx->pool_transfers,
+ sizeof(struct pipe_transfer), 64,
+ UTIL_SLAB_SINGLETHREADED);
- rctx->rupload_const = r600_upload_create(rctx, 128 * 1024, 256);
- if (rctx->rupload_const == NULL) {
+ rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256,
+ PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER,
+ U_VERTEX_FETCH_DWORD_ALIGNED);
+ if (!rctx->vbuf_mgr) {
r600_destroy_context(&rctx->context);
return NULL;
}
rctx->blitter = util_blitter_create(&rctx->context);
if (rctx->blitter == NULL) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->tran.translate_cache = translate_cache_create();
- if (rctx->tran.translate_cache == NULL) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
- if (!rctx->vs_resource) {
- FREE(rctx);
- return NULL;
- }
-
- rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state));
- if (!rctx->ps_resource) {
- FREE(rctx);
+ r600_destroy_context(&rctx->context);
return NULL;
}
@@ -284,13 +289,16 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 1;
/* Unsupported features (boolean caps). */
- case PIPE_CAP_TIMER_QUERY:
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
case PIPE_CAP_INSTANCED_DRAWING:
return 0;
+ case PIPE_CAP_ARRAY_TEXTURES:
+ /* fix once the CS checker upstream is fixed */
+ return debug_get_bool_option("R600_ARRAY_TEXTURE", FALSE);
+
/* Texturing. */
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
@@ -319,6 +327,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
+ /* Timer queries, present when the clock frequency is non zero. */
+ case PIPE_CAP_TIMER_QUERY:
+ return r600_get_clock_crystal_freq(rscreen->radeon) != 0;
+
default:
R600_ERR("r600: unknown param %d\n", param);
return 0;
@@ -385,7 +397,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_MAX_CONSTS:
return 256; //max native parameters
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return 1;
+ return R600_MAX_CONST_BUFFERS;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* FIXME */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -441,9 +453,14 @@ static boolean r600_is_format_supported(struct pipe_screen* screen,
retval |= PIPE_BIND_DEPTH_STENCIL;
}
- if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
- r600_is_vertex_format_supported(format))
- retval |= PIPE_BIND_VERTEX_BUFFER;
+ if (usage & PIPE_BIND_VERTEX_BUFFER) {
+ struct r600_screen *rscreen = (struct r600_screen *)screen;
+ enum radeon_family family = r600_get_family(rscreen->radeon);
+
+ if (r600_is_vertex_format_supported(format, family)) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
+ }
if (usage & PIPE_BIND_TRANSFER_READ)
retval |= PIPE_BIND_TRANSFER_READ;
@@ -462,6 +479,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
radeon_decref(rscreen->radeon);
+ util_slab_destroy(&rscreen->pool_buffers);
+ pipe_mutex_destroy(rscreen->mutex_num_contexts);
FREE(rscreen);
}
@@ -489,6 +508,13 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
r600_init_screen_resource_functions(&rscreen->screen);
rscreen->tiling_info = r600_get_tiling_info(radeon);
+ util_format_s3tc_init();
+
+ util_slab_create(&rscreen->pool_buffers,
+ sizeof(struct r600_resource_buffer), 64,
+ UTIL_SLAB_SINGLETHREADED);
+
+ pipe_mutex_init(rscreen->mutex_num_contexts);
return &rscreen->screen;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 7f74fda0daf..8dc1f4ad5c3 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -30,12 +30,15 @@
#include <pipe/p_screen.h>
#include <pipe/p_context.h>
#include <util/u_math.h>
-#include "translate/translate_cache.h"
+#include "util/u_slab.h"
+#include "util/u_vbuf_mgr.h"
#include "r600.h"
#include "r600_public.h"
#include "r600_shader.h"
#include "r600_resource.h"
+#define R600_MAX_CONST_BUFFERS 1
+
enum r600_pipe_state_id {
R600_PIPE_STATE_BLEND = 0,
R600_PIPE_STATE_BLEND_COLOR,
@@ -62,6 +65,11 @@ struct r600_screen {
struct pipe_screen screen;
struct radeon *radeon;
struct r600_tiling_info *tiling_info;
+ struct util_slab_mempool pool_buffers;
+ unsigned num_contexts;
+
+ /* for thread-safe write accessing to num_contexts */
+ pipe_mutex mutex_num_contexts;
};
struct r600_pipe_sampler_view {
@@ -86,9 +94,7 @@ struct r600_vertex_element
{
unsigned count;
struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
- enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
- unsigned hw_format_size[PIPE_MAX_ATTRIBS];
- boolean incompatible_layout;
+ struct u_vbuf_mgr_elements *vmgr_elements;
struct r600_bo *fetch_shader;
unsigned fs_size;
struct r600_pipe_state rstate;
@@ -117,22 +123,9 @@ struct r600_textures_info {
unsigned n_samplers;
};
-/* vertex buffer translation context, used to translate vertex input that
- * hw doesn't natively support, so far only FLOAT64 is unsupported.
- */
-struct r600_translate_context {
- /* Translate cache for incompatible vertex offset/stride/format fallback. */
- struct translate_cache *translate_cache;
- /* The vertex buffer slot containing the translated buffer. */
- unsigned vb_slot;
- void *new_velems;
-};
-
#define R600_CONSTANT_ARRAY_SIZE 256
#define R600_RESOURCE_ARRAY_SIZE 160
-struct r600_upload;
-
struct r600_pipe_context {
struct pipe_context context;
struct blitter_context *blitter;
@@ -143,43 +136,35 @@ struct r600_pipe_context {
struct r600_pipe_state *states[R600_PIPE_NSTATES];
struct r600_context ctx;
struct r600_vertex_element *vertex_elements;
+ struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS];
struct pipe_framebuffer_state framebuffer;
struct pipe_index_buffer index_buffer;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- unsigned nvertex_buffer;
unsigned cb_target_mask;
/* for saving when using blitter */
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
- unsigned nvs_resource;
- struct r600_pipe_state *vs_resource;
- struct r600_pipe_state *ps_resource;
struct r600_pipe_state config;
struct r600_pipe_shader *ps_shader;
struct r600_pipe_shader *vs_shader;
struct r600_pipe_state vs_const_buffer;
+ struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_state ps_const_buffer;
+ struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS];
struct r600_pipe_rasterizer *rasterizer;
/* shader information */
unsigned sprite_coord_enable;
bool flatshade;
- struct r600_upload *rupload_vb;
- unsigned any_user_vbs;
struct r600_textures_info ps_samplers;
- unsigned vb_max_index;
- struct r600_translate_context tran;
- struct r600_upload *rupload_const;
+
+ struct u_vbuf_mgr *vbuf_mgr;
+ struct util_slab_mempool pool_transfers;
+ bool blit;
};
struct r600_drawl {
+ struct pipe_draw_info info;
struct pipe_context *ctx;
- unsigned mode;
- unsigned min_index;
- unsigned max_index;
- unsigned index_bias;
- unsigned start;
- unsigned count;
unsigned index_size;
unsigned index_buffer_offset;
struct pipe_resource *index_buffer;
@@ -188,16 +173,20 @@ struct r600_drawl {
/* evergreen_state.c */
void evergreen_init_state_functions(struct r600_pipe_context *rctx);
void evergreen_init_config(struct r600_pipe_context *rctx);
-void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info);
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
-void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx);
+void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride);
/* r600_blit.c */
void r600_init_blit_functions(struct r600_pipe_context *rctx);
void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_blit_push_depth(struct pipe_context *ctx, struct r600_resource_texture *texture);
+void r600_flush_depth_textures(struct r600_pipe_context *rctx);
/* r600_buffer.c */
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
@@ -205,13 +194,9 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
void *ptr, unsigned bytes,
unsigned bind);
-unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
- struct pipe_resource *buf,
- unsigned level, int layer);
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle);
-int r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
-int r600_upload_user_buffers(struct r600_pipe_context *rctx);
+void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
/* r600_query.c */
void r600_init_query_functions(struct r600_pipe_context *rctx);
@@ -220,7 +205,6 @@ void r600_init_query_functions(struct r600_pipe_context *rctx);
void r600_init_context_resource_functions(struct r600_pipe_context *r600);
/* r600_shader.c */
-int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens);
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
@@ -228,11 +212,14 @@ int r600_find_vs_semantic_index(struct r600_shader *vs,
/* r600_state.c */
void r600_init_state_functions(struct r600_pipe_context *rctx);
-void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
+void r600_spi_update(struct r600_pipe_context *rctx);
void r600_init_config(struct r600_pipe_context *rctx);
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
void r600_polygon_offset_update(struct r600_pipe_context *rctx);
-void r600_vertex_buffer_update(struct r600_pipe_context *rctx);
+void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride);
/* r600_helper.h */
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
@@ -247,8 +234,6 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
unsigned level, unsigned layer);
/* r600_translate.c */
-void r600_begin_vertex_translate(struct r600_pipe_context *rctx);
-void r600_end_vertex_translate(struct r600_pipe_context *rctx);
void r600_translate_index_buffer(struct r600_pipe_context *r600,
struct pipe_resource **index_buffer,
unsigned *index_size,
@@ -277,6 +262,9 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state);
void r600_bind_vs_shader(struct pipe_context *ctx, void *state);
void r600_delete_ps_shader(struct pipe_context *ctx, void *state);
void r600_delete_vs_shader(struct pipe_context *ctx, void *state);
+void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
+ struct pipe_resource *buffer);
+void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
/*
* common helpers
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 28b3e1e5e40..836e7491f1f 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -24,6 +24,7 @@
#define R600_RESOURCE_H
#include "util/u_transfer.h"
+#include "util/u_vbuf_mgr.h"
/* flag to indicate a resource is to be used as a transfer so should not be tiled */
#define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV
@@ -43,7 +44,7 @@ struct r600_transfer {
* underlying implementations.
*/
struct r600_resource {
- struct u_resource base;
+ struct u_vbuf_resource b;
struct r600_bo *bo;
u32 size;
unsigned bo_size;
@@ -52,26 +53,31 @@ struct r600_resource {
struct r600_resource_texture {
struct r600_resource resource;
unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
- unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS];
- unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; /* transfer */
+ unsigned pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */
unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS];
unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS];
unsigned pitch_override;
unsigned size;
- unsigned tiled;
unsigned tile_type;
unsigned depth;
- unsigned dirty;
+ unsigned dirty_db;
struct r600_resource_texture *flushed_depth_texture;
+ boolean is_flushing_texture;
+
+ /* on some cards we have to use integer 64/128-bit types
+ for s3tc blits, do this until gallium grows int formats */
+ boolean force_int_type;
};
+#define R600_TEX_IS_TILED(tex, level) ((tex)->array_mode[level] != V_038000_ARRAY_LINEAR_GENERAL && (tex)->array_mode[level] != V_038000_ARRAY_LINEAR_ALIGNED)
+
#define R600_BUFFER_MAGIC 0xabcd1600
+/* XXX this could be removed */
struct r600_resource_buffer {
struct r600_resource r;
uint32_t magic;
- void *user_buffer;
- bool uploaded;
};
struct r600_surface {
@@ -98,14 +104,7 @@ static INLINE struct r600_resource_buffer *r600_buffer(struct pipe_resource *buf
return NULL;
}
-static INLINE boolean r600_buffer_is_user_buffer(struct pipe_resource *buffer)
-{
- if (r600_buffer(buffer)->uploaded)
- return FALSE;
- return r600_buffer(buffer)->user_buffer ? TRUE : FALSE;
-}
-
-int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture);
+int r600_texture_depth_flush(struct pipe_context *ctx, struct pipe_resource *texture, boolean just_create);
/* r600_texture.c texture transfer functions. */
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
@@ -121,15 +120,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
struct r600_pipe_context;
-struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx,
- unsigned default_size,
- unsigned alignment);
-void r600_upload_flush(struct r600_upload *upload);
-void r600_upload_destroy(struct r600_upload *upload);
-int r600_upload_buffer(struct r600_upload *upload, unsigned offset,
- unsigned size, struct r600_resource_buffer *in_buffer,
- unsigned *out_offset, unsigned *out_size,
- struct r600_bo **out_buffer);
-
-int r600_upload_const_buffer(struct r600_pipe_context *rctx, struct pipe_resource *cbuffer, uint32_t *offset);
+
+void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource_buffer **rbuffer, uint32_t *offset);
+
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c982471a04f..240c8f1ffd0 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -28,6 +28,7 @@
#include "r600_pipe.h"
#include "r600_asm.h"
#include "r600_sq.h"
+#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600d.h"
#include <stdio.h>
@@ -175,6 +176,13 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
R_0288CC_SQ_PGM_CF_OFFSET_PS,
0x00000000, 0xFFFFFFFF, NULL);
+ if (rshader->fs_write_all) {
+ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
+ S_028808_MULTIWRITE_ENABLE(1),
+ S_028808_MULTIWRITE_ENABLE(1),
+ NULL);
+ }
+
if (rshader->uses_kill) {
/* only set some bits here, the other bits are set in the dsa state */
r600_pipe_state_add_reg(rstate,
@@ -187,7 +195,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
0xFFFFFFFF, NULL);
}
-int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_shader *rshader = &shader->shader;
@@ -225,12 +233,12 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
return 0;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
{
static int dump_shaders = -1;
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- u32 *literals;
int r;
/* Would like some magic "get_bool_option_once" routine.
@@ -243,13 +251,12 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
tgsi_dump(tokens, 0);
}
shader->shader.family = r600_get_family(rctx->radeon);
- r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
+ r = r600_shader_from_tgsi(tokens, &shader->shader);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
}
r = r600_bc_build(&shader->shader.bc);
- free(literals);
if (r) {
R600_ERR("building bytecode failed !\n");
return r;
@@ -274,6 +281,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader
*/
struct r600_shader_tgsi_instruction;
+struct r600_shader_src {
+ unsigned sel;
+ unsigned swizzle[4];
+ unsigned neg;
+ unsigned abs;
+ unsigned rel;
+ uint32_t value[4];
+};
+
struct r600_shader_ctx {
struct tgsi_shader_info info;
struct tgsi_parse_context parse;
@@ -281,9 +297,11 @@ struct r600_shader_ctx {
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
+ unsigned ar_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bc *bc;
struct r600_shader *shader;
+ struct r600_shader_src src[3];
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
@@ -492,9 +510,179 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
return ctx->num_interp_gpr;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
+static void tgsi_src(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_src_register *tgsi_src,
+ struct r600_shader_src *r600_src)
+{
+ memset(r600_src, 0, sizeof(*r600_src));
+ r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
+ r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
+ r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
+ r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
+ r600_src->neg = tgsi_src->Register.Negate;
+ r600_src->abs = tgsi_src->Register.Absolute;
+ if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
+ int index;
+ if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
+
+ index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
+ r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+ if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
+ return;
+ }
+ index = tgsi_src->Register.Index;
+ r600_src->sel = V_SQ_ALU_SRC_LITERAL;
+ memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
+ } else {
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
+ r600_src->sel = tgsi_src->Register.Index;
+ r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ }
+}
+
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
+{
+ struct r600_bc_vtx vtx;
+ unsigned int ar_reg;
+ int r;
+
+ if (offset) {
+ struct r600_bc_alu alu;
+
+ memset(&alu, 0, sizeof(alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
+ alu.src[0].sel = ctx->ar_reg;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = offset;
+
+ alu.dst.sel = dst_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ ar_reg = dst_reg;
+ } else {
+ ar_reg = ctx->ar_reg;
+ }
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.src_gpr = ar_reg;
+ vtx.mega_fetch_count = 16;
+ vtx.dst_gpr = dst_reg;
+ vtx.dst_sel_x = 0; /* SEL_X */
+ vtx.dst_sel_y = 1; /* SEL_Y */
+ vtx.dst_sel_z = 2; /* SEL_Z */
+ vtx.dst_sel_w = 3; /* SEL_W */
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
+ vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
+ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+ if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+ return r;
+
+ return 0;
+}
+
+static int tgsi_split_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nconst, r;
+
+ for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
+ nconst++;
+ }
+ tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
+ }
+ for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
+ continue;
+ }
+
+ if (ctx->src[i].rel) {
+ int treg = r600_get_temp(ctx);
+ if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
+ return r;
+
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel = 0;
+ j--;
+ } else if (j > 0) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].rel = ctx->src[i].rel;
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel =0;
+ j--;
+ }
+ }
+ return 0;
+}
+
+/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
+static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nliteral, r;
+
+ for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ nliteral++;
+ }
+ }
+ for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].value = ctx->src[i].value[k];
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ j--;
+ }
+ }
+ return 0;
+}
+
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
+ struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bc_output output[32];
unsigned noutput;
@@ -558,12 +746,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
- ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+ ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
+ ctx.temp_reg = ctx.ar_reg + 1;
ctx.nliterals = 0;
ctx.literals = NULL;
-
+ shader->fs_write_all = FALSE;
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
@@ -592,7 +781,12 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.max_driver_temp_used = 0;
/* reserve first tmp for everyone */
r600_get_temp(&ctx);
+
opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+ if ((r = tgsi_split_constant(&ctx)))
+ goto out_err;
+ if ((r = tgsi_split_literal_constant(&ctx)))
+ goto out_err;
if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
else
@@ -602,6 +796,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
goto out_err;
break;
case TGSI_TOKEN_TYPE_PROPERTY:
+ property = &ctx.parse.FullToken.FullProperty;
+ if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
+ if (property->u[0].Data == 1)
+ shader->fs_write_all = TRUE;
+ }
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
@@ -619,6 +818,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = i - pos0;
switch (ctx.type) {
@@ -680,6 +880,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = 0;
noutput++;
@@ -694,6 +895,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[0].swizzle_y = 7;
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
+ output[0].burst_count = 1;
output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;
noutput++;
@@ -704,7 +906,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
if (r)
goto out_err;
}
- *literals = ctx.literals;
+ free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return 0;
out_err:
@@ -724,40 +926,22 @@ static int tgsi_end(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_src(struct r600_shader_ctx *ctx,
- const struct tgsi_full_src_register *tgsi_src,
- struct r600_bc_alu_src *r600_src)
+static void r600_bc_src(struct r600_bc_alu_src *bc_src,
+ const struct r600_shader_src *shader_src,
+ unsigned chan)
{
- memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
- r600_src->neg = tgsi_src->Register.Negate;
- r600_src->abs = tgsi_src->Register.Absolute;
- if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
- int index;
- if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
- (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
- (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
-
- index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
- r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
- if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
- return 0;
- }
- index = tgsi_src->Register.Index;
- r600_src->sel = V_SQ_ALU_SRC_LITERAL;
- r600_src->value = ctx->literals + index * 4;
- } else {
- if (tgsi_src->Register.Indirect)
- r600_src->rel = V_SQ_REL_RELATIVE;
- r600_src->sel = tgsi_src->Register.Index;
- r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
- }
- return 0;
+ bc_src->sel = shader_src->sel;
+ bc_src->chan = shader_src->swizzle[chan];
+ bc_src->neg = shader_src->neg;
+ bc_src->abs = shader_src->abs;
+ bc_src->rel = shader_src->rel;
+ bc_src->value = shader_src->value[bc_src->chan];
}
-static int tgsi_dst(struct r600_shader_ctx *ctx,
- const struct tgsi_full_dst_register *tgsi_dst,
- unsigned swizzle,
- struct r600_bc_alu_dst *r600_dst)
+static void tgsi_dst(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_dst_register *tgsi_dst,
+ unsigned swizzle,
+ struct r600_bc_alu_dst *r600_dst)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -770,101 +954,6 @@ static int tgsi_dst(struct r600_shader_ctx *ctx,
if (inst->Instruction.Saturate) {
r600_dst->clamp = 1;
}
- return 0;
-}
-
-static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
-{
- switch (swizzle) {
- case 0:
- return tgsi_src->Register.SwizzleX;
- case 1:
- return tgsi_src->Register.SwizzleY;
- case 2:
- return tgsi_src->Register.SwizzleZ;
- case 3:
- return tgsi_src->Register.SwizzleW;
- default:
- return 0;
- }
-}
-
-static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nconst, r;
-
- for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- nconst++;
- }
- r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
- if (r) {
- return r;
- }
- }
- for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.src[0].rel = r600_src[i].rel;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r600_src[i].sel = treg;
- r600_src[i].rel =0;
- j--;
- }
- }
- return 0;
-}
-
-/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
-static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nliteral, r;
-
- for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
- nliteral++;
- }
- }
- for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.src[0].value = r600_src[i].value;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r600_src[i].sel = treg;
- j--;
- }
- }
- return 0;
}
static int tgsi_last_instruction(unsigned writemask)
@@ -882,38 +971,25 @@ static int tgsi_last_instruction(unsigned writemask)
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.inst = ctx->inst_info->r600_opcode;
if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
} else {
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
-
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
@@ -951,24 +1027,15 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
* r700 - normalize by dividing by 2PI
* see fdo bug 27901
*/
-static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
- struct r600_bc_alu_src r600_src[3])
+static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
{
static float half_inv_pi = 1.0 /(3.1415926535 * 2);
static float double_pi = 3.1415926535 * 2;
static float neg_pi = -3.1415926535;
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int r;
struct r600_bc_alu alu;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -977,12 +1044,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
- alu.src[1].value = (uint32_t *)&half_inv_pi;
+ alu.src[1].value = *(uint32_t *)&half_inv_pi;
alu.src[2].sel = V_SQ_ALU_SRC_0_5;
alu.src[2].chan = 0;
alu.last = 1;
@@ -1021,8 +1087,8 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.src[2].chan = 0;
if (ctx->bc->chiprev == CHIPREV_R600) {
- alu.src[1].value = (uint32_t *)&double_pi;
- alu.src[2].value = (uint32_t *)&neg_pi;
+ alu.src[1].value = *(uint32_t *)&double_pi;
+ alu.src[2].value = *(uint32_t *)&neg_pi;
} else {
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[2].sel = V_SQ_ALU_SRC_0_5;
@@ -1039,12 +1105,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
@@ -1070,9 +1135,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = ctx->temp_reg;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1085,7 +1148,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
static int tgsi_scs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int r;
@@ -1093,7 +1155,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
* X or Y components of the destination vector.
*/
if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
}
@@ -1102,9 +1164,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -1118,9 +1178,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -1136,9 +1194,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = 0;
@@ -1156,9 +1212,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
@@ -1175,7 +1229,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int i, r;
@@ -1191,10 +1244,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[1].neg = 1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
if (i == 3) {
alu.last = 1;
@@ -1214,24 +1264,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* dst.x, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1240,12 +1280,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.y = max(src.x, 0.0) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
- alu.src[0] = r600_src[0];
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
alu.src[1].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1256,9 +1294,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1273,11 +1309,8 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.z = log(src.y) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1289,13 +1322,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
alu.src[1].sel = sel;
alu.src[1].chan = chan;
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[2], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
alu.dst.write = 1;
@@ -1310,9 +1341,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1336,10 +1365,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
alu.src[i].abs = 1;
}
alu.dst.sel = ctx->temp_reg;
@@ -1363,9 +1389,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
alu.src[0].sel = ctx->temp_reg;
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.dst.chan = i;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
if (i == 3)
alu.last = 1;
@@ -1385,10 +1409,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
}
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -1402,17 +1423,13 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
/* LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
alu.last = 1;
@@ -1422,10 +1439,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
/* b * LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[1], 0);
alu.src[1].sel = ctx->temp_reg;
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -1450,16 +1464,8 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int i, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* tmp = (src > 0 ? 1 : src) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1469,13 +1475,10 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.src[1].sel = V_SQ_ALU_SRC_1;
+ r600_bc_src(&alu.src[2], &ctx->src[0], i);
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
if (i == 3)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1488,9 +1491,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
alu.is_op3 = 1;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
@@ -1523,9 +1524,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
alu.dst.chan = i;
} else {
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
}
@@ -1542,17 +1541,10 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -1560,14 +1552,10 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
@@ -1584,28 +1572,17 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
/* handle some special cases */
@@ -1661,11 +1638,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
/* Add perspective divide */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 3;
alu.last = 1;
@@ -1679,10 +1653,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 3;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
alu.dst.write = 1;
@@ -1735,14 +1706,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
src2_chan = 0;
break;
}
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
+ r600_bc_src(&alu.src[0], &ctx->src[0], src_chan);
+ r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1782,7 +1747,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
- alu.src[2].value = (u32*)&one_point_five;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -1803,7 +1768,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
- alu.src[2].value = (u32*)&one_point_five;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -1822,10 +1787,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1846,7 +1808,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&tex, 0, sizeof(struct r600_bc_tex));
tex.inst = opcode;
tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
- tex.resource_id = tex.sampler_id;
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
@@ -1872,6 +1834,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.coord_type_w = 1;
}
+ if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
+ tex.coord_type_z = 0;
+ tex.src_sel_z = 1;
+ } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
+ tex.coord_type_z = 0;
+
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
tex.src_sel_w = 2;
@@ -1886,36 +1854,23 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
unsigned i;
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* optimize if it's just an equal balance */
- if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
+ if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
alu.omod = 3;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
if (i == lasti) {
alu.last = 1;
@@ -1936,8 +1891,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.src[1].neg = 1;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
@@ -1959,8 +1913,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == lasti) {
@@ -1980,17 +1933,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
- alu.src[1] = r600_src[1];
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
if (i == lasti) {
alu.last = 1;
@@ -2005,37 +1953,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
-
- alu.src[2] = r600_src[1];
- alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
-
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
+ r600_bc_src(&alu.src[2], &ctx->src[1], i);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
@@ -2051,7 +1982,6 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
uint32_t use_temp = 0;
int i, r;
@@ -2059,43 +1989,34 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask != 0xf)
use_temp = 1;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- alu.src[0] = r600_src[0];
switch (i) {
case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 2);
break;
case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
break;
case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
break;
case 3:
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
}
- alu.src[1] = r600_src[1];
switch (i) {
case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 1);
break;
case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 2);
break;
case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 0);
break;
case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
@@ -2117,32 +2038,30 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
- alu.src[0] = r600_src[0];
switch (i) {
case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
break;
case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 2);
break;
case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
break;
case 3:
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
}
- alu.src[1] = r600_src[1];
switch (i) {
case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 2);
break;
case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 0);
break;
case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 1);
break;
case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
@@ -2155,11 +2074,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
if (use_temp)
alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ else
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
@@ -2177,7 +2093,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3] = { { 0 } };
struct r600_bc_alu alu;
int r;
@@ -2186,11 +2101,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -2218,11 +2129,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
- alu.src[0] = r600_src[0];
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -2242,10 +2149,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -2288,11 +2192,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -2321,11 +2221,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -2385,11 +2281,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = ctx->temp_reg;
alu.src[1].chan = 1;
@@ -2409,11 +2301,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -2451,6 +2339,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
@@ -2465,23 +2354,23 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
return -1;
}
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
- alu.dst.chan = 0;
- alu.dst.sel = ctx->temp_reg;
+ alu.dst.sel = ctx->ar_reg;
alu.dst.write = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
+
+ /* TODO: Note that the MOVA can be avoided if we never use AR for
+ * indexing non-CB registers in the current ALU clause. Similarly, we
+ * need to load AR from ar_reg again if we started a new clause
+ * between ARL and AR usage. The easy way to do that is to remove
+ * the MOVA here, and load it for the first AR access after ar_reg
+ * has been modified in each clause. */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].sel = ctx->ar_reg;
alu.src[0].chan = 0;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -2495,26 +2384,48 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ alu.src[0].sel = ctx->ar_reg;
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
case TGSI_OPCODE_ARR:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
default:
assert(0);
return -1;
}
-
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
-
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+ alu.src[0].sel = ctx->ar_reg;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -2534,26 +2445,18 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == 0 || i == 3) {
alu.src[0].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
}
- if (i == 0 || i == 2) {
+ if (i == 0 || i == 2) {
alu.src[1].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
}
if (i == 3)
alu.last = 1;
@@ -2566,7 +2469,6 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
@@ -2578,10 +2480,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
alu.dst.write = 1;
alu.dst.chan = 0;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = 0;
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 935dd6fe3ab..8f96ce5085c 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -45,8 +45,7 @@ struct r600_shader {
struct r600_shader_io output[32];
enum radeon_family family;
boolean uses_kill;
+ boolean fs_write_all;
};
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
-
#endif
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index de2668cee16..576067ae81e 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -37,6 +37,7 @@
#include <util/u_memory.h>
#include <util/u_inlines.h>
#include <util/u_framebuffer.h>
+#include "util/u_transfer.h"
#include <pipebuffer/pb_buffer.h>
#include "r600.h"
#include "r600d.h"
@@ -94,221 +95,6 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx)
}
}
-/* FIXME optimize away spi update when it's not needed */
-static void r600_spi_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_shader *shader = rctx->ps_shader;
- struct r600_pipe_state rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp;
-
- rstate.nregs = 0;
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].centroid)
- tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
- tmp |= S_028644_SEL_LINEAR(1);
-
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
- }
- r600_context_pipe_state_set(&rctx->ctx, &rstate);
-}
-
-void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
-{
- struct r600_pipe_state *rstate;
- struct r600_resource *rbuffer;
- struct pipe_vertex_buffer *vertex_buffer;
- unsigned i, offset;
-
- /* we don't update until we know vertex elements */
- if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
- return;
-
- if (rctx->vertex_elements->incompatible_layout) {
- /* translate rebind new vertex elements so
- * return once translated
- */
- r600_begin_vertex_translate(rctx);
- return;
- }
-
- if (rctx->any_user_vbs) {
- r600_upload_user_buffers(rctx);
- rctx->any_user_vbs = FALSE;
- }
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- rctx->nvs_resource = rctx->vertex_elements->count;
- } else {
- /* bind vertex buffer once */
- rctx->nvs_resource = rctx->nvertex_buffer;
- }
-
- for (i = 0 ; i < rctx->nvs_resource; i++) {
- rstate = &rctx->vs_resource[i];
- rstate->id = R600_PIPE_STATE_RESOURCE;
- rstate->nregs = 0;
-
- if (rctx->vertex_elements->vbuffer_need_offset) {
- /* one resource per vertex elements */
- unsigned vbuffer_index;
- vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
- vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = rctx->vertex_elements->vbuffer_offset[i];
- } else {
- /* bind vertex buffer once */
- vertex_buffer = &rctx->vertex_buffer[i];
- rbuffer = (struct r600_resource*)vertex_buffer->buffer;
- offset = 0;
- }
- if (vertex_buffer == NULL || rbuffer == NULL)
- continue;
- offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
-
- r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
- offset, 0xFFFFFFFF, rbuffer->bo);
- r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
- rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
- S_038008_STRIDE(vertex_buffer->stride),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
- 0xC0000000, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
- }
-}
-
-static void r600_draw_common(struct r600_drawl *draw)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
- struct r600_resource *rbuffer;
- unsigned prim;
- u32 vgt_dma_index_type, vgt_draw_initiator, mask;
- struct r600_draw rdraw;
- struct r600_pipe_state vgt;
-
- switch (draw->index_size) {
- case 2:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 0;
- break;
- case 4:
- vgt_draw_initiator = 0;
- vgt_dma_index_type = 1;
- break;
- case 0:
- vgt_draw_initiator = 2;
- vgt_dma_index_type = 0;
- break;
- default:
- R600_ERR("unsupported index size %d\n", draw->index_size);
- return;
- }
- if (r600_conv_pipe_prim(draw->mode, &prim))
- return;
- if (unlikely(rctx->ps_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- if (unlikely(rctx->vs_shader == NULL)) {
- R600_ERR("missing vertex shader\n");
- return;
- }
- /* there should be enough input */
- if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
- return;
- }
-
- r600_spi_update(rctx);
-
- mask = 0;
- for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
- mask |= (0xF << (i * 4));
- }
-
- vgt.id = R600_PIPE_STATE_VGT;
- vgt.nregs = 0;
- r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw->index_bias, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw->max_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw->min_index, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
- r600_context_pipe_state_set(&rctx->ctx, &vgt);
-
- rdraw.vgt_num_indices = draw->count;
- rdraw.vgt_num_instances = 1;
- rdraw.vgt_index_type = vgt_dma_index_type;
- rdraw.vgt_draw_initiator = vgt_draw_initiator;
- rdraw.indices = NULL;
- if (draw->index_buffer) {
- rbuffer = (struct r600_resource*)draw->index_buffer;
- rdraw.indices = rbuffer->bo;
- rdraw.indices_bo_offset = draw->index_buffer_offset;
- }
- r600_context_draw(&rctx->ctx, &rdraw);
-}
-
-void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_drawl draw;
-
- memset(&draw, 0, sizeof(struct r600_drawl));
- draw.ctx = ctx;
- draw.mode = info->mode;
- draw.start = info->start;
- draw.count = info->count;
- if (info->indexed && rctx->index_buffer.buffer) {
- draw.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->index_bias;
-
- r600_translate_index_buffer(rctx, &rctx->index_buffer.buffer,
- &rctx->index_buffer.index_size,
- &draw.start,
- info->count);
-
- draw.index_size = rctx->index_buffer.index_size;
- pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
- draw.index_buffer_offset = draw.start * draw.index_size;
- draw.start = 0;
- r600_upload_index_buffer(rctx, &draw);
- } else {
- draw.index_size = 0;
- draw.index_buffer = NULL;
- draw.min_index = info->min_index;
- draw.max_index = info->max_index;
- draw.index_bias = info->start;
- }
- r600_draw_common(&draw);
-
- pipe_resource_reference(&draw.index_buffer, NULL);
-}
-
static void r600_set_blend_color(struct pipe_context *ctx,
const struct pipe_blend_color *state)
{
@@ -616,6 +402,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
uint32_t word4 = 0, yuv_format = 0, pitch = 0;
unsigned char swizzle[4], array_mode = 0, tile_type = 0;
struct r600_bo *bo[2];
+ unsigned height, depth;
if (resource == NULL)
return NULL;
@@ -643,22 +430,30 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
if (desc == NULL) {
R600_ERR("unknow format %d\n", state->format);
}
- tmp = (struct r600_resource_texture*)texture;
+ tmp = (struct r600_resource_texture *)texture;
+ if (tmp->depth && !tmp->is_flushing_texture) {
+ r600_texture_depth_flush(ctx, texture, TRUE);
+ tmp = tmp->flushed_depth_texture;
+ }
+
+ if (tmp->force_int_type) {
+ word4 &= C_038010_NUM_FORMAT_ALL;
+ word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
+ }
rbuffer = &tmp->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
- /* FIXME depth texture decompression */
- if (tmp->depth) {
- r600_texture_depth_flush(ctx, texture);
- tmp = (struct r600_resource_texture*)texture;
- rbuffer = &tmp->flushed_depth_texture->resource;
- bo[0] = rbuffer->bo;
- bo[1] = rbuffer->bo;
- }
- pitch = align(tmp->pitch_in_pixels[0], 8);
- if (tmp->tiled) {
- array_mode = tmp->array_mode[0];
- tile_type = tmp->tile_type;
+ pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8);
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
+
+ height = texture->height0;
+ depth = texture->depth0;
+ if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
+ height = 1;
+ depth = texture->array_size;
+ } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
+ depth = texture->array_size;
}
/* FIXME properly handle first level != 0 */
@@ -669,22 +464,22 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
S_038000_PITCH((pitch / 8) - 1) |
S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
- S_038004_TEX_HEIGHT(texture->height0 - 1) |
- S_038004_TEX_DEPTH(texture->depth0 - 1) |
+ S_038004_TEX_HEIGHT(height - 1) |
+ S_038004_TEX_DEPTH(depth - 1) |
S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
(tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
(tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
- word4 | S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
+ word4 |
S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) |
S_038010_REQUEST_SIZE(1) |
S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
S_038014_LAST_LEVEL(state->u.tex.last_level) |
- S_038014_BASE_ARRAY(0) |
- S_038014_LAST_ARRAY(0), 0xFFFFFFFF, NULL);
+ S_038014_BASE_ARRAY(state->u.tex.first_layer) |
+ S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL);
@@ -714,9 +509,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
for (i = 0; i < count; i++) {
if (&rctx->ps_samplers.views[i]->base != views[i]) {
if (resource[i])
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state,
+ i + R600_MAX_CONST_BUFFERS);
else
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&rctx->ps_samplers.views[i],
@@ -726,7 +523,8 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count,
}
for (i = count; i < NUM_TEX_UNITS; i++) {
if (rctx->ps_samplers.views[i]) {
- r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL,
+ i + R600_MAX_CONST_BUFFERS);
pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL);
}
}
@@ -908,21 +706,28 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
unsigned offset;
const struct util_format_description *desc;
struct r600_bo *bo[3];
+ int i;
surf = (struct r600_surface *)state->cbufs[cb];
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE);
+ rtex = rtex->flushed_depth_texture;
+ }
+
rbuffer = &rtex->resource;
bo[0] = rbuffer->bo;
bo[1] = rbuffer->bo;
bo[2] = rbuffer->bo;
/* XXX quite sure for dx10+ hw don't need any offset hacks */
- offset = r600_texture_get_offset((struct r600_resource_texture *)state->cbufs[cb]->texture,
+ offset = r600_texture_get_offset(rtex,
level, state->cbufs[cb]->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
ntype = 0;
- desc = util_format_description(rtex->resource.base.b.format);
+ desc = util_format_description(surf->base.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_0280A0_NUMBER_SRGB;
else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
@@ -937,15 +742,30 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
}
}
- format = r600_translate_colorformat(rtex->resource.base.b.format);
- swap = r600_translate_colorswap(rtex->resource.base.b.format);
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ format = r600_translate_colorformat(surf->base.format);
+ swap = r600_translate_colorswap(surf->base.format);
+
+ /* disable when gallium grows int textures */
+ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type)
+ ntype = 4;
+
color_info = S_0280A0_FORMAT(format) |
S_0280A0_COMP_SWAP(swap) |
S_0280A0_ARRAY_MODE(rtex->array_mode[level]) |
S_0280A0_BLEND_CLAMP(1) |
S_0280A0_NUMBER_TYPE(ntype);
- if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- color_info |= S_0280A0_SOURCE_FORMAT(1);
+
+ /* on R600 this can't be set if BLEND_CLAMP isn't set,
+ if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
+ desc->channel[i].size < 12)
+ color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
r600_pipe_state_add_reg(rstate,
R_028040_CB_COLOR0_BASE + cb * 4,
@@ -989,17 +809,14 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
surf = (struct r600_surface *)state->zsbuf;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
- rtex->tiled = 1;
- rtex->array_mode[level] = 2;
- rtex->tile_type = 1;
- rtex->depth = 1;
+
rbuffer = &rtex->resource;
/* XXX quite sure for dx10+ hw don't need any offset hacks */
offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
level, state->zsbuf->u.tex.first_layer);
- pitch = rtex->pitch_in_pixels[level] / 8 - 1;
- slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1;
+ pitch = rtex->pitch_in_blocks[level] / 8 - 1;
+ slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
@@ -1115,51 +932,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
}
}
-static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
- struct pipe_resource *buffer)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
- uint32_t offset;
-
- /* Note that the state tracker can unbind constant buffers by
- * passing NULL here.
- */
- if (buffer == NULL) {
- return;
- }
-
- r600_upload_const_buffer(rctx, buffer, &offset);
-
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- rctx->vs_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->vs_const_buffer,
- R_028980_ALU_CONST_CACHE_VS_0,
- (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
- break;
- case PIPE_SHADER_FRAGMENT:
- rctx->ps_const_buffer.nregs = 0;
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
- ALIGN_DIVUP(buffer->width0 >> 4, 16),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(&rctx->ps_const_buffer,
- R_028940_ALU_CONST_CACHE_PS_0,
- (r600_bo_offset(rbuffer->bo) + offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
- r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
- break;
- default:
- R600_ERR("unsupported %d\n", shader);
- return;
- }
-}
-
void r600_init_state_functions(struct r600_pipe_context *rctx)
{
rctx->context.create_blend_state = r600_create_blend_state;
@@ -1199,6 +971,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx)
rctx->context.set_vertex_sampler_views = r600_set_vs_sampler_view;
rctx->context.set_viewport_state = r600_set_viewport_state;
rctx->context.sampler_view_destroy = r600_sampler_view_destroy;
+ rctx->context.redefine_user_buffer = u_default_redefine_user_buffer;
}
void r600_init_config(struct r600_pipe_context *rctx)
@@ -1489,3 +1262,25 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
S_028D0C_COPY_CENTROID(1), NULL);
return rstate;
}
+
+void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx,
+ struct r600_pipe_state *rstate,
+ struct r600_resource *rbuffer,
+ unsigned offset, unsigned stride)
+{
+ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
+ rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
+ S_038008_STRIDE(stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+}
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 3603376f738..72707fbd8b8 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -27,7 +27,9 @@
#include <util/u_memory.h>
#include <util/u_format.h>
#include <pipebuffer/pb_buffer.h>
+#include "pipe/p_shader_tokens.h"
#include "r600_pipe.h"
+#include "r600d.h"
/* common state between evergreen and r600 */
void r600_bind_blend_state(struct pipe_context *ctx, void *state)
@@ -119,24 +121,13 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = (struct r600_vertex_element*)state;
- /* delete previous translated vertex elements */
- if (rctx->tran.new_velems) {
- r600_end_vertex_translate(rctx);
- }
-
rctx->vertex_elements = v;
if (v) {
+ u_vbuf_mgr_bind_vertex_elements(rctx->vbuf_mgr, state,
+ v->vmgr_elements);
+
rctx->states[v->rstate.id] = &v->rstate;
r600_context_pipe_state_set(&rctx->ctx, &v->rstate);
- if (rctx->family >= CHIP_CEDAR) {
- evergreen_vertex_buffer_update(rctx);
- } else {
- r600_vertex_buffer_update(rctx);
- }
- }
-
- if (v) {
-// rctx->vs_rebuild = TRUE;
}
}
@@ -152,6 +143,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
rctx->vertex_elements = NULL;
r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
+ u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements);
FREE(state);
}
@@ -176,88 +168,44 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
const struct pipe_vertex_buffer *buffers)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct pipe_vertex_buffer *vbo;
- unsigned max_index = (unsigned)-1;
+ int i;
- if (rctx->family >= CHIP_CEDAR) {
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
- evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ /* Zero states. */
+ for (i = 0; i < count; i++) {
+ if (!buffers[i].buffer) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ } else {
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ }
}
- } else {
- for (int i = 0; i < rctx->nvertex_buffer; i++) {
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL);
+ }
+ for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) {
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
+ } else {
r600_context_pipe_state_set_fs_resource(&rctx->ctx, NULL, i);
}
}
- memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count);
-
- for (int i = 0; i < count; i++) {
- vbo = (struct pipe_vertex_buffer*)&buffers[i];
-
- rctx->vertex_buffer[i].buffer = NULL;
- if (buffers[i].buffer == NULL)
- continue;
- if (r600_buffer_is_user_buffer(buffers[i].buffer))
- rctx->any_user_vbs = TRUE;
- pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer);
- /* The stride of zero means we will be fetching only the first
- * vertex, so don't care about max_index. */
- if (!vbo->stride)
- continue;
-
- if (vbo->max_index == ~0) {
- vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
- }
- max_index = MIN2(vbo->max_index, max_index);
- }
- rctx->nvertex_buffer = count;
- rctx->vb_max_index = max_index;
- if (rctx->family >= CHIP_CEDAR) {
- evergreen_vertex_buffer_update(rctx);
- } else {
- r600_vertex_buffer_update(rctx);
- }
+ u_vbuf_mgr_set_vertex_buffers(rctx->vbuf_mgr, count, buffers);
}
-
-#define FORMAT_REPLACE(what, withwhat) \
- case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break
-
void *r600_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
- enum pipe_format *format;
- int i;
assert(count < 32);
if (!v)
return NULL;
v->count = count;
- memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element));
-
- for (i = 0; i < count; i++) {
- v->hw_format[i] = v->elements[i].src_format;
- format = &v->hw_format[i];
-
- switch (*format) {
- FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
- FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
- FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
- FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
- default:;
- }
- v->incompatible_layout =
- v->incompatible_layout ||
- v->elements[i].src_format != v->hw_format[i];
-
- v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4);
- }
+ v->vmgr_elements =
+ u_vbuf_mgr_create_vertex_elements(rctx->vbuf_mgr, count,
+ elements, v->elements);
if (r600_vertex_elements_build_fetch_shader(rctx, v)) {
FREE(v);
@@ -327,3 +275,274 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state)
r600_pipe_shader_destroy(ctx, shader);
free(shader);
}
+
+/* FIXME optimize away spi update when it's not needed */
+void r600_spi_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_shader *shader = rctx->ps_shader;
+ struct r600_pipe_state rstate;
+ struct r600_shader *rshader = &shader->shader;
+ unsigned i, tmp;
+
+ rstate.nregs = 0;
+ for (i = 0; i < rshader->ninput; i++) {
+ tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
+
+ if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
+ tmp |= S_028644_FLAT_SHADE(rctx->flatshade);
+ }
+
+ if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
+ rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
+
+ if (rctx->family < CHIP_CEDAR) {
+ if (rshader->input[i].centroid)
+ tmp |= S_028644_SEL_CENTROID(1);
+
+ if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+ tmp |= S_028644_SEL_LINEAR(1);
+ }
+
+ r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
+ }
+ r600_context_pipe_state_set(&rctx->ctx, &rstate);
+}
+
+void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
+ struct pipe_resource *buffer)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_resource_buffer *rbuffer = r600_buffer(buffer);
+ struct r600_pipe_state *rstate;
+ uint32_t offset;
+
+ /* Note that the state tracker can unbind constant buffers by
+ * passing NULL here.
+ */
+ if (buffer == NULL) {
+ return;
+ }
+
+ r600_upload_const_buffer(rctx, &rbuffer, &offset);
+ offset += r600_bo_offset(rbuffer->r.bo);
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ rctx->vs_const_buffer.nregs = 0;
+ r600_pipe_state_add_reg(&rctx->vs_const_buffer,
+ R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
+ ALIGN_DIVUP(buffer->width0 >> 4, 16),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->vs_const_buffer,
+ R_028980_ALU_CONST_CACHE_VS_0,
+ offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
+
+ rstate = &rctx->vs_const_buffer_resource[index];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+ } else {
+ r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
+ }
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ rctx->ps_const_buffer.nregs = 0;
+ r600_pipe_state_add_reg(&rctx->ps_const_buffer,
+ R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+ ALIGN_DIVUP(buffer->width0 >> 4, 16),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&rctx->ps_const_buffer,
+ R_028940_ALU_CONST_CACHE_PS_0,
+ offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+ r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
+
+ rstate = &rctx->ps_const_buffer_resource[index];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+ } else {
+ r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16);
+ r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
+ }
+ break;
+ default:
+ R600_ERR("unsupported %d\n", shader);
+ return;
+ }
+
+ if (buffer != &rbuffer->r.b.b.b)
+ pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL);
+}
+
+static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state *rstate;
+ struct r600_resource *rbuffer;
+ struct pipe_vertex_buffer *vertex_buffer;
+ unsigned i, count, offset;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ count = rctx->vertex_elements->count;
+ } else {
+ /* bind vertex buffer once */
+ count = rctx->vbuf_mgr->nr_real_vertex_buffers;
+ }
+
+ for (i = 0 ; i < count; i++) {
+ rstate = &rctx->fs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ unsigned vbuffer_index;
+ vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[vbuffer_index];
+ rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[vbuffer_index];
+ offset = rctx->vertex_elements->vbuffer_offset[i];
+ } else {
+ /* bind vertex buffer once */
+ vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[i];
+ rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[i];
+ offset = 0;
+ }
+ if (vertex_buffer == NULL || rbuffer == NULL)
+ continue;
+ offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ } else {
+ r600_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride);
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ }
+ }
+}
+
+void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
+{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_resource *rbuffer;
+ u32 vgt_dma_index_type, vgt_draw_initiator, mask;
+ struct r600_draw rdraw;
+ struct r600_pipe_state vgt;
+ struct r600_drawl draw = {};
+ unsigned prim;
+
+ r600_flush_depth_textures(rctx);
+ u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL);
+ r600_vertex_buffer_update(rctx);
+
+ draw.info = *info;
+ draw.ctx = ctx;
+ if (info->indexed && rctx->index_buffer.buffer) {
+ draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size;
+ pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer);
+
+ r600_translate_index_buffer(rctx, &draw.index_buffer,
+ &rctx->index_buffer.index_size,
+ &draw.info.start,
+ info->count);
+
+ draw.index_size = rctx->index_buffer.index_size;
+ draw.index_buffer_offset = draw.info.start * draw.index_size;
+ draw.info.start = 0;
+
+ if (u_vbuf_resource(draw.index_buffer)->user_ptr) {
+ r600_upload_index_buffer(rctx, &draw);
+ }
+ } else {
+ draw.info.index_bias = info->start;
+ }
+
+ switch (draw.index_size) {
+ case 2:
+ vgt_draw_initiator = 0;
+ vgt_dma_index_type = 0;
+ break;
+ case 4:
+ vgt_draw_initiator = 0;
+ vgt_dma_index_type = 1;
+ break;
+ case 0:
+ vgt_draw_initiator = 2;
+ vgt_dma_index_type = 0;
+ break;
+ default:
+ R600_ERR("unsupported index size %d\n", draw.index_size);
+ return;
+ }
+ if (r600_conv_pipe_prim(draw.info.mode, &prim))
+ return;
+ if (unlikely(rctx->ps_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
+ return;
+ }
+ if (unlikely(rctx->vs_shader == NULL)) {
+ R600_ERR("missing vertex shader\n");
+ return;
+ }
+ /* there should be enough input */
+ if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) {
+ R600_ERR("%d resources provided, expecting %d\n",
+ rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource);
+ return;
+ }
+
+ r600_spi_update(rctx);
+
+ mask = 0;
+ for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
+ mask |= (0xF << (i * 4));
+ }
+
+ vgt.id = R600_PIPE_STATE_VGT;
+ vgt.nregs = 0;
+ r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set(&rctx->ctx, &vgt);
+
+ rdraw.vgt_num_indices = draw.info.count;
+ rdraw.vgt_num_instances = 1;
+ rdraw.vgt_index_type = vgt_dma_index_type;
+ rdraw.vgt_draw_initiator = vgt_draw_initiator;
+ rdraw.indices = NULL;
+ if (draw.index_buffer) {
+ rbuffer = (struct r600_resource*)draw.index_buffer;
+ rdraw.indices = rbuffer->bo;
+ rdraw.indices_bo_offset = draw.index_buffer_offset;
+ }
+
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_context_draw(&rctx->ctx, &rdraw);
+ } else {
+ r600_context_draw(&rctx->ctx, &rdraw);
+ }
+
+ if (rctx->framebuffer.zsbuf)
+ {
+ struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture;
+ ((struct r600_resource_texture *)tex)->dirty_db = TRUE;
+ }
+
+ pipe_resource_reference(&draw.index_buffer, NULL);
+
+ u_vbuf_mgr_draw_end(rctx->vbuf_mgr);
+}
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index a0ec493fc85..3dd54f45202 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -253,9 +253,13 @@ static inline unsigned r600_tex_dim(unsigned dim)
default:
case PIPE_TEXTURE_1D:
return V_038000_SQ_TEX_DIM_1D;
+ case PIPE_TEXTURE_1D_ARRAY:
+ return V_038000_SQ_TEX_DIM_1D_ARRAY;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
return V_038000_SQ_TEX_DIM_2D;
+ case PIPE_TEXTURE_2D_ARRAY:
+ return V_038000_SQ_TEX_DIM_2D_ARRAY;
case PIPE_TEXTURE_3D:
return V_038000_SQ_TEX_DIM_3D;
case PIPE_TEXTURE_CUBE:
@@ -285,10 +289,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_ALT_REV;
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_0280A0_SWAP_STD;
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_0280A0_SWAP_ALT;
+
/* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return V_0280A0_SWAP_STD_REV;
@@ -305,6 +313,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
return V_0280A0_SWAP_STD;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
return V_0280A0_SWAP_ALT;
case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_SWAP_STD;
@@ -328,6 +337,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_X8R8G8B8_UNORM:
return V_0280A0_SWAP_ALT_REV;
case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
return V_0280A0_SWAP_STD;
@@ -346,9 +356,11 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_0280A0_SWAP_STD_REV;
+ return V_0280A0_SWAP_STD;
+
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return V_0280A0_SWAP_ALT;
case PIPE_FORMAT_R16G16_UNORM:
return V_0280A0_SWAP_STD;
@@ -356,14 +368,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
- // return FMT_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
- // return FMT_16_16_16_16_FLOAT;
/* 128-bit buffers. */
- //case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return FMT_32_32_32_32_FLOAT;
- return 0;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_0280A0_SWAP_STD;
default:
R600_ERR("unsupported colorswap format %d\n", format);
return ~0;
@@ -374,10 +385,14 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
{
switch (format) {
+ case PIPE_FORMAT_L4A4_UNORM:
+ return V_0280A0_COLOR_4_4;
+
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
return V_0280A0_COLOR_8;
@@ -398,6 +413,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_16;
case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_L8A8_SRGB:
case PIPE_FORMAT_R8G8_UNORM:
return V_0280A0_COLOR_8_8;
@@ -425,7 +441,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
- return V_0280A0_COLOR_10_10_10_2;
+ return V_0280A0_COLOR_2_10_10_10;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
@@ -467,10 +483,13 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
return V_0280A0_COLOR_32_32;
/* 128-bit buffers. */
- //case PIPE_FORMAT_R32G32B32_FLOAT:
- // return V_0280A0_COLOR_32_32_32_FLOAT;
- //case PIPE_FORMAT_R32G32B32A32_FLOAT:
- // return V_0280A0_COLOR_32_32_32_32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return V_0280A0_COLOR_32_32_32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return V_0280A0_COLOR_32_32_32_32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return V_0280A0_COLOR_32_32_32_32;
/* YUV buffers. */
case PIPE_FORMAT_UYVY:
@@ -497,9 +516,37 @@ static INLINE boolean r600_is_zs_format_supported(enum pipe_format format)
return r600_translate_dbformat(format) != ~0;
}
-static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format)
+static INLINE boolean r600_is_vertex_format_supported(enum pipe_format format,
+ enum radeon_family family)
{
- return r600_translate_colorformat(format) != ~0;
+ unsigned i;
+ const struct util_format_description *desc = util_format_description(format);
+ if (!desc)
+ return FALSE;
+
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+ if (i == 4)
+ return FALSE;
+
+ /* No fixed, no double. */
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED ||
+ (desc->channel[i].size == 64 &&
+ desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))
+ return FALSE;
+
+ /* No scaled/norm formats with 32 bits per channel. */
+ if (desc->channel[i].size == 32 &&
+ (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED))
+ return FALSE;
+
+ return TRUE;
}
#endif
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 1f4f453c091..03af367401d 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -27,6 +27,7 @@
#include <errno.h>
#include <pipe/p_screen.h>
#include <util/u_format.h>
+#include <util/u_format_s3tc.h>
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
@@ -38,8 +39,6 @@
#include "r600d.h"
#include "r600_formats.h"
-extern struct u_resource_vtbl r600_texture_vtbl;
-
/* Copy from a full GPU texture to a transfer's staging one. */
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
@@ -77,17 +76,15 @@ unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
{
unsigned offset = rtex->offset[level];
- switch (rtex->resource.base.b.target) {
+ switch (rtex->resource.b.b.b.target) {
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
- return offset + layer * rtex->layer_size[level];
default:
- assert(layer == 0);
- return offset;
+ return offset + layer * rtex->layer_size[level];
}
}
-static unsigned r600_get_pixel_alignment(struct pipe_screen *screen,
+static unsigned r600_get_block_alignment(struct pipe_screen *screen,
enum pipe_format format,
unsigned array_mode)
{
@@ -105,6 +102,9 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen,
(((rscreen->tiling_info->group_bytes / 8 / pixsize)) *
rscreen->tiling_info->num_banks)) * 8;
break;
+ case V_038000_ARRAY_LINEAR_ALIGNED:
+ p_align = MAX2(64, rscreen->tiling_info->group_bytes / pixsize);
+ break;
case V_038000_ARRAY_LINEAR_GENERAL:
default:
p_align = rscreen->tiling_info->group_bytes / pixsize;
@@ -124,8 +124,10 @@ static unsigned r600_get_height_alignment(struct pipe_screen *screen,
h_align = rscreen->tiling_info->num_channels * 8;
break;
case V_038000_ARRAY_1D_TILED_THIN1:
+ case V_038000_ARRAY_LINEAR_ALIGNED:
h_align = 8;
break;
+ case V_038000_ARRAY_LINEAR_GENERAL:
default:
h_align = 1;
break;
@@ -139,7 +141,7 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen,
{
struct r600_screen* rscreen = (struct r600_screen *)screen;
unsigned pixsize = util_format_get_blocksize(format);
- int p_align = r600_get_pixel_alignment(screen, format, array_mode);
+ int p_align = r600_get_block_alignment(screen, format, array_mode);
int h_align = r600_get_height_alignment(screen, array_mode);
int b_align;
@@ -149,6 +151,8 @@ static unsigned r600_get_base_alignment(struct pipe_screen *screen,
p_align * pixsize * h_align);
break;
case V_038000_ARRAY_1D_TILED_THIN1:
+ case V_038000_ARRAY_LINEAR_ALIGNED:
+ case V_038000_ARRAY_LINEAR_GENERAL:
default:
b_align = rscreen->tiling_info->group_bytes;
break;
@@ -165,55 +169,46 @@ static unsigned mip_minify(unsigned size, unsigned level)
return val;
}
-static unsigned r600_texture_get_stride(struct pipe_screen *screen,
- struct r600_resource_texture *rtex,
- unsigned level)
+static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen,
+ struct r600_resource_texture *rtex,
+ unsigned level)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
- unsigned width, stride, tile_width;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
+ unsigned nblocksx, block_align, width;
+ unsigned blocksize = util_format_get_blocksize(ptex->format);
if (rtex->pitch_override)
- return rtex->pitch_override;
+ return rtex->pitch_override / blocksize;
width = mip_minify(ptex->width0, level);
- if (util_format_is_plain(ptex->format)) {
- tile_width = r600_get_pixel_alignment(screen, ptex->format,
- rtex->array_mode[level]);
- width = align(width, tile_width);
- }
- stride = util_format_get_stride(ptex->format, width);
+ nblocksx = util_format_get_nblocksx(ptex->format, width);
- return stride;
+ block_align = r600_get_block_alignment(screen, ptex->format,
+ rtex->array_mode[level]);
+ nblocksx = align(nblocksx, block_align);
+ return nblocksx;
}
static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned level)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
unsigned height, tile_height;
height = mip_minify(ptex->height0, level);
- if (util_format_is_plain(ptex->format)) {
- tile_height = r600_get_height_alignment(screen,
- rtex->array_mode[level]);
- height = align(height, tile_height);
- }
- return util_format_get_nblocksy(ptex->format, height);
-}
-
-/* Get a width in pixels from a stride in bytes. */
-static unsigned pitch_to_width(enum pipe_format format, unsigned pitch_in_bytes)
-{
- return (pitch_in_bytes / util_format_get_blocksize(format)) *
- util_format_get_blockwidth(format);
+ height = util_format_get_nblocksy(ptex->format, height);
+ tile_height = r600_get_height_alignment(screen,
+ rtex->array_mode[level]);
+ height = align(height, tile_height);
+ return height;
}
static void r600_texture_set_array_mode(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned level, unsigned array_mode)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
switch (array_mode) {
case V_0280A0_ARRAY_LINEAR_GENERAL:
@@ -227,7 +222,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen,
unsigned w, h, tile_height, tile_width;
tile_height = r600_get_height_alignment(screen, array_mode);
- tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode);
+ tile_width = r600_get_block_alignment(screen, ptex->format, array_mode);
w = mip_minify(ptex->width0, level);
h = mip_minify(ptex->height0, level);
@@ -244,40 +239,128 @@ static void r600_setup_miptree(struct pipe_screen *screen,
struct r600_resource_texture *rtex,
unsigned array_mode)
{
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
struct radeon *radeon = (struct radeon *)screen->winsys;
enum chip_class chipc = r600_get_family_class(radeon);
- unsigned pitch, size, layer_size, i, offset;
- unsigned nblocksy;
+ unsigned size, layer_size, i, offset;
+ unsigned nblocksx, nblocksy;
for (i = 0, offset = 0; i <= ptex->last_level; i++) {
+ unsigned blocksize = util_format_get_blocksize(ptex->format);
+
r600_texture_set_array_mode(screen, rtex, i, array_mode);
- pitch = r600_texture_get_stride(screen, rtex, i);
+ nblocksx = r600_texture_get_nblocksx(screen, rtex, i);
nblocksy = r600_texture_get_nblocksy(screen, rtex, i);
- layer_size = pitch * nblocksy;
-
+ layer_size = nblocksx * nblocksy * blocksize;
if (ptex->target == PIPE_TEXTURE_CUBE) {
if (chipc >= R700)
size = layer_size * 8;
else
size = layer_size * 6;
}
- else
+ else if (ptex->target == PIPE_TEXTURE_3D)
size = layer_size * u_minify(ptex->depth0, i);
+ else
+ size = layer_size * ptex->array_size;
+
/* align base image and start of miptree */
if ((i == 0) || (i == 1))
offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode));
rtex->offset[i] = offset;
rtex->layer_size[i] = layer_size;
- rtex->pitch_in_bytes[i] = pitch;
- rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch);
+ rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */
+ rtex->pitch_in_bytes[i] = nblocksx * blocksize;
+
offset += size;
}
rtex->size = offset;
}
+/* Figure out whether u_blitter will fallback to a transfer operation.
+ * If so, don't use a staging resource.
+ */
+static boolean permit_hardware_blit(struct pipe_screen *screen,
+ const struct pipe_resource *res)
+{
+ unsigned bind;
+
+ if (util_format_is_depth_or_stencil(res->format))
+ bind = PIPE_BIND_DEPTH_STENCIL;
+ else
+ bind = PIPE_BIND_RENDER_TARGET;
+
+ /* hackaround for S3TC */
+ if (util_format_is_s3tc(res->format))
+ return TRUE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ bind, 0))
+ return FALSE;
+
+ if (!screen->is_format_supported(screen,
+ res->format,
+ res->target,
+ res->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW, 0))
+ return FALSE;
+
+ return TRUE;
+}
+
+static boolean r600_texture_get_handle(struct pipe_screen* screen,
+ struct pipe_resource *ptex,
+ struct winsys_handle *whandle)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
+ struct r600_resource *resource = &rtex->resource;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+
+ return r600_bo_get_winsys_handle(radeon, resource->bo,
+ rtex->pitch_in_bytes[0], whandle);
+}
+
+static void r600_texture_destroy(struct pipe_screen *screen,
+ struct pipe_resource *ptex)
+{
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
+ struct r600_resource *resource = &rtex->resource;
+ struct radeon *radeon = (struct radeon *)screen->winsys;
+
+ if (rtex->flushed_depth_texture)
+ pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
+
+ if (resource->bo) {
+ r600_bo_reference(radeon, &resource->bo, NULL);
+ }
+ FREE(rtex);
+}
+
+static unsigned int r600_texture_is_referenced(struct pipe_context *context,
+ struct pipe_resource *texture,
+ unsigned level, int layer)
+{
+ /* FIXME */
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
+static const struct u_resource_vtbl r600_texture_vtbl =
+{
+ r600_texture_get_handle, /* get_handle */
+ r600_texture_destroy, /* resource_destroy */
+ r600_texture_is_referenced, /* is_resource_referenced */
+ r600_texture_get_transfer, /* get_transfer */
+ r600_texture_transfer_destroy, /* transfer_destroy */
+ r600_texture_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region,/* transfer_flush_region */
+ r600_texture_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
static struct r600_resource_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
@@ -295,21 +378,22 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
resource = &rtex->resource;
- resource->base.b = *base;
- resource->base.vtbl = &r600_texture_vtbl;
- pipe_reference_init(&resource->base.b.reference, 1);
- resource->base.b.screen = screen;
+ resource->b.b.b = *base;
+ resource->b.b.vtbl = &r600_texture_vtbl;
+ pipe_reference_init(&resource->b.b.b.reference, 1);
+ resource->b.b.b.screen = screen;
resource->bo = bo;
rtex->pitch_override = pitch_in_bytes_override;
+ /* only mark depth textures the HW can hit as depth textures */
+ if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base))
+ rtex->depth = 1;
- if (array_mode)
- rtex->tiled = 1;
r600_setup_miptree(screen, rtex, array_mode);
resource->size = rtex->size;
if (!resource->bo) {
- struct pipe_resource *ptex = &rtex->resource.base.b;
+ struct pipe_resource *ptex = &rtex->resource.b.b.b;
int base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage);
@@ -321,48 +405,6 @@ r600_texture_create_object(struct pipe_screen *screen,
return rtex;
}
-/* Figure out whether u_blitter will fallback to a transfer operation.
- * If so, don't use a staging resource.
- */
-static boolean permit_hardware_blit(struct pipe_screen *screen,
- const struct pipe_resource *res)
-{
- unsigned bind;
-
- if (util_format_is_depth_or_stencil(res->format))
- bind = PIPE_BIND_DEPTH_STENCIL;
- else
- bind = PIPE_BIND_RENDER_TARGET;
-
- /* See r600_resource_copy_region: there is something wrong
- * with depth resource copies at the moment so avoid them for
- * now.
- */
- if (util_format_get_component_bits(res->format,
- UTIL_FORMAT_COLORSPACE_ZS,
- 0) != 0)
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- bind, 0))
- return FALSE;
-
- if (!screen->is_format_supported(screen,
- res->format,
- res->target,
- res->nr_samples,
- PIPE_BIND_SAMPLER_VIEW, 0))
- return FALSE;
-
- if (res->usage == PIPE_USAGE_STREAM)
- return FALSE;
-
- return TRUE;
-}
-
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
@@ -381,46 +423,21 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
}
}
+ if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+ util_format_is_s3tc(templ->format))
+ array_mode = V_038000_ARRAY_1D_TILED_THIN1;
+
return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
0, 0, NULL);
}
-static void r600_texture_destroy(struct pipe_screen *screen,
- struct pipe_resource *ptex)
-{
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
- struct r600_resource *resource = &rtex->resource;
- struct radeon *radeon = (struct radeon *)screen->winsys;
-
- if (rtex->flushed_depth_texture)
- pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
-
- if (resource->bo) {
- r600_bo_reference(radeon, &resource->bo, NULL);
- }
- FREE(rtex);
-}
-
-static boolean r600_texture_get_handle(struct pipe_screen* screen,
- struct pipe_resource *ptex,
- struct winsys_handle *whandle)
-{
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
- struct r600_resource *resource = &rtex->resource;
- struct radeon *radeon = (struct radeon *)screen->winsys;
-
- return r600_bo_get_winsys_handle(radeon, resource->bo,
- rtex->pitch_in_bytes[0], whandle);
-}
-
static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *surf_tmpl)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
- unsigned tile_height;
unsigned level = surf_tmpl->u.tex.level;
assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
@@ -440,8 +457,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
surface->base.u.tex.level = level;
- tile_height = r600_get_height_alignment(pipe->screen, rtex->array_mode[level]);
- surface->aligned_height = align(surface->base.height, tile_height);
+ surface->aligned_height = r600_texture_get_nblocksy(pipe->screen,
+ rtex, level);
return &surface->base;
}
@@ -477,16 +494,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
bo);
}
-static unsigned int r600_texture_is_referenced(struct pipe_context *context,
- struct pipe_resource *texture,
- unsigned level, int layer)
-{
- /* FIXME */
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
int r600_texture_depth_flush(struct pipe_context *ctx,
- struct pipe_resource *texture)
+ struct pipe_resource *texture, boolean just_create)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
struct pipe_resource resource;
@@ -499,7 +508,8 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
resource.width0 = texture->width0;
resource.height0 = texture->height0;
resource.depth0 = 1;
- resource.last_level = 0;
+ resource.array_size = 1;
+ resource.last_level = texture->last_level;
resource.nr_samples = 0;
resource.usage = PIPE_USAGE_DYNAMIC;
resource.bind = 0;
@@ -513,7 +523,11 @@ int r600_texture_depth_flush(struct pipe_context *ctx,
return -ENOMEM;
}
+ ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE;
out:
+ if (just_create)
+ return 0;
+
/* XXX: only do this if the depth texture has actually changed:
*/
r600_blit_uncompress_depth(ctx, rtex);
@@ -546,7 +560,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
* the CPU is much happier reading out of cached system memory
* than uncached VRAM.
*/
- if (rtex->tiled)
+ if (R600_TEX_IS_TILED(rtex, level))
use_staging_texture = TRUE;
if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024)
@@ -579,13 +593,16 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
*/
/* XXX: when discard is true, no need to read back from depth texture
*/
- r = r600_texture_depth_flush(ctx, texture);
+ r = r600_texture_depth_flush(ctx, texture, FALSE);
if (r < 0) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
pipe_resource_reference(&trans->transfer.resource, NULL);
FREE(trans);
return NULL;
}
+ trans->transfer.stride = rtex->flushed_depth_texture->pitch_in_bytes[level];
+ trans->offset = r600_texture_get_offset(rtex->flushed_depth_texture, level, box->z);
+ return &trans->transfer;
} else if (use_staging_texture) {
resource.target = PIPE_TEXTURE_2D;
resource.format = texture->format;
@@ -627,6 +644,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
return &trans->transfer;
}
trans->transfer.stride = rtex->pitch_in_bytes[level];
+ trans->transfer.layer_stride = rtex->layer_size[level];
trans->offset = r600_texture_get_offset(rtex, level, box->z);
return &trans->transfer;
}
@@ -635,7 +653,8 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource;
+ struct pipe_resource *texture = transfer->resource;
+ struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
if (rtransfer->staging_texture) {
if (transfer->usage & PIPE_TRANSFER_WRITE) {
@@ -643,9 +662,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
}
pipe_resource_reference(&rtransfer->staging_texture, NULL);
}
- if (rtex->flushed_depth_texture) {
- pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
+
+ if (rtex->depth && !rtex->is_flushing_texture) {
+ if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtex->flushed_depth_texture)
+ r600_blit_push_depth(ctx, rtex);
}
+
pipe_resource_reference(&transfer->resource, NULL);
FREE(transfer);
}
@@ -727,19 +749,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
r600_bo_unmap(radeon, bo);
}
-struct u_resource_vtbl r600_texture_vtbl =
-{
- r600_texture_get_handle, /* get_handle */
- r600_texture_destroy, /* resource_destroy */
- r600_texture_is_referenced, /* is_resource_referenced */
- r600_texture_get_transfer, /* get_transfer */
- r600_texture_transfer_destroy, /* transfer_destroy */
- r600_texture_transfer_map, /* transfer_map */
- u_default_transfer_flush_region,/* transfer_flush_region */
- r600_texture_transfer_unmap, /* transfer_unmap */
- u_default_transfer_inline_write /* transfer_inline_write */
-};
-
void r600_init_surface_functions(struct r600_pipe_context *r600)
{
r600->context.create_surface = r600_create_surface;
@@ -802,6 +811,8 @@ uint32_t r600_translate_texformat(enum pipe_format format,
uint32_t result = 0, word4 = 0, yuv_format = 0;
const struct util_format_description *desc;
boolean uniform = TRUE;
+ static int r600_enable_s3tc = -1;
+
int i;
const uint32_t sign_bit[4] = {
S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED),
@@ -853,34 +864,55 @@ uint32_t r600_translate_texformat(enum pipe_format format,
case UTIL_FORMAT_COLORSPACE_SRGB:
word4 |= S_038010_FORCE_DEGAMMA(1);
- if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB)
- goto out_unknown; /* fails for some reason - TODO */
break;
default:
break;
}
- /* S3TC formats. TODO */
- if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- static int r600_enable_s3tc = -1;
+ if (r600_enable_s3tc == -1)
+ r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ if (!r600_enable_s3tc)
+ goto out_unknown;
+
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ result = FMT_BC4;
+ goto out_word4;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ result = FMT_BC5;
+ goto out_word4;
+ default:
+ goto out_unknown;
+ }
+ }
- if (r600_enable_s3tc == -1)
- r600_enable_s3tc =
- debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
if (!r600_enable_s3tc)
goto out_unknown;
+ if (!util_format_s3tc_enabled) {
+ goto out_unknown;
+ }
+
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT1_SRGB:
+ case PIPE_FORMAT_DXT1_SRGBA:
result = FMT_BC1;
goto out_word4;
case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT3_SRGBA:
result = FMT_BC2;
goto out_word4;
case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_DXT5_SRGBA:
result = FMT_BC3;
goto out_word4;
default:
@@ -897,8 +929,6 @@ uint32_t r600_translate_texformat(enum pipe_format format,
/* R8G8Bx_SNORM - TODO CxV8U8 */
- /* RGTC - TODO */
-
/* See whether the components are of the same size. */
for (i = 1; i < desc->nr_channels; i++) {
uniform = uniform && desc->channel[0].size == desc->channel[i].size;
@@ -927,7 +957,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
desc->channel[1].size == 10 &&
desc->channel[2].size == 10 &&
desc->channel[3].size == 2) {
- result = FMT_10_10_10_2;
+ result = FMT_2_10_10_10;
goto out_word4;
}
goto out_unknown;
@@ -990,6 +1020,19 @@ uint32_t r600_translate_texformat(enum pipe_format format,
result = FMT_16_16_16_16;
goto out_word4;
}
+ goto out_unknown;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ result = FMT_32;
+ goto out_word4;
+ case 2:
+ result = FMT_32_32;
+ goto out_word4;
+ case 4:
+ result = FMT_32_32_32_32;
+ goto out_word4;
+ }
}
goto out_unknown;
diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c
index f80fa7af941..7482d15e12f 100644
--- a/src/gallium/drivers/r600/r600_translate.c
+++ b/src/gallium/drivers/r600/r600_translate.c
@@ -22,178 +22,34 @@
*
* Authors: Dave Airlie <[email protected]>
*/
-#include "translate/translate_cache.h"
-#include "translate/translate.h"
-#include <pipebuffer/pb_buffer.h>
+
#include <util/u_index_modify.h>
+#include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
#include "r600_pipe.h"
-void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
-{
- struct pipe_context *pipe = &rctx->context;
- struct translate_key key = {0};
- struct translate_element *te;
- unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0};
- struct translate *tr;
- struct r600_vertex_element *ve = rctx->vertex_elements;
- boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
- void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
- struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
- struct pipe_resource *out_buffer;
- unsigned i, num_verts;
- struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
- void *tmp;
-
- /* Initialize the translate key, i.e. the recipe how vertices should be
- * translated. */
- for (i = 0; i < ve->count; i++) {
- struct pipe_vertex_buffer *vb =
- &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index];
- enum pipe_format output_format = ve->hw_format[i];
- unsigned output_format_size = ve->hw_format_size[i];
-
- /* Check for support. */
- if (ve->elements[i].src_format == ve->hw_format[i]) {
- continue;
- }
-
- /* Workaround for translate: output floats instead of halfs. */
- switch (output_format) {
- case PIPE_FORMAT_R16_FLOAT:
- output_format = PIPE_FORMAT_R32_FLOAT;
- output_format_size = 4;
- break;
- case PIPE_FORMAT_R16G16_FLOAT:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- output_format_size = 8;
- break;
- case PIPE_FORMAT_R16G16B16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- output_format_size = 12;
- break;
- case PIPE_FORMAT_R16G16B16A16_FLOAT:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- output_format_size = 16;
- break;
- default:;
- }
-
- /* Add this vertex element. */
- te = &key.element[key.nr_elements];
- /*te->type;
- te->instance_divisor;*/
- te->input_buffer = ve->elements[i].vertex_buffer_index;
- te->input_format = ve->elements[i].src_format;
- te->input_offset = vb->buffer_offset + ve->elements[i].src_offset;
- te->output_format = output_format;
- te->output_offset = key.output_stride;
-
- key.output_stride += output_format_size;
- vb_translated[ve->elements[i].vertex_buffer_index] = TRUE;
- tr_elem_index[i] = key.nr_elements;
- key.nr_elements++;
- }
-
- /* Get a translate object. */
- tr = translate_cache_find(rctx->tran.translate_cache, &key);
-
- /* Map buffers we want to translate. */
- for (i = 0; i < rctx->nvertex_buffer; i++) {
- if (vb_translated[i]) {
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
- PIPE_TRANSFER_READ, &vb_transfer[i]);
-
- tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index);
- }
- }
-
- /* Create and map the output buffer. */
- num_verts = rctx->vb_max_index + 1;
-
- out_buffer = pipe_buffer_create(&rctx->screen->screen,
- PIPE_BIND_VERTEX_BUFFER,
- key.output_stride * num_verts);
-
- out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE,
- &out_transfer);
-
- /* Translate. */
- tr->run(tr, 0, num_verts, 0, out_map);
-
- /* Unmap all buffers. */
- for (i = 0; i < rctx->nvertex_buffer; i++) {
- if (vb_translated[i]) {
- pipe_buffer_unmap(pipe, vb_transfer[i]);
- }
- }
-
- pipe_buffer_unmap(pipe, out_transfer);
-
- /* Setup the new vertex buffer in the first free slot. */
- for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
- struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
-
- if (!vb->buffer) {
- pipe_resource_reference(&vb->buffer, out_buffer);
- vb->buffer_offset = 0;
- vb->max_index = num_verts - 1;
- vb->stride = key.output_stride;
- rctx->tran.vb_slot = i;
- break;
- }
- }
-
- /* Save and replace vertex elements. */
- for (i = 0; i < ve->count; i++) {
- if (vb_translated[ve->elements[i].vertex_buffer_index]) {
- te = &key.element[tr_elem_index[i]];
- new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
- new_velems[i].src_format = te->output_format;
- new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
- } else {
- memcpy(&new_velems[i], &ve->elements[i],
- sizeof(struct pipe_vertex_element));
- }
- }
-
- tmp = pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
- pipe->bind_vertex_elements_state(pipe, tmp);
- rctx->tran.new_velems = tmp;
-
- pipe_resource_reference(&out_buffer, NULL);
-}
-
-void r600_end_vertex_translate(struct r600_pipe_context *rctx)
-{
- struct pipe_context *pipe = &rctx->context;
-
- if (rctx->tran.new_velems == NULL) {
- return;
- }
- /* Restore vertex elements. */
- pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems);
- rctx->tran.new_velems = NULL;
-
- /* Delete the now-unused VBO. */
- pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, NULL);
-}
void r600_translate_index_buffer(struct r600_pipe_context *r600,
- struct pipe_resource **index_buffer,
- unsigned *index_size,
- unsigned *start, unsigned count)
+ struct pipe_resource **index_buffer,
+ unsigned *index_size,
+ unsigned *start, unsigned count)
{
+ struct pipe_resource *out_buffer = NULL;
+ unsigned out_offset;
+ void *ptr;
+ boolean flushed;
+
switch (*index_size) {
case 1:
- util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count);
+ u_upload_alloc(r600->vbuf_mgr->uploader, 0, count * 2,
+ &out_offset, &out_buffer, &flushed, &ptr);
+
+ util_shorten_ubyte_elts_to_userptr(
+ &r600->context, *index_buffer, 0, *start, count, ptr);
+
+ pipe_resource_reference(index_buffer, out_buffer);
*index_size = 2;
- *start = 0;
- break;
- case 2:
- case 4:
+ *start = out_offset / 2;
break;
}
}
diff --git a/src/gallium/drivers/r600/r600_upload.c b/src/gallium/drivers/r600/r600_upload.c
deleted file mode 100644
index 44102ff55b6..00000000000
--- a/src/gallium/drivers/r600/r600_upload.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <[email protected]>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse <[email protected]>
- */
-#include <errno.h>
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include "r600.h"
-#include "r600_pipe.h"
-#include "r600_resource.h"
-
-struct r600_upload {
- struct r600_pipe_context *rctx;
- struct r600_bo *buffer;
- char *ptr;
- unsigned size;
- unsigned default_size;
- unsigned total_alloc_size;
- unsigned offset;
- unsigned alignment;
-};
-
-struct r600_upload *r600_upload_create(struct r600_pipe_context *rctx,
- unsigned default_size,
- unsigned alignment)
-{
- struct r600_upload *upload = CALLOC_STRUCT(r600_upload);
-
- if (upload == NULL)
- return NULL;
-
- upload->rctx = rctx;
- upload->size = 0;
- upload->default_size = default_size;
- upload->alignment = alignment;
- upload->ptr = NULL;
- upload->buffer = NULL;
- upload->total_alloc_size = 0;
-
- return upload;
-}
-
-void r600_upload_flush(struct r600_upload *upload)
-{
- if (upload->buffer) {
- r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL);
- }
- upload->default_size = MAX2(upload->total_alloc_size, upload->default_size);
- upload->total_alloc_size = 0;
- upload->size = 0;
- upload->offset = 0;
- upload->ptr = NULL;
- upload->buffer = NULL;
-}
-
-void r600_upload_destroy(struct r600_upload *upload)
-{
- r600_upload_flush(upload);
- FREE(upload);
-}
-
-int r600_upload_buffer(struct r600_upload *upload, unsigned offset,
- unsigned size, struct r600_resource_buffer *in_buffer,
- unsigned *out_offset, unsigned *out_size,
- struct r600_bo **out_buffer)
-{
- unsigned alloc_size = align(size, upload->alignment);
- const void *in_ptr = NULL;
-
- if (upload->offset + alloc_size > upload->size) {
- if (upload->size) {
- r600_bo_reference(upload->rctx->radeon, &upload->buffer, NULL);
- }
- upload->size = align(MAX2(upload->default_size, alloc_size), 4096);
- upload->total_alloc_size += upload->size;
- upload->offset = 0;
- upload->buffer = r600_bo(upload->rctx->radeon, upload->size, 4096, PIPE_BIND_VERTEX_BUFFER, 0);
- if (upload->buffer == NULL) {
- return -ENOMEM;
- }
- upload->ptr = r600_bo_map(upload->rctx->radeon, upload->buffer, 0, NULL);
- }
-
- in_ptr = in_buffer->user_buffer;
- memcpy(upload->ptr + upload->offset, (uint8_t *) in_ptr + offset, size);
- *out_offset = upload->offset;
- *out_size = upload->size;
- *out_buffer = NULL;
- r600_bo_reference(upload->rctx->radeon, out_buffer, upload->buffer);
- upload->offset += alloc_size;
-
- return 0;
-}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 8c391936db0..e8558c49a7c 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -248,6 +248,8 @@
#define S_0280A0_SOURCE_FORMAT(x) (((x) & 0x1) << 27)
#define G_0280A0_SOURCE_FORMAT(x) (((x) >> 27) & 0x1)
#define C_0280A0_SOURCE_FORMAT 0xF7FFFFFF
+#define V_0280A0_EXPORT_FULL 0
+#define V_0280A0_EXPORT_NORM 1
#define R_028060_CB_COLOR0_SIZE 0x028060
#define S_028060_PITCH_TILE_MAX(x) (((x) & 0x3FF) << 0)
#define G_028060_PITCH_TILE_MAX(x) (((x) >> 0) & 0x3FF)
@@ -2332,31 +2334,6 @@
#define R_0280D4_CB_COLOR5_TILE 0x0280D4
#define R_0280D8_CB_COLOR6_TILE 0x0280D8
#define R_0280DC_CB_COLOR7_TILE 0x0280DC
-#define R_028808_CB_COLOR_CONTROL 0x028808
-#define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0)
-#define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1)
-#define C_028808_FOG_ENABLE 0xFFFFFFFE
-#define S_028808_MULTIWRITE_ENABLE(x) (((x) & 0x1) << 1)
-#define G_028808_MULTIWRITE_ENABLE(x) (((x) >> 1) & 0x1)
-#define C_028808_MULTIWRITE_ENABLE 0xFFFFFFFD
-#define S_028808_DITHER_ENABLE(x) (((x) & 0x1) << 2)
-#define G_028808_DITHER_ENABLE(x) (((x) >> 2) & 0x1)
-#define C_028808_DITHER_ENABLE 0xFFFFFFFB
-#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
-#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
-#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7
-#define S_028808_SPECIAL_OP(x) (((x) & 0x7) << 4)
-#define G_028808_SPECIAL_OP(x) (((x) >> 4) & 0x7)
-#define C_028808_SPECIAL_OP 0xFFFFFF8F
-#define S_028808_PER_MRT_BLEND(x) (((x) & 0x1) << 7)
-#define G_028808_PER_MRT_BLEND(x) (((x) >> 7) & 0x1)
-#define C_028808_PER_MRT_BLEND 0xFFFFFF7F
-#define S_028808_TARGET_BLEND_ENABLE(x) (((x) & 0xFF) << 8)
-#define G_028808_TARGET_BLEND_ENABLE(x) (((x) >> 8) & 0xFF)
-#define C_028808_TARGET_BLEND_ENABLE 0xFFFF00FF
-#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
-#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
-#define C_028808_ROP3 0xFF00FFFF
#define R_028614_SPI_VS_OUT_ID_0 0x028614
#define S_028614_SEMANTIC_0(x) (((x) & 0xFF) << 0)
#define G_028614_SEMANTIC_0(x) (((x) >> 0) & 0xFF)
diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c
index 94e57e40f86..3aefb5b3bb5 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -987,6 +987,19 @@ rbug_context_transfer_inline_write(struct pipe_context *_context,
}
+static void rbug_redefine_user_buffer(struct pipe_context *_context,
+ struct pipe_resource *_resource,
+ unsigned offset, unsigned size)
+{
+ struct rbug_context *rb_pipe = rbug_context(_context);
+ struct rbug_resource *rb_resource = rbug_resource(_resource);
+ struct pipe_context *context = rb_pipe->pipe;
+ struct pipe_resource *resource = rb_resource->resource;
+
+ context->redefine_user_buffer(context, resource, offset, size);
+}
+
+
struct pipe_context *
rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
{
@@ -1072,6 +1085,7 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
rb_pipe->base.transfer_unmap = rbug_context_transfer_unmap;
rb_pipe->base.transfer_flush_region = rbug_context_transfer_flush_region;
rb_pipe->base.transfer_inline_write = rbug_context_transfer_inline_write;
+ rb_pipe->base.redefine_user_buffer = rbug_redefine_user_buffer;
rb_pipe->pipe = pipe;
diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c
index 7d7cc482ae6..15f5db40093 100644
--- a/src/gallium/drivers/rbug/rbug_objects.c
+++ b/src/gallium/drivers/rbug/rbug_objects.c
@@ -98,8 +98,9 @@ rbug_surface_create(struct rbug_context *rb_context,
pipe_reference_init(&rb_surface->base.reference, 1);
rb_surface->base.texture = NULL;
+ rb_surface->base.context = &rb_context->base;
+ rb_surface->surface = surface; /* we own the surface already */
pipe_resource_reference(&rb_surface->base.texture, &rb_resource->base);
- rb_surface->surface = surface;
return &rb_surface->base;
@@ -113,8 +114,7 @@ rbug_surface_destroy(struct rbug_context *rb_context,
struct rbug_surface *rb_surface)
{
pipe_resource_reference(&rb_surface->base.texture, NULL);
- rb_context->pipe->surface_destroy(rb_context->pipe,
- rb_surface->surface);
+ pipe_surface_reference(&rb_surface->surface, NULL);
FREE(rb_surface);
}
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index fe54f92addf..70fdfb7ddf3 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -230,7 +230,7 @@ softpipe_create_context( struct pipe_screen *screen,
softpipe->use_sse = FALSE;
#endif
- softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE );
+ softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
softpipe->pipe.winsys = NULL;
@@ -315,7 +315,7 @@ softpipe_create_context( struct pipe_screen *screen,
(struct tgsi_sampler **)
softpipe->tgsi.geom_samplers_list);
- if (debug_get_bool_option( "SP_NO_RAST", FALSE ))
+ if (debug_get_bool_option( "SOFTPIPE_NO_RAST", FALSE ))
softpipe->no_rast = TRUE;
softpipe->vbuf_backend = sp_create_vbuf_backend(softpipe);
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 035d712d17c..c91709aef06 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -58,7 +58,7 @@ struct softpipe_context {
/** Constant state objects */
struct pipe_blend_state *blend;
- struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_state *fragment_samplers[PIPE_MAX_SAMPLERS];
struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_sampler_state *geometry_samplers[PIPE_MAX_GEOMETRY_SAMPLERS];
struct pipe_depth_stencil_alpha_state *depth_stencil;
@@ -91,8 +91,8 @@ struct softpipe_context {
} so_target;
struct pipe_query_data_so_statistics so_stats;
- unsigned num_samplers;
- unsigned num_sampler_views;
+ unsigned num_fragment_samplers;
+ unsigned num_fragment_sampler_views;
unsigned num_vertex_samplers;
unsigned num_vertex_sampler_views;
unsigned num_geometry_samplers;
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index d422cb17a4b..6f7addd441a 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -51,7 +51,7 @@ softpipe_flush( struct pipe_context *pipe,
draw_flush(softpipe->draw);
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
- for (i = 0; i < softpipe->num_sampler_views; i++) {
+ for (i = 0; i < softpipe->num_fragment_sampler_views; i++) {
sp_flush_tex_tile_cache(softpipe->fragment_tex_cache[i]);
}
for (i = 0; i < softpipe->num_vertex_sampler_views; i++) {
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index c433405cb66..a06817c5735 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -126,6 +126,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1;
case PIPE_CAP_INSTANCED_DRAWING:
return 1;
+ case PIPE_CAP_ARRAY_TEXTURES:
+ return 1;
default:
return 0;
}
@@ -186,7 +188,9 @@ softpipe_is_format_supported( struct pipe_screen *screen,
assert(target == PIPE_BUFFER ||
target == PIPE_TEXTURE_1D ||
+ target == PIPE_TEXTURE_1D_ARRAY ||
target == PIPE_TEXTURE_2D ||
+ target == PIPE_TEXTURE_2D_ARRAY ||
target == PIPE_TEXTURE_RECT ||
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE);
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 5d727dc00df..0ce28f4c6ee 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -575,7 +575,7 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot)
setup->coef[slot].dady[0] = 0.0;
/*Y*/
setup->coef[slot].a0[1] =
- (spfs->origin_lower_left ? setup->softpipe->framebuffer.height : 0)
+ (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
+ (spfs->pixel_center_integer ? 0.0 : 0.5);
setup->coef[slot].dadx[1] = 0.0;
setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0;
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index 38943563800..60331bc4976 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -76,18 +76,18 @@ softpipe_bind_fragment_sampler_states(struct pipe_context *pipe,
assert(num <= PIPE_MAX_SAMPLERS);
/* Check for no-op */
- if (num == softpipe->num_samplers &&
- !memcmp(softpipe->sampler, sampler, num * sizeof(void *)))
+ if (num == softpipe->num_fragment_samplers &&
+ !memcmp(softpipe->fragment_samplers, sampler, num * sizeof(void *)))
return;
draw_flush(softpipe->draw);
for (i = 0; i < num; ++i)
- softpipe->sampler[i] = sampler[i];
+ softpipe->fragment_samplers[i] = sampler[i];
for (i = num; i < PIPE_MAX_SAMPLERS; ++i)
- softpipe->sampler[i] = NULL;
+ softpipe->fragment_samplers[i] = NULL;
- softpipe->num_samplers = num;
+ softpipe->num_fragment_samplers = num;
softpipe->dirty |= SP_NEW_SAMPLER;
}
@@ -191,7 +191,7 @@ softpipe_set_fragment_sampler_views(struct pipe_context *pipe,
assert(num <= PIPE_MAX_SAMPLERS);
/* Check for no-op */
- if (num == softpipe->num_sampler_views &&
+ if (num == softpipe->num_fragment_sampler_views &&
!memcmp(softpipe->fragment_sampler_views, views,
num * sizeof(struct pipe_sampler_view *)))
return;
@@ -205,7 +205,7 @@ softpipe_set_fragment_sampler_views(struct pipe_context *pipe,
sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[i], view);
}
- softpipe->num_sampler_views = num;
+ softpipe->num_fragment_sampler_views = num;
softpipe->dirty |= SP_NEW_TEXTURE;
}
@@ -374,10 +374,10 @@ softpipe_reset_sampler_variants(struct softpipe_context *softpipe)
}
for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) {
- if (softpipe->sampler[i]) {
+ if (softpipe->fragment_samplers[i]) {
softpipe->tgsi.frag_samplers_list[i] =
get_sampler_variant( i,
- sp_sampler(softpipe->sampler[i]),
+ sp_sampler(softpipe->fragment_samplers[i]),
softpipe->fragment_sampler_views[i],
TGSI_PROCESSOR_FRAGMENT );
diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c
index 5f4d661abde..aa0b333c7a9 100644
--- a/src/gallium/drivers/softpipe/sp_state_vertex.c
+++ b/src/gallium/drivers/softpipe/sp_state_vertex.c
@@ -34,6 +34,7 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_transfer.h"
#include "draw/draw_context.h"
@@ -119,4 +120,5 @@ softpipe_init_vertex_funcs(struct pipe_context *pipe)
pipe->set_vertex_buffers = softpipe_set_vertex_buffers;
pipe->set_index_buffer = softpipe_set_index_buffer;
+ pipe->redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 242c27c7ebd..8a4ef934348 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -539,6 +539,19 @@ wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size,
}
+/**
+ * Do coordinate to array index conversion. For array textures.
+ */
+static INLINE void
+wrap_array_layer(const float coord[4], unsigned size, int layer[4])
+{
+ uint ch;
+ for (ch = 0; ch < 4; ch++) {
+ int c = util_ifloor(coord[ch] + 0.5F);
+ layer[ch] = CLAMP(c, 0, size - 1);
+ }
+}
+
/**
* Examine the quad's texture coordinates to compute the partial
@@ -761,6 +774,43 @@ get_texel_3d(const struct sp_sampler_variant *samp,
}
+/* Get texel pointer for 1D array texture */
+static INLINE const float *
+get_texel_1d_array(const struct sp_sampler_variant *samp,
+ union tex_tile_address addr, int x, int y)
+{
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level = addr.bits.level;
+
+ if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
+ return samp->sampler->border_color;
+ }
+ else {
+ return get_texel_2d_no_border(samp, addr, x, y);
+ }
+}
+
+
+/* Get texel pointer for 2D array texture */
+static INLINE const float *
+get_texel_2d_array(const struct sp_sampler_variant *samp,
+ union tex_tile_address addr, int x, int y, int layer)
+{
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level = addr.bits.level;
+
+ assert(layer < texture->array_size);
+
+ if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+ y < 0 || y >= (int) u_minify(texture->height0, level)) {
+ return samp->sampler->border_color;
+ }
+ else {
+ return get_texel_3d_no_border(samp, addr, x, y, layer);
+ }
+}
+
+
/**
* Given the logbase2 of a mipmap's base level size and a mipmap level,
* return the size (in texels) of that mipmap level.
@@ -990,6 +1040,47 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
static void
+img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level0, j;
+ int width;
+ int x[4], layer[4];
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = u_minify(texture->width0, level0);
+
+ assert(width > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->nearest_texcoord_s(s, width, x);
+ wrap_array_layer(t, texture->array_size, layer);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *out = get_texel_1d_array(samp, addr, x[j], layer[j]);
+ int c;
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
+ }
+ }
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
+}
+
+
+static void
img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
@@ -1033,6 +1124,50 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
}
+static void
+img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level0, j;
+ int width, height;
+ int x[4], y[4], layer[4];
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
+
+ assert(width > 0);
+ assert(height > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->nearest_texcoord_s(s, width, x);
+ samp->nearest_texcoord_t(t, height, y);
+ wrap_array_layer(p, texture->array_size, layer);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *out = get_texel_2d_array(samp, addr, x[j], y[j], layer[j]);
+ int c;
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = out[c];
+ }
+ }
+
+ if (DEBUG_TEX) {
+ print_sample(__FUNCTION__, rgba);
+ }
+}
+
+
static INLINE union tex_tile_address
face(union tex_tile_address addr, unsigned face )
{
@@ -1168,6 +1303,47 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
static void
+img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level0, j;
+ int width;
+ int x0[4], x1[4], layer[4];
+ float xw[4]; /* weights */
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = u_minify(texture->width0, level0);
+
+ assert(width > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->linear_texcoord_s(s, width, x0, x1, xw);
+ wrap_array_layer(t, texture->array_size, layer);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *tx0 = get_texel_1d_array(samp, addr, x0[j], layer[j]);
+ const float *tx1 = get_texel_1d_array(samp, addr, x1[j], layer[j]);
+ int c;
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
+ }
+ }
+}
+
+
+static void
img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
@@ -1215,6 +1391,54 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
static void
+img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+ const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+ const struct pipe_resource *texture = samp->view->texture;
+ unsigned level0, j;
+ int width, height;
+ int x0[4], y0[4], x1[4], y1[4], layer[4];
+ float xw[4], yw[4]; /* weights */
+ union tex_tile_address addr;
+
+ level0 = samp->level;
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
+
+ assert(width > 0);
+ assert(height > 0);
+
+ addr.value = 0;
+ addr.bits.level = samp->level;
+
+ samp->linear_texcoord_s(s, width, x0, x1, xw);
+ samp->linear_texcoord_t(t, height, y0, y1, yw);
+ wrap_array_layer(p, texture->array_size, layer);
+
+ for (j = 0; j < QUAD_SIZE; j++) {
+ const float *tx0 = get_texel_2d_array(samp, addr, x0[j], y0[j], layer[j]);
+ const float *tx1 = get_texel_2d_array(samp, addr, x1[j], y0[j], layer[j]);
+ const float *tx2 = get_texel_2d_array(samp, addr, x0[j], y1[j], layer[j]);
+ const float *tx3 = get_texel_2d_array(samp, addr, x1[j], y1[j], layer[j]);
+ int c;
+
+ /* interpolate R, G, B, A */
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = lerp_2d(xw[j], yw[j],
+ tx0[c], tx1[c],
+ tx2[c], tx3[c]);
+ }
+ }
+}
+
+
+static void
img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
@@ -1906,8 +2130,10 @@ get_lambda_func(const union sp_sampler_key key)
switch (key.bits.target) {
case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
return compute_lambda_1d;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
return compute_lambda_2d;
@@ -1932,6 +2158,12 @@ get_img_filter(const union sp_sampler_key key,
else
return img_filter_1d_linear;
break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ if (filter == PIPE_TEX_FILTER_NEAREST)
+ return img_filter_1d_array_nearest;
+ else
+ return img_filter_1d_array_linear;
+ break;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
/* Try for fast path:
@@ -1967,6 +2199,12 @@ get_img_filter(const union sp_sampler_key key,
else
return img_filter_2d_linear;
break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ if (filter == PIPE_TEX_FILTER_NEAREST)
+ return img_filter_2d_array_nearest;
+ else
+ return img_filter_2d_array_linear;
+ break;
case PIPE_TEXTURE_CUBE:
if (filter == PIPE_TEX_FILTER_NEAREST)
return img_filter_cube_nearest;
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index e42015ad498..e589ee7c841 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -251,6 +251,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
tc->tex_level != addr.bits.level ||
tc->tex_z != addr.bits.z) {
/* get new transfer (view into texture) */
+ unsigned width, height, layer;
if (tc->tex_trans) {
if (tc->tex_trans_map) {
@@ -262,14 +263,22 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
tc->tex_trans = NULL;
}
+ width = u_minify(tc->texture->width0, addr.bits.level);
+ if (tc->texture->target == PIPE_TEXTURE_1D_ARRAY) {
+ height = tc->texture->array_size;
+ layer = 0;
+ }
+ else {
+ height = u_minify(tc->texture->height0, addr.bits.level);
+ layer = addr.bits.face + addr.bits.z;
+ }
+
tc->tex_trans =
pipe_get_transfer(tc->pipe, tc->texture,
addr.bits.level,
- addr.bits.face + addr.bits.z,
+ layer,
PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED,
- 0, 0,
- u_minify(tc->texture->width0, addr.bits.level),
- u_minify(tc->texture->height0, addr.bits.level));
+ 0, 0, width, height);
tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans);
@@ -278,22 +287,17 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
tc->tex_z = addr.bits.z;
}
- /* get tile from the transfer (view into texture)
- * Note we're using the swizzle version of this fuction only because
- * we need to pass the texture cache's format explicitly.
+ /* Get tile from the transfer (view into texture), explicitly passing
+ * the image format.
*/
- pipe_get_tile_swizzle(tc->pipe,
- tc->tex_trans,
- addr.bits.x * TILE_SIZE,
- addr.bits.y * TILE_SIZE,
- TILE_SIZE,
- TILE_SIZE,
- PIPE_SWIZZLE_RED,
- PIPE_SWIZZLE_GREEN,
- PIPE_SWIZZLE_BLUE,
- PIPE_SWIZZLE_ALPHA,
- tc->format,
- (float *) tile->data.color);
+ pipe_get_tile_rgba_format(tc->pipe,
+ tc->tex_trans,
+ addr.bits.x * TILE_SIZE,
+ addr.bits.y * TILE_SIZE,
+ TILE_SIZE,
+ TILE_SIZE,
+ tc->format,
+ (float *) tile->data.color);
tile->addr = addr;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
index 2220955b715..9bced37990a 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
@@ -92,7 +92,7 @@ struct softpipe_tex_tile_cache
unsigned swizzle_g;
unsigned swizzle_b;
unsigned swizzle_a;
- unsigned format;
+ enum pipe_format format;
struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */
};
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 509d9982b17..95374c34ec3 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -62,13 +62,21 @@ softpipe_resource_layout(struct pipe_screen *screen,
unsigned buffer_size = 0;
for (level = 0; level <= pt->last_level; level++) {
+ unsigned slices;
+
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ slices = 6;
+ else if (pt->target == PIPE_TEXTURE_3D)
+ slices = depth;
+ else
+ slices = pt->array_size;
+
spr->stride[level] = util_format_get_stride(pt->format, width);
spr->level_offset[level] = buffer_size;
buffer_size += (util_format_get_nblocksy(pt->format, height) *
- ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
- spr->stride[level]);
+ slices * spr->stride[level]);
width = u_minify(width, 1);
height = u_minify(height, 1);
@@ -227,9 +235,13 @@ sp_get_tex_image_offset(const struct softpipe_resource *spr,
unsigned offset = spr->level_offset[level];
if (spr->base.target == PIPE_TEXTURE_CUBE ||
- spr->base.target == PIPE_TEXTURE_3D) {
+ spr->base.target == PIPE_TEXTURE_3D ||
+ spr->base.target == PIPE_TEXTURE_2D_ARRAY) {
offset += layer * nblocksy * spr->stride[level];
}
+ else if (spr->base.target == PIPE_TEXTURE_1D_ARRAY) {
+ offset += layer * spr->stride[level];
+ }
else {
assert(layer == 0);
}
@@ -292,7 +304,7 @@ softpipe_surface_destroy(struct pipe_context *pipe,
* a resource object.
* \param pipe rendering context
* \param resource the resource to transfer in/out of
- * \param sr indicates cube face or 3D texture slice
+ * \param level which mipmap level
* \param usage bitmask of PIPE_TRANSFER_x flags
* \param box the 1D/2D/3D region of interest
*/
@@ -311,8 +323,21 @@ softpipe_get_transfer(struct pipe_context *pipe,
/* make sure the requested region is in the image bounds */
assert(box->x + box->width <= u_minify(resource->width0, level));
- assert(box->y + box->height <= u_minify(resource->height0, level));
- assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1));
+ if (resource->target == PIPE_TEXTURE_1D_ARRAY) {
+ assert(box->y + box->height <= resource->array_size);
+ }
+ else {
+ assert(box->y + box->height <= u_minify(resource->height0, level));
+ if (resource->target == PIPE_TEXTURE_2D_ARRAY) {
+ assert(box->z + box->depth <= resource->array_size);
+ }
+ else if (resource->target == PIPE_TEXTURE_CUBE) {
+ assert(box->z < 6);
+ }
+ else {
+ assert(box->z + box->depth <= (u_minify(resource->depth0, level)));
+ }
+ }
/*
* Transfers, like other pipe operations, must happen in order, so flush the
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 480860af63b..60870b8bee5 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -357,11 +357,12 @@ sp_flush_tile(struct softpipe_tile_cache* tc, unsigned pos)
tc->entries[pos]->data.depth32, 0/*STRIDE*/);
}
else {
- pipe_put_tile_rgba(tc->pipe, tc->transfer,
- tc->tile_addrs[pos].bits.x * TILE_SIZE,
- tc->tile_addrs[pos].bits.y * TILE_SIZE,
- TILE_SIZE, TILE_SIZE,
- (float *) tc->entries[pos]->data.color);
+ pipe_put_tile_rgba_format(tc->pipe, tc->transfer,
+ tc->tile_addrs[pos].bits.x * TILE_SIZE,
+ tc->tile_addrs[pos].bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
+ tc->surface->format,
+ (float *) tc->entries[pos]->data.color);
}
tc->tile_addrs[pos].bits.invalid = 1; /* mark as empty */
}
@@ -468,11 +469,12 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
tile->data.depth32, 0/*STRIDE*/);
}
else {
- pipe_put_tile_rgba(tc->pipe, pt,
- tc->tile_addrs[pos].bits.x * TILE_SIZE,
- tc->tile_addrs[pos].bits.y * TILE_SIZE,
- TILE_SIZE, TILE_SIZE,
- (float *) tile->data.color);
+ pipe_put_tile_rgba_format(tc->pipe, pt,
+ tc->tile_addrs[pos].bits.x * TILE_SIZE,
+ tc->tile_addrs[pos].bits.y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
+ tc->surface->format,
+ (float *) tile->data.color);
}
}
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index 1e513f1039f..f0f875b2b23 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -34,6 +34,7 @@
#include "svga_context.h"
#include "svga_screen.h"
+#include "svga_surface.h"
#include "svga_resource_texture.h"
#include "svga_resource_buffer.h"
#include "svga_resource.h"
@@ -43,6 +44,12 @@
#include "svga_debug.h"
#include "svga_state.h"
+DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(use_min_mipmap, "SVGA_USE_MIN_MIPMAP", FALSE);
+DEBUG_GET_ONCE_NUM_OPTION(disable_shader, "SVGA_DISABLE_SHADER", ~0);
+DEBUG_GET_ONCE_BOOL_OPTION(no_line_width, "SVGA_NO_LINE_WIDTH", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE", FALSE);
static void svga_destroy( struct pipe_context *pipe )
{
@@ -113,13 +120,12 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
/* debug */
- svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE);
- svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE);
- svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE);
- svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0);
-
- if (!svga_init_swtnl(svga))
- goto no_swtnl;
+ svga->debug.no_swtnl = debug_get_option_no_swtnl();
+ svga->debug.force_swtnl = debug_get_option_force_swtnl();
+ svga->debug.use_min_mipmap = debug_get_option_use_min_mipmap();
+ svga->debug.disable_shader = debug_get_option_disable_shader();
+ svga->debug.no_line_width = debug_get_option_no_line_width();
+ svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple();
svga->fs_bm = util_bitmask_create();
if (svga->fs_bm == NULL)
@@ -149,6 +155,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
if (svga->hwtnl == NULL)
goto no_hwtnl;
+ if (!svga_init_swtnl(svga))
+ goto no_swtnl;
ret = svga_emit_initial_state( svga );
if (ret)
@@ -171,6 +179,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
return &svga->pipe;
no_state:
+ svga_destroy_swtnl(svga);
+no_swtnl:
svga_hwtnl_destroy( svga->hwtnl );
no_hwtnl:
u_upload_destroy( svga->upload_vb );
@@ -181,8 +191,6 @@ no_upload_ib:
no_vs_bm:
util_bitmask_destroy( svga->fs_bm );
no_fs_bm:
- svga_destroy_swtnl(svga);
-no_swtnl:
svga->swc->destroy(svga->swc);
no_swc:
FREE(svga);
@@ -196,14 +204,10 @@ void svga_context_flush( struct svga_context *svga,
{
struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
struct pipe_fence_handle *fence = NULL;
+ enum pipe_error ret;
svga->curr.nr_fbs = 0;
- /* Unmap upload manager buffers:
- */
- u_upload_flush(svga->upload_vb);
- u_upload_flush(svga->upload_ib);
-
/* Ensure that texture dma uploads are processed
* before submitting commands.
*/
@@ -220,6 +224,21 @@ void svga_context_flush( struct svga_context *svga,
*/
svga->dirty |= SVGA_NEW_COMMAND_BUFFER;
+ /*
+ * We must reemit the surface bindings here, because svga_update_state
+ * will always flush the primitives before processing the
+ * SVGA_NEW_COMMAND_BUFFER state change.
+ *
+ * TODO: Refactor this.
+ */
+ ret = svga_reemit_framebuffer_bindings(svga);
+ assert(ret == PIPE_OK);
+
+ ret = svga_reemit_tss_bindings(svga);
+ assert(ret == PIPE_OK);
+
+ svga->dirty &= ~SVGA_NEW_COMMAND_BUFFER;
+
if (SVGA_DEBUG & DEBUG_SYNC) {
if (fence)
svga->pipe.screen->fence_finish( svga->pipe.screen, fence, 0);
@@ -245,6 +264,30 @@ void svga_hwtnl_flush_retry( struct svga_context *svga )
assert(ret == 0);
}
+
+/* Emit all operations pending on host surfaces.
+ */
+void svga_surfaces_flush(struct svga_context *svga)
+{
+ unsigned i;
+
+ /* Emit buffered drawing commands.
+ */
+ svga_hwtnl_flush_retry( svga );
+
+ /* Emit back-copy from render target view to texture.
+ */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (svga->curr.framebuffer.cbufs[i])
+ svga_propagate_surface(svga, svga->curr.framebuffer.cbufs[i]);
+ }
+
+ if (svga->curr.framebuffer.zsbuf)
+ svga_propagate_surface(svga, svga->curr.framebuffer.zsbuf);
+
+}
+
+
struct svga_winsys_context *
svga_winsys_context( struct pipe_context *pipe )
{
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index d4970908b1e..7b36a3606e0 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -35,6 +35,8 @@
#include "tgsi/tgsi_scan.h"
+#include "svga_state.h"
+
#define SVGA_TEX_UNITS 8
#define SVGA_MAX_POINTSIZE 80.0
@@ -147,7 +149,14 @@ struct svga_rasterizer_state {
float pointsize;
unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */
- unsigned need_pipeline:16; /* which prims do we need help for? */
+
+ /** Which prims do we need help for? Bitmask of (1 << PIPE_PRIM_x) flags */
+ unsigned need_pipeline:16;
+
+ /** For debugging: */
+ const char* need_pipeline_tris_str;
+ const char* need_pipeline_lines_str;
+ const char* need_pipeline_points_str;
};
struct svga_sampler_state {
@@ -237,7 +246,7 @@ struct svga_prescale {
};
-/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT )
+/* Updated by calling svga_update_state( SVGA_STATE_HW_CLEAR )
*/
struct svga_hw_clear_state
{
@@ -317,6 +326,9 @@ struct svga_context
unsigned shader_id;
unsigned disable_shader;
+
+ boolean no_line_width;
+ boolean force_hw_line_stipple;
} debug;
struct {
@@ -332,7 +344,7 @@ struct svga_context
struct util_bitmask *vs_bm;
struct {
- unsigned dirty[4];
+ unsigned dirty[SVGA_STATE_MAX];
unsigned texture_timestamp;
@@ -355,6 +367,9 @@ struct svga_context
/** List of buffers with queued transfers */
struct list_head dirty_buffers;
+
+ /** Was the previous draw done with the SW path? */
+ boolean prev_draw_swtnl;
};
/* A flag for each state_tracker state object:
@@ -438,6 +453,8 @@ void svga_context_flush( struct svga_context *svga,
void svga_hwtnl_flush_retry( struct svga_context *svga );
+void svga_surfaces_flush(struct svga_context *svga);
+
struct pipe_context *
svga_context_create(struct pipe_screen *screen,
void *priv);
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 97cbac447d6..2c873a0f7ac 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -28,6 +28,7 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_upload_mgr.h"
#include "svga_context.h"
#include "svga_draw.h"
@@ -143,6 +144,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
SVGA3dPrimitiveRange *prim;
unsigned i;
+ /* Unmap upload manager vertex buffers */
+ u_upload_flush(svga->upload_vb);
+
for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
if (handle == NULL)
@@ -151,6 +155,9 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
vb_handle[i] = handle;
}
+ /* Unmap upload manager index buffers */
+ u_upload_flush(svga->upload_ib);
+
for (i = 0; i < hwtnl->cmd.prim_count; i++) {
if (hwtnl->cmd.prim_ib[i]) {
handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index be0e7abe21b..a6518042eb9 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -53,6 +53,7 @@ static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl,
dst = pipe_buffer_create( pipe->screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STATIC,
size );
if (dst == NULL)
goto fail;
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
index 83527c6ef49..7d420c6b295 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -56,6 +56,7 @@ translate_indices( struct svga_hwtnl *hwtnl,
dst = pipe_buffer_create( pipe->screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STATIC,
size );
if (dst == NULL)
goto fail;
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
index 426698806c8..c87afb6946c 100644
--- a/src/gallium/drivers/svga/svga_pipe_blit.c
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -50,7 +50,9 @@ static void svga_surface_copy(struct pipe_context *pipe,
struct pipe_surface *srcsurf, *dstsurf;*/
unsigned dst_face, dst_z, src_face, src_z;
- svga_hwtnl_flush_retry( svga );
+ /* Emit buffered drawing commands, and any back copies.
+ */
+ svga_surfaces_flush( svga );
#if 0
srcsurf = screen->get_tex_surface(screen, src_tex,
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 001ec3616c4..d98b9b0e000 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -157,6 +157,14 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!u_trim_pipe_prim( info->mode, &count ))
return;
+ if (svga->state.sw.need_swtnl != svga->prev_draw_swtnl) {
+ /* We're switching between SW and HW drawing. Do a flush to avoid
+ * mixing HW and SW rendering with the same vertex buffer.
+ */
+ pipe->flush(pipe, ~0, NULL);
+ svga->prev_draw_swtnl = svga->state.sw.need_swtnl;
+ }
+
/*
* Mark currently bound target surfaces as dirty
* doesn't really matter if it is done before drawing.
diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c
index ab243aa6ec5..9357d827f28 100644
--- a/src/gallium/drivers/svga/svga_pipe_flush.c
+++ b/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -24,6 +24,7 @@
**********************************************************/
#include "pipe/p_defines.h"
+#include "util/u_string.h"
#include "svga_screen.h"
#include "svga_surface.h"
#include "svga_context.h"
@@ -35,20 +36,10 @@ static void svga_flush( struct pipe_context *pipe,
struct pipe_fence_handle **fence )
{
struct svga_context *svga = svga_context(pipe);
- int i;
- /* Emit buffered drawing commands.
+ /* Emit buffered drawing commands, and any back copies.
*/
- svga_hwtnl_flush_retry( svga );
-
- /* Emit back-copy from render target view to texture.
- */
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- if (svga->curr.framebuffer.cbufs[i])
- svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]);
- }
- if (svga->curr.framebuffer.zsbuf)
- svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf);
+ svga_surfaces_flush( svga );
/* Flush command queue.
*/
@@ -56,6 +47,28 @@ static void svga_flush( struct pipe_context *pipe,
SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n",
__FUNCTION__, flags, fence ? *fence : 0x0);
+
+ /* Enable to dump BMPs of the color/depth buffers each frame */
+ if (0) {
+ if (flags & PIPE_FLUSH_FRAME) {
+ struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
+ static unsigned frame_no = 1;
+ char filename[256];
+ unsigned i;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ util_snprintf(filename, sizeof(filename), "cbuf%u_%04u", i, frame_no);
+ debug_dump_surface_bmp(&svga->pipe, filename, fb->cbufs[i]);
+ }
+
+ if (0 && fb->zsbuf) {
+ util_snprintf(filename, sizeof(filename), "zsbuf_%04u", frame_no);
+ debug_dump_surface_bmp(&svga->pipe, filename, fb->zsbuf);
+ }
+
+ ++frame_no;
+ }
+ }
}
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
index 8c24fb302f7..440919c6262 100644
--- a/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -94,7 +94,7 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
- svga_propagate_surface(pipe, dst->cbufs[i]);
+ svga_propagate_surface(svga, dst->cbufs[i]);
}
/* XXX: Actually the virtual hardware may support rendertargets with
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index e97b4e57415..4a1a37f1765 100644
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -64,7 +64,9 @@ static void *
svga_create_rasterizer_state(struct pipe_context *pipe,
const struct pipe_rasterizer_state *templ)
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state );
+
/* need this for draw module. */
rast->templ = *templ;
@@ -75,7 +77,6 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
/* point_quad_rasterization - ? */
/* point_size_per_vertex - ? */
/* sprite_coord_mode - ??? */
- /* bypass_vs_viewport_and_clip - handled by viewport setup */
/* flatshade_first - handled by index translation */
/* gl_rasterization_rules - XXX - viewport code */
/* line_width - draw module */
@@ -93,17 +94,22 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
/* Use swtnl + decomposition implement these:
*/
- if (templ->poly_stipple_enable)
+ if (templ->poly_stipple_enable) {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "poly stipple";
+ }
- if (templ->line_width != 1.0 &&
- templ->line_width != 0.0)
+ if (templ->line_width >= 1.5f &&
+ !svga->debug.no_line_width) {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+ rast->need_pipeline_lines_str = "line width";
+ }
if (templ->line_stipple_enable) {
- /* LinePattern not implemented on all backends.
+ /* XXX: LinePattern not implemented on all backends, and there is no
+ * mechanism to query it.
*/
- if (0) {
+ if (!svga->debug.force_hw_line_stipple) {
SVGA3dLinePattern lp;
lp.repeat = templ->line_stipple_factor + 1;
lp.pattern = templ->line_stipple_pattern;
@@ -111,11 +117,19 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
}
else {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+ rast->need_pipeline_lines_str = "line stipple";
}
}
- if (templ->point_smooth)
+ if (templ->point_smooth) {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
+ rast->need_pipeline_points_str = "smooth points";
+ }
+
+ if (templ->line_smooth) {
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+ rast->need_pipeline_lines_str = "smooth lines";
+ }
{
int fill_front = templ->fill_front;
@@ -148,6 +162,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
* front/back fill modes:
*/
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "different front/back fillmodes";
}
else {
offset = offset_front;
@@ -172,6 +187,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
{
fill = PIPE_POLYGON_MODE_FILL;
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "unfilled primitives with no index manipulation";
}
/* If we are decomposing to lines, and lines need the pipeline,
@@ -182,6 +198,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
{
fill = PIPE_POLYGON_MODE_FILL;
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "decomposing lines";
}
/* Similarly for points:
@@ -191,6 +208,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
{
fill = PIPE_POLYGON_MODE_FILL;
rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ rast->need_pipeline_tris_str = "decomposing points";
}
if (offset) {
@@ -201,9 +219,6 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
rast->hw_unfilled = fill;
}
-
-
-
if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
/* Turn off stuff which will get done in the draw module:
*/
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index f44a0e1325a..446fcc44078 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -144,8 +144,9 @@ svga_create_sampler_state(struct pipe_context *pipe,
return cso;
}
-static void svga_bind_sampler_states(struct pipe_context *pipe,
- unsigned num, void **sampler)
+static void
+svga_bind_fragment_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
{
struct svga_context *svga = svga_context(pipe);
unsigned i;
@@ -203,9 +204,10 @@ svga_sampler_view_destroy(struct pipe_context *pipe,
FREE(view);
}
-static void svga_set_sampler_views(struct pipe_context *pipe,
- unsigned num,
- struct pipe_sampler_view **views)
+static void
+svga_set_fragment_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
{
struct svga_context *svga = svga_context(pipe);
unsigned flag_1d = 0;
@@ -256,9 +258,9 @@ static void svga_set_sampler_views(struct pipe_context *pipe,
void svga_init_sampler_functions( struct svga_context *svga )
{
svga->pipe.create_sampler_state = svga_create_sampler_state;
- svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states;
+ svga->pipe.bind_fragment_sampler_states = svga_bind_fragment_sampler_states;
svga->pipe.delete_sampler_state = svga_delete_sampler_state;
- svga->pipe.set_fragment_sampler_views = svga_set_sampler_views;
+ svga->pipe.set_fragment_sampler_views = svga_set_fragment_sampler_views;
svga->pipe.create_sampler_view = svga_create_sampler_view;
svga->pipe.sampler_view_destroy = svga_sampler_view_destroy;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index 86c79459f3e..58469910732 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -27,6 +27,7 @@
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
#include "svga_screen.h"
diff --git a/src/gallium/drivers/svga/svga_resource.c b/src/gallium/drivers/svga/svga_resource.c
index ef2a0c40f03..bed15ec02e5 100644
--- a/src/gallium/drivers/svga/svga_resource.c
+++ b/src/gallium/drivers/svga/svga_resource.c
@@ -40,6 +40,7 @@ svga_init_resource_functions(struct svga_context *svga)
svga->pipe.transfer_unmap = u_transfer_unmap_vtbl;
svga->pipe.transfer_destroy = u_transfer_destroy_vtbl;
svga->pipe.transfer_inline_write = u_transfer_inline_write_vtbl;
+ svga->pipe.redefine_user_buffer = svga_redefine_user_buffer;
}
void
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index f12e2b68627..e1f07d655b9 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -105,9 +105,12 @@ svga_buffer_map_range( struct pipe_screen *screen,
* We can't create a hardware buffer big enough, so create a malloc
* buffer instead.
*/
- debug_printf("%s: failed to allocate %u KB of DMA, splitting DMA transfers\n",
- __FUNCTION__,
- (sbuf->b.b.width0 + 1023)/1024);
+ if (0) {
+ debug_printf("%s: failed to allocate %u KB of DMA, "
+ "splitting DMA transfers\n",
+ __FUNCTION__,
+ (sbuf->b.b.width0 + 1023)/1024);
+ }
sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
}
@@ -308,6 +311,9 @@ svga_buffer_create(struct pipe_screen *screen,
goto error2;
}
+ debug_reference(&sbuf->b.b.reference,
+ (debug_reference_descriptor)debug_describe_resource, 0);
+
return &sbuf->b.b;
error2:
@@ -341,6 +347,9 @@ svga_user_buffer_create(struct pipe_screen *screen,
sbuf->swbuf = ptr;
sbuf->user = TRUE;
+
+ debug_reference(&sbuf->b.b.reference,
+ (debug_reference_descriptor)debug_describe_resource, 0);
return &sbuf->b.b;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h
index d3ec11bfd52..c559f70ec12 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -243,4 +243,10 @@ svga_winsys_buffer_create(struct svga_context *svga,
unsigned usage,
unsigned size);
+void
+svga_redefine_user_buffer(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned offset,
+ unsigned size);
+
#endif /* SVGA_BUFFER_H */
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 3de5216a949..76a3803224a 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -40,6 +40,9 @@
#include "svga_debug.h"
+#define MAX_DMA_SIZE (8 * 1024 * 1024)
+
+
/**
* Allocate a winsys_buffer (ie. DMA, aka GMR memory).
*
@@ -57,6 +60,13 @@ svga_winsys_buffer_create( struct svga_context *svga,
struct svga_winsys_screen *sws = svgascreen->sws;
struct svga_winsys_buffer *buf;
+ /* XXX this shouldn't be a hard-coded number; it should be queried
+ * somehow.
+ */
+ if (size > MAX_DMA_SIZE) {
+ return NULL;
+ }
+
/* Just try */
buf = sws->buffer_create(sws, alignment, usage, size);
if(!buf) {
@@ -248,6 +258,7 @@ svga_buffer_upload_flush(struct svga_context *svga,
{
SVGA3dCopyBox *boxes;
unsigned i;
+ struct pipe_resource *dummy;
assert(sbuf->handle);
assert(sbuf->hwbuf);
@@ -289,9 +300,9 @@ svga_buffer_upload_flush(struct svga_context *svga,
sbuf->dma.svga = NULL;
sbuf->dma.boxes = NULL;
- /* Decrement reference count */
- pipe_reference(&(sbuf->b.b.reference), NULL);
- sbuf = NULL;
+ /* Decrement reference count (and potentially destroy) */
+ dummy = &sbuf->b.b;
+ pipe_resource_reference(&dummy, NULL);
}
@@ -638,3 +649,54 @@ svga_context_flush_buffers(struct svga_context *svga)
next = curr->next;
}
}
+
+
+void
+svga_redefine_user_buffer(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned offset,
+ unsigned size)
+{
+ struct svga_screen *ss = svga_screen(pipe->screen);
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_buffer *sbuf = svga_buffer(resource);
+
+ assert(sbuf->user);
+
+ /*
+ * Release any uploaded user buffer.
+ *
+ * TODO: As an optimization, we could try to update the uploaded buffer
+ * instead.
+ */
+
+ pipe_resource_reference(&sbuf->uploaded.buffer, NULL);
+
+ pipe_mutex_lock(ss->swc_mutex);
+
+ if (offset + size > resource->width0) {
+ /*
+ * User buffers shouldn't have DMA directly, unless
+ * SVGA_COMBINE_USERBUFFERS is not set.
+ */
+
+ if (sbuf->dma.pending) {
+ svga_buffer_upload_flush(svga, sbuf);
+ }
+
+ if (sbuf->handle) {
+ svga_buffer_destroy_host_surface(ss, sbuf);
+ }
+
+ if (sbuf->hwbuf) {
+ svga_buffer_destroy_hw_storage(ss, sbuf);
+ }
+
+ sbuf->key.size.width = sbuf->b.b.width0 = offset + size;
+ }
+
+ pipe_mutex_unlock(ss->swc_mutex);
+
+ svga->curr.any_user_vertex_buffers = TRUE;
+ svga->dirty |= SVGA_NEW_VBUFFER | SVGA_NEW_VELEMENT;
+}
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index 7c9e600b9f4..994f30719ae 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -146,16 +146,6 @@ svga_translate_format_render(enum pipe_format format)
case PIPE_FORMAT_L8_UNORM:
return svga_translate_format(format);
-#if 1
- /* For on host conversion */
- case PIPE_FORMAT_DXT1_RGB:
- return SVGA3D_X8R8G8B8;
- case PIPE_FORMAT_DXT1_RGBA:
- case PIPE_FORMAT_DXT3_RGBA:
- case PIPE_FORMAT_DXT5_RGBA:
- return SVGA3D_A8R8G8B8;
-#endif
-
default:
return SVGA3D_FORMAT_INVALID;
}
@@ -204,7 +194,7 @@ svga_transfer_dma_band(struct svga_context *svga,
ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1);
if(ret != PIPE_OK) {
- svga->swc->flush(svga->swc, NULL);
+ svga_context_flush(svga, NULL);
ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1);
assert(ret == PIPE_OK);
}
@@ -225,6 +215,10 @@ svga_transfer_dma(struct svga_context *svga,
SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__);
}
+ /* Ensure any pending operations on host surfaces are queued on the command
+ * buffer first.
+ */
+ svga_surfaces_flush( svga );
if(!st->swbuf) {
/* Do the DMA transfer in a single go */
@@ -390,11 +384,15 @@ svga_texture_get_transfer(struct pipe_context *pipe,
if(st->hw_nblocksy < nblocksy) {
/* We couldn't allocate a hardware buffer big enough for the transfer,
* so allocate regular malloc memory instead */
- debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n",
- __FUNCTION__,
- (nblocksy*st->base.stride + 1023)/1024,
- (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
- (st->hw_nblocksy*st->base.stride + 1023)/1024);
+ if (0) {
+ debug_printf("%s: failed to allocate %u KB of DMA, "
+ "splitting into %u x %u KB DMA transfers\n",
+ __FUNCTION__,
+ (nblocksy*st->base.stride + 1023)/1024,
+ (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
+ (st->hw_nblocksy*st->base.stride + 1023)/1024);
+ }
+
st->swbuf = MALLOC(nblocksy*st->base.stride);
if(!st->swbuf)
goto no_swbuf;
@@ -527,7 +525,8 @@ svga_texture_create(struct pipe_screen *screen,
tex->key.numFaces = 1;
}
- tex->key.cachable = 1;
+ /* XXX: Disabled for now */
+ tex->key.cachable = 0;
if (template->bind & PIPE_BIND_SAMPLER_VIEW)
tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
@@ -571,6 +570,9 @@ svga_texture_create(struct pipe_screen *screen,
if (tex->handle)
SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle);
+ debug_reference(&tex->b.b.reference,
+ (debug_reference_descriptor)debug_describe_resource, 0);
+
return &tex->b.b;
error2:
diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c
index 6911f13f778..4f1f4b597e8 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.c
+++ b/src/gallium/drivers/svga/svga_sampler_view.c
@@ -32,6 +32,7 @@
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
#include "svga_screen.h"
#include "svga_context.h"
@@ -41,14 +42,24 @@
#include "svga_surface.h"
+void
+svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv)
+{
+ char res[128];
+ debug_describe_resource(res, sv->texture);
+ util_sprintf(buf, "svga_sampler_view<%s,[%u,%u]>", res, sv->min_lod, sv->max_lod);
+}
+
struct svga_sampler_view *
svga_get_tex_sampler_view(struct pipe_context *pipe,
struct pipe_resource *pt,
unsigned min_lod, unsigned max_lod)
{
- struct svga_screen *ss = svga_screen(pt->screen);
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_screen *ss = svga_screen(pipe->screen);
struct svga_texture *tex = svga_texture(pt);
struct svga_sampler_view *sv = NULL;
+ SVGA3dSurfaceFlags flags = SVGA3D_SURFACE_HINT_TEXTURE;
SVGA3dSurfaceFormat format = svga_translate_format(pt->format);
boolean view = TRUE;
@@ -68,10 +79,6 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
if (min_lod == 0 && max_lod >= pt->last_level)
view = FALSE;
- if (util_format_is_s3tc(pt->format) && view) {
- format = svga_translate_format_render(pt->format);
- }
-
if (ss->debug.no_sampler_view)
view = FALSE;
@@ -113,6 +120,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
pt->last_level);
sv->key.cachable = 0;
sv->handle = tex->handle;
+ debug_reference(&sv->reference,
+ (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
return sv;
}
@@ -126,7 +135,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
pt->last_level);
sv->age = tex->age;
- sv->handle = svga_texture_view_surface(pipe, tex, format,
+ sv->handle = svga_texture_view_surface(svga, tex, flags, format,
min_lod,
max_lod - min_lod + 1,
-1, -1,
@@ -136,6 +145,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
assert(0);
sv->key.cachable = 0;
sv->handle = tex->handle;
+ debug_reference(&sv->reference,
+ (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
return sv;
}
@@ -143,6 +154,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
svga_sampler_view_reference(&tex->cached_view, sv);
pipe_mutex_unlock(ss->tex_mutex);
+ debug_reference(&sv->reference,
+ (debug_reference_descriptor)svga_debug_describe_sampler_view, 0);
+
return sv;
}
diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h
index e64665f2e58..2087c1be85e 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.h
+++ b/src/gallium/drivers/svga/svga_sampler_view.h
@@ -83,12 +83,16 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
void
svga_destroy_sampler_view_priv(struct svga_sampler_view *v);
+void
+svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv);
+
static INLINE void
svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v)
{
struct svga_sampler_view *old = *ptr;
- if (pipe_reference(&(*ptr)->reference, &v->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &v->reference,
+ (debug_reference_descriptor)svga_debug_describe_sampler_view))
svga_destroy_sampler_view_priv(old);
*ptr = v;
}
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index d0f42c614c9..ef1d3098d51 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -225,13 +225,18 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
return svgascreen->use_ps30 ? 32 : 12;
return result.u;
case PIPE_SHADER_CAP_MAX_ADDRS:
- return svgascreen->use_ps30 ? 1 : 0;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ /*
+ * Although PS 3.0 has some addressing abilities it can only represent
+ * loops that can be statically determined and unrolled. Given we can
+ * only handle a subset of the cases that the state tracker already
+ * does it is better to defer loop unrolling to the state tracker.
+ */
+ return 0;
case PIPE_SHADER_CAP_MAX_PREDS:
return svgascreen->use_ps30 ? 1 : 0;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
- case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
- return svgascreen->use_ps30 ? 1 : 0;
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
@@ -360,13 +365,6 @@ svga_is_format_supported( struct pipe_screen *screen,
case PIPE_FORMAT_B5G5R5A1_UNORM:
return FALSE;
- /* Simulate ability to render into compressed textures */
- case PIPE_FORMAT_DXT1_RGB:
- case PIPE_FORMAT_DXT1_RGBA:
- case PIPE_FORMAT_DXT3_RGBA:
- case PIPE_FORMAT_DXT5_RGBA:
- return TRUE;
-
default:
break;
}
diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h
index 22d5a6d552a..7f239e7a322 100644
--- a/src/gallium/drivers/svga/svga_state.h
+++ b/src/gallium/drivers/svga/svga_state.h
@@ -92,4 +92,8 @@ void svga_update_state_retry( struct svga_context *svga,
enum pipe_error svga_emit_initial_state( struct svga_context *svga );
+enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga );
+
+enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga );
+
#endif
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
index fcbb35e7972..cdadb20c178 100644
--- a/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -93,6 +93,55 @@ static int emit_framebuffer( struct svga_context *svga,
}
+/*
+ * Rebind rendertargets.
+ *
+ * Similar to emit_framebuffer, but without any state checking/update.
+ *
+ * Called at the beginning of every new command buffer to ensure that
+ * non-dirty rendertargets are properly paged-in.
+ */
+enum pipe_error
+svga_reemit_framebuffer_bindings(struct svga_context *svga)
+{
+ struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+ unsigned i;
+ enum pipe_error ret;
+
+ for (i = 0; i < MIN2(PIPE_MAX_COLOR_BUFS, 8); ++i) {
+ if (hw->cbufs[i]) {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, hw->cbufs[i]);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+ }
+
+ if (hw->zsbuf) {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, hw->zsbuf);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+
+ if (hw->zsbuf &&
+ hw->zsbuf->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+ else {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+ }
+
+ return PIPE_OK;
+}
+
+
struct svga_tracked_state svga_hw_framebuffer =
{
"hw framebuffer state",
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index 8ba5ac8cdb4..68c02578789 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -35,6 +35,11 @@
/***********************************************************************
*/
+
+/**
+ * Given a gallium vertex element format, return the corresponding SVGA3D
+ * format. Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats.
+ */
static INLINE SVGA3dDeclType
svga_translate_vertex_format(enum pipe_format format)
{
@@ -80,6 +85,7 @@ static int update_need_swvfetch( struct svga_context *svga,
for (i = 0; i < svga->curr.velems->count; i++) {
svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format);
if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) {
+ /* Unsupported format - use software fetch */
need_swvfetch = TRUE;
break;
}
@@ -118,6 +124,11 @@ static int update_need_pipeline( struct svga_context *svga,
__FUNCTION__,
svga->curr.rast->need_pipeline,
(1 << svga->curr.reduced_prim) );
+ SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline tris (%s), lines (%s), points (%s)\n",
+ __FUNCTION__,
+ svga->curr.rast->need_pipeline_tris_str,
+ svga->curr.rast->need_pipeline_lines_str,
+ svga->curr.rast->need_pipeline_points_str);
need_pipeline = TRUE;
}
@@ -140,6 +151,10 @@ static int update_need_pipeline( struct svga_context *svga,
svga->dirty |= SVGA_NEW_NEED_PIPELINE;
}
+ /* DEBUG */
+ if (0 && svga->state.sw.need_pipeline)
+ debug_printf("sw.need_pipeline = %d\n", svga->state.sw.need_pipeline);
+
return 0;
}
@@ -164,15 +179,15 @@ static int update_need_swtnl( struct svga_context *svga,
boolean need_swtnl;
if (svga->debug.no_swtnl) {
- svga->state.sw.need_swvfetch = 0;
- svga->state.sw.need_pipeline = 0;
+ svga->state.sw.need_swvfetch = FALSE;
+ svga->state.sw.need_pipeline = FALSE;
}
need_swtnl = (svga->state.sw.need_swvfetch ||
svga->state.sw.need_pipeline);
if (svga->debug.force_swtnl) {
- need_swtnl = 1;
+ need_swtnl = TRUE;
}
/*
@@ -181,7 +196,7 @@ static int update_need_swtnl( struct svga_context *svga,
* the wrong buffers and vertex formats. Try trivial/line-wide.
*/
if (svga->state.sw.in_swtnl_draw)
- need_swtnl = 1;
+ need_swtnl = TRUE;
if (need_swtnl != svga->state.sw.need_swtnl) {
SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF,
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index f8b269a101e..c502506b93b 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -52,6 +52,16 @@ void svga_cleanup_tss_binding(struct svga_context *svga)
}
+struct bind_queue {
+ struct {
+ unsigned unit;
+ struct svga_hw_view_state *view;
+ } bind[PIPE_MAX_SAMPLERS];
+
+ unsigned bind_count;
+};
+
+
static int
update_tss_binding(struct svga_context *svga,
unsigned dirty )
@@ -63,15 +73,7 @@ update_tss_binding(struct svga_context *svga,
unsigned min_lod;
unsigned max_lod;
-
- struct {
- struct {
- unsigned unit;
- struct svga_hw_view_state *view;
- } bind[PIPE_MAX_SAMPLERS];
-
- unsigned bind_count;
- } queue;
+ struct bind_queue queue;
queue.bind_count = 0;
@@ -164,6 +166,64 @@ fail:
}
+/*
+ * Rebind textures.
+ *
+ * Similar to update_tss_binding, but without any state checking/update.
+ *
+ * Called at the beginning of every new command buffer to ensure that
+ * non-dirty textures are properly paged-in.
+ */
+enum pipe_error
+svga_reemit_tss_bindings(struct svga_context *svga)
+{
+ unsigned i;
+ enum pipe_error ret;
+ struct bind_queue queue;
+
+ queue.bind_count = 0;
+
+ for (i = 0; i < svga->state.hw_draw.num_views; i++) {
+ struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
+
+ if (view->v) {
+ queue.bind[queue.bind_count].unit = i;
+ queue.bind[queue.bind_count].view = view;
+ queue.bind_count++;
+ }
+ }
+
+ if (queue.bind_count) {
+ SVGA3dTextureState *ts;
+
+ ret = SVGA3D_BeginSetTextureState(svga->swc,
+ &ts,
+ queue.bind_count);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+
+ for (i = 0; i < queue.bind_count; i++) {
+ struct svga_winsys_surface *handle;
+
+ ts[i].stage = queue.bind[i].unit;
+ ts[i].name = SVGA3D_TS_BIND_TEXTURE;
+
+ assert(queue.bind[i].view->v);
+ handle = queue.bind[i].view->v->handle;
+ svga->swc->surface_relocation(svga->swc,
+ &ts[i].value,
+ handle,
+ SVGA_RELOC_READ);
+ }
+
+ SVGA_FIFOCommitAll(svga->swc);
+ }
+
+ return PIPE_OK;
+}
+
+
struct svga_tracked_state svga_hw_tss_binding = {
"texture binding emit",
SVGA_NEW_TEXTURE_BINDING |
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 6682a1efe66..ae9a20ebb81 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -229,7 +229,7 @@ static int update_zero_stride( struct svga_context *svga,
translate->set_buffer(translate, vel->vertex_buffer_index,
mapped_buffer,
- vbuffer->stride, vbuffer->max_index);
+ vbuffer->stride, ~0);
translate->run(translate, 0, 1, 0,
svga->curr.zero_stride_constants);
diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index 3e4bed76c05..3e8fb5f0271 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -100,8 +100,9 @@ svga_texture_copy_handle(struct svga_context *svga,
struct svga_winsys_surface *
-svga_texture_view_surface(struct pipe_context *pipe,
+svga_texture_view_surface(struct svga_context *svga,
struct svga_texture *tex,
+ SVGA3dSurfaceFlags flags,
SVGA3dSurfaceFormat format,
unsigned start_mip,
unsigned num_mip,
@@ -109,7 +110,7 @@ svga_texture_view_surface(struct pipe_context *pipe,
int zslice_pick,
struct svga_host_surface_cache_key *key) /* OUT */
{
- struct svga_screen *ss = svga_screen(pipe->screen);
+ struct svga_screen *ss = svga_screen(svga->pipe.screen);
struct svga_winsys_surface *handle;
uint32_t i, j;
unsigned z_offset = 0;
@@ -118,7 +119,7 @@ svga_texture_view_surface(struct pipe_context *pipe,
"svga: Create surface view: face %d zslice %d mips %d..%d\n",
face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
- key->flags = 0;
+ key->flags = flags;
key->format = format;
key->numMipLevels = num_mip;
key->size.width = u_minify(tex->b.b.width0, start_mip);
@@ -161,7 +162,7 @@ svga_texture_view_surface(struct pipe_context *pipe,
u_minify(tex->b.b.depth0, i + start_mip) :
1);
- svga_texture_copy_handle(svga_context(pipe),
+ svga_texture_copy_handle(svga,
tex->handle,
0, 0, z_offset,
i + start_mip,
@@ -183,6 +184,7 @@ svga_create_surface(struct pipe_context *pipe,
struct pipe_resource *pt,
const struct pipe_surface *surf_tmpl)
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_texture *tex = svga_texture(pt);
struct pipe_screen *screen = pipe->screen;
struct svga_surface *s;
@@ -191,6 +193,7 @@ svga_create_surface(struct pipe_context *pipe,
boolean render = (surf_tmpl->usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DEPTH_STENCIL)) ? TRUE : FALSE;
boolean view = FALSE;
+ SVGA3dSurfaceFlags flags;
SVGA3dSurfaceFormat format;
assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
@@ -219,10 +222,18 @@ svga_create_surface(struct pipe_context *pipe,
s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- if (!render)
+ if (!render) {
+ flags = SVGA3D_SURFACE_HINT_TEXTURE;
format = svga_translate_format(surf_tmpl->format);
- else
+ } else {
+ if (surf_tmpl->usage & PIPE_BIND_RENDER_TARGET) {
+ flags = SVGA3D_SURFACE_HINT_RENDERTARGET;
+ }
+ if (surf_tmpl->usage & PIPE_BIND_DEPTH_STENCIL) {
+ flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+ }
format = svga_translate_format_render(surf_tmpl->format);
+ }
assert(format != SVGA3D_FORMAT_INVALID);
@@ -249,7 +260,8 @@ svga_create_surface(struct pipe_context *pipe,
SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
pt, surf_tmpl->u.tex.level, face, zslice, s);
- s->handle = svga_texture_view_surface(NULL, tex, format, surf_tmpl->u.tex.level,
+ s->handle = svga_texture_view_surface(svga, tex, flags, format,
+ surf_tmpl->u.tex.level,
1, face, zslice, &s->key);
s->real_face = 0;
s->real_level = 0;
@@ -329,7 +341,7 @@ void svga_mark_surfaces_dirty(struct svga_context *svga)
* pipe is optional context to inline the blit command in.
*/
void
-svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
+svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf)
{
struct svga_surface *s = svga_surface(surf);
struct svga_texture *tex = svga_texture(surf->texture);
@@ -354,7 +366,7 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
if (s->handle != tex->handle) {
SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->u.tex.level, surf);
- svga_texture_copy_handle(svga_context(pipe),
+ svga_texture_copy_handle(svga,
s->handle, 0, 0, 0, s->real_level, s->real_face,
tex->handle, 0, 0, zslice, surf->u.tex.level, face,
u_minify(tex->b.b.width0, surf->u.tex.level),
diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h
index afb8326e1f3..bffc8c22c60 100644
--- a/src/gallium/drivers/svga/svga_surface.h
+++ b/src/gallium/drivers/svga/svga_surface.h
@@ -56,14 +56,15 @@ struct svga_surface
extern void
-svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf);
+svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf);
extern boolean
svga_surface_needs_propagation(struct pipe_surface *surf);
struct svga_winsys_surface *
-svga_texture_view_surface(struct pipe_context *pipe,
+svga_texture_view_surface(struct svga_context *svga,
struct svga_texture *tex,
+ SVGA3dSurfaceFlags flags,
SVGA3dSurfaceFormat format,
unsigned start_mip,
unsigned num_mip,
diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c
index d5db6bf641a..ac9d637f8cb 100644
--- a/src/gallium/drivers/svga/svga_swtnl_backend.c
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@@ -87,11 +87,14 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size);
svga_render->vbuf = pipe_buffer_create(screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
svga_render->vbuf_size);
if(!svga_render->vbuf) {
svga_context_flush(svga, NULL);
+ assert(!svga_render->vbuf);
svga_render->vbuf = pipe_buffer_create(screen,
PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
svga_render->vbuf_size);
assert(svga_render->vbuf);
}
@@ -261,6 +264,7 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size);
svga_render->ibuf = pipe_buffer_create(screen,
PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_STREAM,
svga_render->ibuf_size);
svga_render->ibuf_offset = 0;
}
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index 05d86e1fb16..ad29c1b6425 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -124,6 +124,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
/* Now safe to remove the need_swtnl flag in any update_state call */
svga->state.sw.in_swtnl_draw = FALSE;
+ svga->dirty |= SVGA_NEW_NEED_PIPELINE | SVGA_NEW_NEED_SWVFETCH;
return ret;
}
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index a7592382936..efda2f605b9 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -61,7 +61,7 @@ static void set_draw_viewport( struct svga_context *svga )
* going to be drawn with triangles, but we're not catching all
* cases where that will happen.
*/
- if (svga->curr.rast->templ.line_width > 1.0)
+ if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
{
adjx = SVGA_LINE_ADJ_X + 0.175;
adjy = SVGA_LINE_ADJ_Y - 0.175;
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index f2591c5721a..99600cf5c00 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -57,7 +57,6 @@ translate_opcode(
case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
- case TGSI_OPCODE_SSG: return SVGA3DOP_SGN;
default:
debug_printf("Unkown opcode %u\n", opcode);
assert( 0 );
@@ -285,6 +284,41 @@ static void reset_temp_regs( struct svga_shader_emitter *emit )
}
+/* Replace the src with the temporary specified in the dst, but copying
+ * only the necessary channels, and preserving the original swizzle (which is
+ * important given that several opcodes have constraints in the allowed
+ * swizzles).
+ */
+static boolean emit_repl( struct svga_shader_emitter *emit,
+ SVGA3dShaderDestToken dst,
+ struct src_register *src0)
+{
+ unsigned src0_swizzle;
+ unsigned chan;
+
+ assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
+
+ src0_swizzle = src0->base.swizzle;
+
+ dst.mask = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
+ dst.mask |= 1 << swizzle;
+ }
+ assert(dst.mask);
+
+ src0->base.swizzle = SVGA3DSWIZZLE_NONE;
+
+ if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
+ return FALSE;
+
+ *src0 = src( dst );
+ src0->base.swizzle = src0_swizzle;
+
+ return TRUE;
+}
+
+
static boolean submit_op0( struct svga_shader_emitter *emit,
SVGA3dShaderInstToken inst,
SVGA3dShaderDestToken dest )
@@ -333,14 +367,11 @@ static boolean submit_op2( struct svga_shader_emitter *emit,
src0.base.num != src1.base.num)
need_temp = TRUE;
- if (need_temp)
- {
+ if (need_temp) {
temp = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
+ if (!emit_repl( emit, temp, &src0 ))
return FALSE;
-
- src0 = src( temp );
}
if (!emit_op2( emit, inst, dest, src0, src1 ))
@@ -396,24 +427,18 @@ static boolean submit_op3( struct svga_shader_emitter *emit,
(type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
need_temp1 = TRUE;
- if (need_temp0)
- {
+ if (need_temp0) {
temp0 = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
+ if (!emit_repl( emit, temp0, &src0 ))
return FALSE;
-
- src0 = src( temp0 );
}
- if (need_temp1)
- {
+ if (need_temp1) {
temp1 = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
+ if (!emit_repl( emit, temp1, &src1 ))
return FALSE;
-
- src1 = src( temp1 );
}
if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
@@ -478,24 +503,18 @@ static boolean submit_op4( struct svga_shader_emitter *emit,
(type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
need_temp3 = TRUE;
- if (need_temp0)
- {
+ if (need_temp0) {
temp0 = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
+ if (!emit_repl( emit, temp0, &src0 ))
return FALSE;
-
- src0 = src( temp0 );
}
- if (need_temp3)
- {
+ if (need_temp3) {
temp3 = get_temp( emit );
- if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 ))
+ if (!emit_repl( emit, temp3, &src3 ))
return FALSE;
-
- src3 = src( temp3 );
}
if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
@@ -509,6 +528,55 @@ static boolean submit_op4( struct svga_shader_emitter *emit,
}
+static boolean alias_src_dst( struct src_register src,
+ SVGA3dShaderDestToken dst )
+{
+ if (src.base.num != dst.num)
+ return FALSE;
+
+ if (SVGA3dShaderGetRegType(dst.value) !=
+ SVGA3dShaderGetRegType(src.base.value))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+static boolean submit_lrp(struct svga_shader_emitter *emit,
+ SVGA3dShaderDestToken dst,
+ struct src_register src0,
+ struct src_register src1,
+ struct src_register src2)
+{
+ SVGA3dShaderDestToken tmp;
+ boolean need_dst_tmp = FALSE;
+
+ /* The dst reg must be a temporary, and not be the same as src0 or src2 */
+ if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
+ alias_src_dst(src0, dst) ||
+ alias_src_dst(src2, dst))
+ need_dst_tmp = TRUE;
+
+ if (need_dst_tmp) {
+ tmp = get_temp( emit );
+ tmp.mask = dst.mask;
+ }
+ else {
+ tmp = dst;
+ }
+
+ if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
+ return FALSE;
+
+ if (need_dst_tmp) {
+ if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
static boolean emit_def_const( struct svga_shader_emitter *emit,
SVGA3dShaderConstType type,
unsigned idx,
@@ -747,7 +815,7 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit,
static boolean emit_if(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn)
{
- const struct src_register src = translate_src_register(
+ struct src_register src0 = translate_src_register(
emit, &insn->Src[0] );
struct src_register zero = get_zero_immediate( emit );
SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
@@ -755,10 +823,23 @@ static boolean emit_if(struct svga_shader_emitter *emit,
if_token.control = SVGA3DOPCOMPC_NE;
zero = scalar(zero, TGSI_SWIZZLE_X);
+ if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
+ /*
+ * Max different constant registers readable per IFC instruction is 1.
+ */
+
+ SVGA3dShaderDestToken tmp = get_temp( emit );
+
+ if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
+ return FALSE;
+
+ src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
+ }
+
emit->dynamic_branching_level++;
return (emit_instruction( emit, if_token ) &&
- emit_src( emit, src ) &&
+ emit_src( emit, src0 ) &&
emit_src( emit, zero ) );
}
@@ -832,7 +913,7 @@ static boolean emit_cmp(struct svga_shader_emitter *emit,
*/
if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
return FALSE;
- return submit_op3(emit, inst_token(SVGA3DOP_LRP), dst, src(temp), src1, src2);
+ return submit_lrp(emit, dst, src(temp), src1, src2);
}
/* CMP DST, SRC0, SRC2, SRC1 */
@@ -1066,6 +1147,41 @@ static boolean emit_cos(struct svga_shader_emitter *emit,
return TRUE;
}
+static boolean emit_ssg(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ SVGA3dShaderDestToken temp0 = get_temp( emit );
+ SVGA3dShaderDestToken temp1 = get_temp( emit );
+ struct src_register zero, one;
+
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ /* SGN DST, SRC0, TMP0, TMP1 */
+ return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
+ src( temp0 ), src( temp1 ) );
+ }
+
+ zero = get_zero_immediate( emit );
+ one = scalar( zero, TGSI_SWIZZLE_W );
+ zero = scalar( zero, TGSI_SWIZZLE_X );
+
+ /* CMP TMP0, SRC0, one, zero */
+ if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
+ writemask( temp0, dst.mask ), src0, one, zero ))
+ return FALSE;
+
+ /* CMP TMP1, negate(SRC0), negate(one), zero */
+ if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
+ writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
+ zero ))
+ return FALSE;
+
+ /* ADD DST, TMP0, TMP1 */
+ return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
+ src( temp1 ) );
+}
/*
* ADD DST SRC0, negate(SRC0)
@@ -1588,6 +1704,10 @@ static boolean emit_deriv(struct svga_shader_emitter *emit,
}
else {
unsigned opcode;
+ const struct tgsi_full_src_register *reg = &insn->Src[0];
+ SVGA3dShaderInstToken inst;
+ SVGA3dShaderDestToken dst;
+ struct src_register src0;
switch (insn->Instruction.Opcode) {
case TGSI_OPCODE_DDX:
@@ -1600,7 +1720,21 @@ static boolean emit_deriv(struct svga_shader_emitter *emit,
return FALSE;
}
- return emit_simple_instruction( emit, opcode, insn );
+ inst = inst_token( opcode );
+ dst = translate_dst_register( emit, insn, 0 );
+ src0 = translate_src_register( emit, reg );
+
+ /* We cannot use negate or abs on source to dsx/dsy instruction.
+ */
+ if (reg->Register.Absolute ||
+ reg->Register.Negate) {
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ if (!emit_repl( emit, temp, &src0 ))
+ return FALSE;
+ }
+
+ return submit_op1( emit, inst, dst, src0 );
}
}
@@ -1624,19 +1758,6 @@ static boolean emit_arl(struct svga_shader_emitter *emit,
}
}
-static boolean alias_src_dst( struct src_register src,
- SVGA3dShaderDestToken dst )
-{
- if (src.base.num != dst.num)
- return FALSE;
-
- if (SVGA3dShaderGetRegType(dst.value) !=
- SVGA3dShaderGetRegType(src.base.value))
- return FALSE;
-
- return TRUE;
-}
-
static boolean emit_pow(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn)
{
@@ -1729,37 +1850,14 @@ static boolean emit_lrp(struct svga_shader_emitter *emit,
const struct tgsi_full_instruction *insn)
{
SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
- SVGA3dShaderDestToken tmp;
const struct src_register src0 = translate_src_register(
emit, &insn->Src[0] );
const struct src_register src1 = translate_src_register(
emit, &insn->Src[1] );
const struct src_register src2 = translate_src_register(
emit, &insn->Src[2] );
- boolean need_dst_tmp = FALSE;
-
- /* The dst reg must not be the same as src0 or src2 */
- if (alias_src_dst(src0, dst) ||
- alias_src_dst(src2, dst))
- need_dst_tmp = TRUE;
- if (need_dst_tmp) {
- tmp = get_temp( emit );
- tmp.mask = dst.mask;
- }
- else {
- tmp = dst;
- }
-
- if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
- return FALSE;
-
- if (need_dst_tmp) {
- if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
- return FALSE;
- }
-
- return TRUE;
+ return submit_lrp(emit, dst, src0, src1, src2);
}
@@ -2366,6 +2464,9 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
case TGSI_OPCODE_LRP:
return emit_lrp( emit, insn );
+ case TGSI_OPCODE_SSG:
+ return emit_ssg( emit, insn );
+
default: {
unsigned opcode = translate_opcode(insn->Instruction.Opcode);
@@ -2715,6 +2816,7 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
return TRUE;
if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
return TRUE;
}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
index 95612a80063..ad1549d9f81 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
@@ -136,7 +136,7 @@ static struct sh_opcode_info opcode_info[] =
{ "dsy", 1, 1, 0, 0, SVGA3DOP_INVALID, },
{ "texldd", 1, 4, 0, 0, SVGA3DOP_INVALID, },
{ "setp", 1, 2, 0, 0, SVGA3DOP_SETP, },
- { "texldl", 1, 2, 0, 0, SVGA3DOP_INVALID, },
+ { "texldl", 1, 2, 0, 0, SVGA3DOP_TEXLDL, },
{ "breakp", 0, 1, 0, 0, SVGA3DOP_INVALID, },
};
@@ -156,6 +156,8 @@ const struct sh_opcode_info *svga_opcode_info( uint op )
if (info->svga_opcode == SVGA3DOP_INVALID) {
/* No valid information. Please provide number of dst/src registers.
*/
+ _debug_printf("Missing information for opcode %u, '%s'\n", op,
+ opcode_info[op].mnemonic);
assert( 0 );
return NULL;
}
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index eaabae8ce42..d24cc623c2e 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -1419,6 +1419,28 @@ trace_context_transfer_inline_write(struct pipe_context *_context,
}
+static void trace_redefine_user_buffer(struct pipe_context *_context,
+ struct pipe_resource *_resource,
+ unsigned offset, unsigned size)
+{
+ struct trace_context *tr_context = trace_context(_context);
+ struct trace_resource *tr_tex = trace_resource(_resource);
+ struct pipe_context *context = tr_context->pipe;
+ struct pipe_resource *resource = tr_tex->resource;
+
+ assert(resource->screen == context->screen);
+
+ trace_dump_call_begin("pipe_context", "redefine_user_buffer");
+
+ trace_dump_arg(ptr, context);
+ trace_dump_arg(ptr, resource);
+ trace_dump_arg(uint, offset);
+ trace_dump_arg(uint, size);
+
+ trace_dump_call_end();
+
+ context->redefine_user_buffer(context, resource, offset, size);
+}
static const struct debug_named_value rbug_blocker_flags[] = {
@@ -1506,6 +1528,7 @@ trace_context_create(struct trace_screen *tr_scr,
tr_ctx->base.transfer_unmap = trace_context_transfer_unmap;
tr_ctx->base.transfer_flush_region = trace_context_transfer_flush_region;
tr_ctx->base.transfer_inline_write = trace_context_transfer_inline_write;
+ tr_ctx->base.redefine_user_buffer = trace_redefine_user_buffer;
tr_ctx->pipe = pipe;
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 155c869fbd9..18805655bd7 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -517,7 +517,6 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state)
trace_dump_struct_begin("pipe_vertex_buffer");
trace_dump_member(uint, state, stride);
- trace_dump_member(uint, state, max_index);
trace_dump_member(uint, state, buffer_offset);
trace_dump_member(resource_ptr, state, buffer);