diff options
Diffstat (limited to 'src/gallium/drivers')
25 files changed, 563 insertions, 273 deletions
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index d92b2ccb31e..b4a91dabb37 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -75,6 +75,14 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch, batch->ptr += size; } +static INLINE boolean +i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffers, + int num_of_buffers) +{ + return batch->iws->validate_buffers(batch, buffers, num_of_buffers); +} + static INLINE int i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *buffer, diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index 97c25665156..f885417f8ed 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -49,6 +49,11 @@ i915_fill_blit(struct i915_context *i915, I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + if(!i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)) { + FLUSH_BATCH(NULL); + assert(i915_winsys_validate_buffers(i915->batch, &dst_buffer, 1)); + } + switch (cpp) { case 1: case 2: @@ -76,6 +81,8 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH(color); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } void @@ -94,6 +101,7 @@ i915_copy_blit(struct i915_context *i915, unsigned CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; + struct i915_winsys_buffer *buffers[2] = {src_buffer, dst_buffer}; I915_DBG(DBG_BLIT, @@ -102,6 +110,11 @@ i915_copy_blit(struct i915_context *i915, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + if(!i915_winsys_validate_buffers(i915->batch, buffers, 2)) { + FLUSH_BATCH(NULL); 
+ assert(i915_winsys_validate_buffers(i915->batch, buffers, 2)); + } + switch (cpp) { case 1: case 2: @@ -142,4 +155,6 @@ i915_copy_blit(struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 707b2e9f956..cbf919754e5 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -73,10 +73,13 @@ i915_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) draw_set_mapped_index_buffer(draw, mapped_indices); if (cbuf_dirty) { - draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, - i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, - (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + if (i915->constants[PIPE_SHADER_VERTEX]) + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, + i915_buffer(i915->constants[PIPE_SHADER_VERTEX])->data, + (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 4 * sizeof(float))); + else + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0); } /* @@ -165,6 +168,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; i915->dynamic_dirty = ~0; + i915->flush_dirty = 0; /* Batch stream debugging is a bit hacked up at the moment: */ diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 2cf53424f06..1da637d068e 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -150,6 +150,15 @@ struct i915_state /** Describes the current hardware vertex layout */ struct vertex_info vertex_info; + /* static state (dst/depth buffer state) */ + struct i915_winsys_buffer *cbuf_bo; + unsigned cbuf_flags; + struct i915_winsys_buffer *depth_bo; + 
unsigned depth_flags; + unsigned dst_buf_vars; + uint32_t draw_offset; + uint32_t draw_size; + unsigned id; /* track lost context events */ }; @@ -237,6 +246,10 @@ struct i915_context { unsigned hardware_dirty; unsigned immediate_dirty; unsigned dynamic_dirty; + unsigned flush_dirty; + + struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS]; + int num_validation_buffers; struct util_slab_mempool transfer_pool; }; @@ -277,6 +290,18 @@ struct i915_context { #define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) #define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) #define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1)) +#define I915_HW_FLUSH (1<<(I915_MAX_CACHE+1)) + +/* hw flush handling */ +#define I915_FLUSH_CACHE 1 +#define I915_PIPELINE_FLUSH 2 + +static INLINE +void i915_set_flush_dirty(struct i915_context *i915, unsigned flush) +{ + i915->hardware_dirty |= I915_HW_FLUSH; + i915->flush_dirty |= flush; +} /*********************************************************************** diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 911c051d1f2..22a2c7b2cb4 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -96,4 +96,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; i915->dynamic_dirty = ~0; + /* kernel emits flushes in between batchbuffers */ + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 509d487b498..0323ad940f9 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -36,73 +36,105 @@ #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_memory.h" -static unsigned translate_format( enum pipe_format format ) +struct i915_tracked_hw_state { + const char *name; + void (*validate)(struct i915_context *); + void (*emit)(struct 
i915_context *); + unsigned dirty, batch_space; +}; + + +static void +emit_flush(struct i915_context *i915) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - return COLOR_BUF_ARGB8888; - case PIPE_FORMAT_B5G6R5_UNORM: - return COLOR_BUF_RGB565; - default: - assert(0); - return 0; - } + /* Cache handling is very cheap atm. State handling can request two flushes: + * - I915_FLUSH_CACHE which is a flush everything request and + * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush. + * Because the cache handling is so dumb, no explicit "invalidate map cache". + * Also, the first is a strict superset of the latter, so the following logic + * works. */ + if (i915->flush_dirty & I915_FLUSH_CACHE) + OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE); + else if (i915->flush_dirty & I915_PIPELINE_FLUSH) + OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); +} + +static void +validate_immediate(struct i915_context *i915) +{ + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) + i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; +} + +static void +validate_static(struct i915_context *i915) +{ + if (i915->current.cbuf_bo) + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.cbuf_bo; + + if (i915->current.depth_bo) + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.depth_bo; } -static unsigned translate_depth_format( enum pipe_format zformat ) +static void +validate_map(struct i915_context *i915) { - switch (zformat) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return DEPTH_FRMT_24_FIXED_8_OTHER; - case PIPE_FORMAT_Z16_UNORM: - return DEPTH_FRMT_16_FIXED; - default: - assert(0); - return 0; + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + struct i915_texture *tex; + + + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + tex = i915_texture(i915->fragment_sampler_views[unit]->texture); + 
i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; + } } } +const static struct i915_tracked_hw_state hw_atoms[] = { + { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 }, + { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE }, + { "static", validate_static, NULL, I915_HW_STATIC }, + { "map", validate_map, NULL, I915_HW_MAP } +}; -/** - * Examine framebuffer state to determine width, height. - */ static boolean -framebuffer_size(const struct pipe_framebuffer_state *fb, - uint *width, uint *height) +i915_validate_state(struct i915_context *i915, unsigned *batch_space) { - if (fb->cbufs[0]) { - *width = fb->cbufs[0]->width; - *height = fb->cbufs[0]->height; - return TRUE; - } - else if (fb->zsbuf) { - *width = fb->zsbuf->width; - *height = fb->zsbuf->height; + int i; + + i915->num_validation_buffers = 0; + *batch_space = 0; + + for (i = 0; i < Elements(hw_atoms); i++) + if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) { + hw_atoms[i].validate(i915); + *batch_space += hw_atoms[i].batch_space; + } + + if (i915->num_validation_buffers == 0) return TRUE; - } - else { - *width = *height = 0; + + if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, + i915->num_validation_buffers)) return FALSE; - } + + return TRUE; } -static inline uint32_t -buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +static void +emit_state(struct i915_context *i915) { - uint32_t tiling_bits = 0; - - switch (tiling) { - case I915_TILE_Y: - tiling_bits |= BUF_3D_TILE_WALK_Y; - case I915_TILE_X: - tiling_bits |= BUF_3D_TILED_SURFACE; - case I915_TILE_NONE: - break; - } + int i; - return tiling_bits; + for (i = 0; i < Elements(hw_atoms); i++) + if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit) + hw_atoms[i].emit(i915); } /* Push the state into the sarea and/or texture memory. 
@@ -110,6 +142,7 @@ buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) void i915_emit_hardware_state(struct i915_context *i915 ) { + unsigned batch_space; /* XXX: there must be an easier way */ const unsigned dwords = ( 14 + 7 + @@ -135,14 +168,21 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (I915_DBG_ON(DBG_ATOMS)) i915_dump_hardware_dirty(i915, __FUNCTION__); - if(!BEGIN_BATCH(dwords, relocs)) { + if (!i915_validate_state(i915, &batch_space)) { + FLUSH_BATCH(NULL); + assert(i915_validate_state(i915, &batch_space)); + } + + if(!BEGIN_BATCH(batch_space + dwords, relocs)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(dwords, relocs)); + assert(i915_validate_state(i915, &batch_space)); + assert(BEGIN_BATCH(batch_space + dwords, relocs)); } save_ptr = (uintptr_t)i915->batch->ptr; save_relocs = i915->batch->relocs; + emit_state(i915); /* 14 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_INVARIANT) { @@ -223,7 +263,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) { int i; for (i = 0; i < I915_MAX_DYNAMIC; i++) { - if (i915->dynamic_dirty & (1 << i)); + if (i915->dynamic_dirty & (1 << i)) OUT_BATCH(i915->current.dynamic[i]); } } @@ -233,64 +273,27 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 8 dwords, 2 relocs */ if (i915->hardware_dirty & I915_HW_STATIC) { - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; - - if (cbuf_surface) { - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - assert(tex); - + if (i915->current.cbuf_bo) { OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - - OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - buf_3d_tiling_bits(tex->tiling)); - - OUT_RELOC(tex->buffer, + OUT_BATCH(i915->current.cbuf_flags); + OUT_RELOC(i915->current.cbuf_bo, I915_USAGE_RENDER, 0); } /* What happens if no zbuf?? 
*/ - if (depth_surface) { - struct i915_texture *tex = i915_texture(depth_surface->texture); - unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, - depth_surface->u.tex.first_layer); - assert(tex); - assert(offset == 0); - + if (i915->current.depth_bo) { OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - - assert(tex); - OUT_BATCH(BUF_3D_ID_DEPTH | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - buf_3d_tiling_bits(tex->tiling)); - - OUT_RELOC(tex->buffer, + OUT_BATCH(i915->current.depth_flags); + OUT_RELOC(i915->current.depth_bo, I915_USAGE_RENDER, 0); } { - unsigned cformat, zformat = 0; - - if (cbuf_surface) - cformat = cbuf_surface->format; - else - cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ - cformat = translate_format(cformat); - - if (depth_surface) - zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - cformat | - zformat ); + OUT_BATCH(i915->current.dst_buf_vars); } } #endif @@ -362,7 +365,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) uint i; OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + OUT_BATCH((1 << nr) - 1); for (i = 0; i < nr; i++) { const uint *c; @@ -411,31 +414,13 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 6 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_STATIC) { - uint w, h; - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - unsigned x, y; - int layer; - uint32_t draw_offset; - boolean ret; - - ret = framebuffer_size(&i915->framebuffer, &w, &h); - assert(ret); - - layer = cbuf_surface->u.tex.first_layer; - - x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; - y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; - - draw_offset = x 
| (y << 16); - /* XXX flush only required when the draw_offset changes! */ OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); - OUT_BATCH(draw_offset); - OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16)); - OUT_BATCH(draw_offset); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); } #endif diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index dc9a4c1e2fd..97044499990 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -27,17 +27,151 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_state.h" +#include "i915_resource.h" /*********************************************************************** * Update framebuffer state */ +static unsigned translate_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_B5G6R5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format(enum pipe_format zformat) +{ + switch (zformat) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + +static inline uint32_t +buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +{ + uint32_t tiling_bits = 0; + + switch (tiling) { + case I915_TILE_Y: + tiling_bits |= BUF_3D_TILE_WALK_Y; + case I915_TILE_X: + tiling_bits |= BUF_3D_TILED_SURFACE; + case I915_TILE_NONE: + break; + } + + return tiling_bits; +} + +/** + * Examine framebuffer state to determine width, height. 
+ */ +static boolean +framebuffer_size(const struct pipe_framebuffer_state *fb, + uint *width, uint *height) +{ + if (fb->cbufs[0]) { + *width = fb->cbufs[0]->width; + *height = fb->cbufs[0]->height; + return TRUE; + } + else if (fb->zsbuf) { + *width = fb->zsbuf->width; + *height = fb->zsbuf->height; + return TRUE; + } + else { + *width = *height = 0; + return FALSE; + } +} + static void update_framebuffer(struct i915_context *i915) { - /* HW emit currently references framebuffer state directly: + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + unsigned cformat, zformat; + unsigned x, y, w, h; + int layer; + uint32_t draw_offset; + boolean ret; + + if (cbuf_surface) { + struct i915_texture *tex = i915_texture(cbuf_surface->texture); + assert(tex); + + i915->current.cbuf_bo = tex->buffer; + i915->current.cbuf_flags = BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + buf_3d_tiling_bits(tex->tiling); + cformat = cbuf_surface->format; + + layer = cbuf_surface->u.tex.first_layer; + + x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; + y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; + } else { + i915->current.cbuf_bo = NULL; + cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ + x = y = 0; + } + cformat = translate_format(cformat); + + /* What happens if no zbuf?? 
*/ + if (depth_surface) { + struct i915_texture *tex = i915_texture(depth_surface->texture); + unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, + depth_surface->u.tex.first_layer); + assert(tex); + assert(offset == 0); + + i915->current.depth_bo = tex->buffer; + i915->current.depth_flags = BUF_3D_ID_DEPTH | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + buf_3d_tiling_bits(tex->tiling); + zformat = translate_depth_format(depth_surface->format); + } else { + i915->current.depth_bo = NULL; + zformat = 0; + } + + i915->current.dst_buf_vars = DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat; + + /* drawing rect calculations */ + draw_offset = x | (y << 16); + ret = framebuffer_size(&i915->framebuffer, &w, &h); + assert(ret); + if (i915->current.draw_offset != draw_offset) { + i915->current.draw_offset = draw_offset; + /* XXX: only emit flush on change and not always in emit */ + } + i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16); + i915->hardware_dirty |= I915_HW_STATIC; + + /* flush the cache in case we sample from the old renderbuffers */ + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } struct i915_tracked_state i915_hw_framebuffer = { diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index e915a886c9b..4ac2f5b9777 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -95,6 +95,18 @@ struct i915_winsys { (*batchbuffer_create)(struct i915_winsys *iws); /** + * Validate buffers for usage in this batchbuffer. + * Does space-checking and assorted other book-keeping. + * + * @batch + * @buffers array of buffers to validate + * @num_of_buffers size of the passed array + */ + boolean (*validate_buffers)(struct i915_winsys_batchbuffer *batch, + struct i915_winsys_buffer **buffers, + int num_of_buffers); + + /** * Emit a relocation to a buffer. 
* Target position in batchbuffer is the same as ptr. * diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c index 3a55e76bc35..a21a3c74484 100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@ -47,6 +47,9 @@ lp_fence_create(unsigned rank) static int fence_id; struct lp_fence *fence = CALLOC_STRUCT(lp_fence); + if (!fence) + return NULL; + pipe_reference_init(&fence->reference, 1); pipe_mutex_init(fence->mutex); diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 21e8012d46a..2c32aa93cdf 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -278,6 +278,11 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, return util_format_s3tc_enabled; } + /* u_format doesn't support RGTC yet */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + return FALSE; + } + /* * Everything else should be supported by u_format. 
*/ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 1968d0feb35..990acea9f44 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -255,8 +255,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -265,8 +263,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -276,8 +272,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 9f85bd4ce5f..d422ffe03f8 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -203,7 +203,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* SC. */ R300_INIT_ATOM(scissor_state, 3); /* GB, FG, GA, SU, SC, RB3D. */ - R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0)); + R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0)); /* VAP. 
*/ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); @@ -353,6 +353,7 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); + OUT_CB_REG(R300_SC_SCREENDOOR, 0xffffff); if (r300->screen->caps.is_rv350) { OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 30073759476..e9c7d7bf63f 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -220,7 +220,7 @@ struct r300_vertex_stream_state { }; struct r300_invariant_state { - uint32_t cb[20]; + uint32_t cb[22]; }; struct r300_vap_invariant_state { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 354144cac79..b97c45ac198 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -214,11 +214,18 @@ uint32_t r300_translate_texformat(enum pipe_format format, /* RGTC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (format) { - case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC1_SNORM: + result |= sign_bit[0]; + case PIPE_FORMAT_RGTC1_UNORM: + result &= ~(0xfff << 9); /* mask off swizzle */ + result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT; return R500_TX_FORMAT_ATI1N | result; - case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_RGTC2_SNORM: + result |= sign_bit[0] | sign_bit[1]; + case PIPE_FORMAT_RGTC2_UNORM: + result &= ~(0xfff << 9); /* mask off swizzle */ + result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT | + R300_TX_FORMAT_X << R300_TX_FORMAT_G_SHIFT; return R400_TX_FORMAT_ATI2N | result; default: return ~0; /* Unsupported/unknown. 
*/ diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 4f86e3b4c38..8cb417f9731 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -98,31 +98,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) return 0; } -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +void eg_cf_vtx(struct r600_vertex_element *ve) { - struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); - - rstate = &ve->rstate; + struct r600_pipe_state *rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 64c52bca795..1b76f0098dd 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -118,10 +118,10 @@ unsigned r600_get_clock_crystal_freq(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; struct r600_bo *r600_bo(struct radeon *radeon, - unsigned size, unsigned alignment, - unsigned binding, unsigned usage); + unsigned size, unsigned alignment, + unsigned binding, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle, unsigned 
*array_mode); + unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 1393df88757..8006e9b9a58 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -86,6 +86,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: return 1; @@ -135,6 +136,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: return 1; @@ -1441,7 +1443,8 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); - bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | + S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; return 0; } @@ -2778,12 +2781,13 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); - fprintf(stderr, "DATA_FORMAT:%d ", 
vtx->data_format); - fprintf(stderr, "NUM_FORMAT_ALL:%d ", vtx->num_format_all); - fprintf(stderr, "FORMAT_COMP_ALL:%d ", vtx->format_comp_all); - fprintf(stderr, "SRF_MODE_ALL:%d\n", vtx->srf_mode_all); + fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format); + fprintf(stderr, "NUM:%d ", vtx->num_format_all); + fprintf(stderr, "COMP:%d ", vtx->format_comp_all); + fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); id++; - fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "OFFSET:%d\n", vtx->offset); //TODO id++; fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); @@ -2794,29 +2798,9 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "--------------------------------------\n"); } -static void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count) +static void r600_cf_vtx(struct r600_vertex_element *ve) { struct r600_pipe_state *rstate; - unsigned i = 0; - - if (count > 8) { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(8 - 1); - bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 8 - 1); - } else { - bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) | - S_SQ_CF_WORD1_BARRIER(1) | - S_SQ_CF_WORD1_COUNT(count - 1); - } - bytecode[i++] = S_SQ_CF_WORD0_ADDR(0); - bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) | - S_SQ_CF_WORD1_BARRIER(1); rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; @@ -2962,37 +2946,19 @@ out_unknown: int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve) { - unsigned ndw, i; - u32 *bytecode; - unsigned fetch_resource_start = 0, format, num_format, 
format_comp; + static int dump_shaders = -1; + + struct r600_bc bc; + struct r600_bc_vtx vtx; struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; - - /* 2 dwords for cf aligned to 4 + 4 dwords per input */ - ndw = 8 + ve->count * 4; - ve->fs_size = ndw * 4; - - /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ - ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); - if (ve->fetch_shader == NULL) { - return -ENOMEM; - } - - bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); - if (bytecode == NULL) { - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); - return -ENOMEM; - } - - if (rctx->family >= CHIP_CEDAR) { - eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - } else { - r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4); - fetch_resource_start = 160; - } + unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; + unsigned format, num_format, format_comp; + u32 *bytecode; + int i, r; /* vertex elements offset need special handling, if offset is bigger - * than what we can put in fetch instruction then we need to alterate + + * than what we can put in fetch instruction then we need to alterate * the vertex resource offset. In such case in order to simplify code * we will bound one resource per elements. It's a worst case scenario. 
*/ @@ -3003,40 +2969,155 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru } } + memset(&bc, 0, sizeof(bc)); + r = r600_bc_init(&bc, r600_get_family(rctx->radeon)); + if (r) + return r; + + for (i = 0; i < ve->count; i++) { + if (elements[i].instance_divisor > 1) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = fui(1.0f / (float)elements[i].instance_divisor); + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + + memset(&alu, 0, sizeof(alu)); + alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT); + alu.src[0].sel = i + 1; + alu.src[0].chan = 3; + + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(&bc, &alu))) { + r600_bc_clear(&bc); + return r; + } + } + } + for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { + r600_bc_clear(&bc); R600_ERR("unknown format %d\n", 
ve->elements[i].src_format); - r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); return -EINVAL; } /* see above for vbuffer_need_offset explanation */ vbuffer_index = elements[i].vertex_buffer_index; - if (ve->vbuffer_need_offset) { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start); - } else { - bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start); - } - bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) | - S_SQ_VTX_WORD0_SRC_SEL_X(0) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F); - bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) | - S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) | - S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) | - S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) | - S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) | - S_SQ_VTX_WORD1_DATA_FORMAT(format) | - S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) | - S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) | - S_SQ_VTX_WORD1_SRF_MODE_ALL(1) | - S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1); - bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) | - S_SQ_VTX_WORD2_MEGA_FETCH(1); - bytecode[8 + i * 4 + 3] = 0; + memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = (ve->vbuffer_need_offset ? i : vbuffer_index) + fetch_resource_start; + vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; + vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; + vtx.src_sel_x = elements[i].instance_divisor ? 
3 : 0; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = i + 1; + vtx.dst_sel_x = desc->swizzle[0]; + vtx.dst_sel_y = desc->swizzle[1]; + vtx.dst_sel_z = desc->swizzle[2]; + vtx.dst_sel_w = desc->swizzle[3]; + vtx.data_format = format; + vtx.num_format_all = num_format; + vtx.format_comp_all = format_comp; + vtx.srf_mode_all = 1; + vtx.offset = elements[i].src_offset; + + if ((r = r600_bc_add_vtx(&bc, &vtx))) { + r600_bc_clear(&bc); + return r; + } + } + + r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + ve->fetch_shader = r600_bo(rctx->radeon, bc.ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0); + if (ve->fetch_shader == NULL) { + r600_bc_clear(&bc); + return -ENOMEM; + } + + ve->fs_size = bc.ndw*4; + if ((r = r600_bc_build(&bc))) { + r600_bc_clear(&bc); + return r; } + + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + + if (dump_shaders) { + fprintf(stderr, "--------------------------------------------------------------\n"); + r600_bc_dump(&bc); + fprintf(stderr, "______________________________________________________________\n"); + } + + bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL); + if (bytecode == NULL) { + r600_bc_clear(&bc); + r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL); + return -ENOMEM; + } + + memcpy(bytecode, bc.bytecode, ve->fs_size); + r600_bo_unmap(rctx->radeon, ve->fetch_shader); + r600_bc_clear(&bc); + + if (rctx->family >= CHIP_CEDAR) + eg_cf_vtx(ve); + else + r600_cf_vtx(ve); + return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 453c29790c1..dbd1e204b49 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -103,6 +103,7 @@ struct r600_bc_vtx { unsigned num_format_all; unsigned format_comp_all; unsigned srf_mode_all; + unsigned offset; }; struct r600_bc_output { @@ -187,7 +188,7 @@ struct r600_bc 
{ /* eg_asm.c */ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); -void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); +void eg_cf_vtx(struct r600_vertex_element *ve); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 9865ea17ae5..04408a5cc8e 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -225,7 +225,7 @@ struct texture_orig_info { unsigned height0; }; -static void r600_s3tc_to_blittable(struct pipe_resource *tex, +static void r600_compressed_to_blittable(struct pipe_resource *tex, unsigned level, struct texture_orig_info *orig) { @@ -253,7 +253,7 @@ static void r600_s3tc_to_blittable(struct pipe_resource *tex, } -static void r600_reset_blittable_to_s3tc(struct pipe_resource *tex, +static void r600_reset_blittable_to_compressed(struct pipe_resource *tex, unsigned level, struct texture_orig_info *orig) { @@ -282,13 +282,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx, restore_orig[0] = restore_orig[1] = FALSE; - if (util_format_is_s3tc(src->format)) { - r600_s3tc_to_blittable(src, src_level, &orig_info[0]); + if (util_format_is_compressed(src->format)) { + r600_compressed_to_blittable(src, src_level, &orig_info[0]); restore_orig[0] = TRUE; } - if (util_format_is_s3tc(dst->format)) { - r600_s3tc_to_blittable(dst, dst_level, &orig_info[1]); + if (util_format_is_compressed(dst->format)) { + r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); restore_orig[1] = TRUE; /* translate the dst box as well */ dstx = util_format_get_nblocksx(orig_info[1].format, dstx); @@ -299,10 +299,10 @@ static void r600_resource_copy_region(struct pipe_context *ctx, src, src_level, src_box); if (restore_orig[0]) - r600_reset_blittable_to_s3tc(src, src_level, &orig_info[0]); + r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]); if 
(restore_orig[1]) - r600_reset_blittable_to_s3tc(dst, dst_level, &orig_info[1]); + r600_reset_blittable_to_compressed(dst, dst_level, &orig_info[1]); } void r600_init_blit_functions(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 0c5d7133c7a..2363cd1ebc5 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -132,13 +132,13 @@ static void r600_transfer_destroy(struct pipe_context *ctx, } static void r600_buffer_transfer_inline_write(struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned layer_stride) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { struct radeon *ws = (struct radeon*)pipe->winsys; struct r600_resource_buffer *rbuffer = r600_buffer(resource); @@ -224,7 +224,7 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen, rbuffer->r.b.b.b.depth0 = 1; rbuffer->r.b.b.b.array_size = 1; rbuffer->r.b.b.b.flags = 0; - rbuffer->r.b.user_ptr = ptr; + rbuffer->r.b.user_ptr = ptr; rbuffer->r.bo = NULL; rbuffer->r.bo_size = 0; return &rbuffer->r.b.b.b; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 34094001b75..3fd6668f718 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -77,8 +77,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags, u_upload_flush(rctx->vbuf_mgr->uploader); } -static void r600_update_num_contexts(struct r600_screen *rscreen, - int diff) +static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) { pipe_mutex_lock(rscreen->mutex_num_contexts); if (diff > 0) { @@ -286,13 +285,13 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param) case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_INSTANCED_DRAWING: return 1; /* Unsupported features (boolean caps). */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ - case PIPE_CAP_INSTANCED_DRAWING: return 0; case PIPE_CAP_ARRAY_TEXTURES: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 240c8f1ffd0..0b4dc75e584 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -241,10 +241,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; int r; - /* Would like some magic "get_bool_option_once" routine. - */ - if (dump_shaders == -1) - dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); + /* Would like some magic "get_bool_option_once" routine. 
+ */ + if (dump_shaders == -1) + dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); @@ -420,6 +420,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; unsigned i; + int r; switch (d->Declaration.File) { case TGSI_FILE_INPUT: @@ -451,6 +452,26 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: break; + + case TGSI_FILE_SYSTEM_VALUE: + if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + struct r600_bc_alu alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = 0; + alu.src[0].chan = 3; + + alu.dst.sel = 0; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + break; + } + default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); return -EINVAL; @@ -521,6 +542,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; r600_src->neg = tgsi_src->Register.Negate; r600_src->abs = tgsi_src->Register.Absolute; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { int index; if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && @@ -535,6 +557,13 @@ static void tgsi_src(struct r600_shader_ctx *ctx, index = tgsi_src->Register.Index; r600_src->sel = V_SQ_ALU_SRC_LITERAL; memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); + } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { + /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ + r600_src->swizzle[0] = 3; + r600_src->swizzle[1] = 3; + r600_src->swizzle[2] = 3; + r600_src->swizzle[3] = 3; + r600_src->sel = 0; } else { if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; @@ -2858,7 +2887,7 @@ 
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3016,7 +3045,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 72707fbd8b8..3c072fe7ca9 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -299,13 +299,13 @@ void r600_spi_update(struct r600_pipe_context *rctx) tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rctx->family < CHIP_CEDAR) { - if (rshader->input[i].centroid) - tmp |= S_028644_SEL_CENTROID(1); + if (rctx->family < CHIP_CEDAR) { + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - tmp |= S_028644_SEL_LINEAR(1); - } + if (rshader->input[i].interpolate == 
TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + } r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } @@ -520,7 +520,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_context_pipe_state_set(&rctx->ctx, &vgt); rdraw.vgt_num_indices = draw.info.count; - rdraw.vgt_num_instances = 1; + rdraw.vgt_num_instances = draw.info.instance_count; rdraw.vgt_index_type = vgt_dma_index_type; rdraw.vgt_draw_initiator = vgt_draw_initiator; rdraw.indices = NULL; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 4c9d5609c06..048d0b61e3b 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -292,7 +292,7 @@ static boolean permit_hardware_blit(struct pipe_screen *screen, bind = PIPE_BIND_RENDER_TARGET; /* hackaround for S3TC */ - if (util_format_is_s3tc(res->format)) + if (util_format_is_compressed(res->format)) return TRUE; if (!screen->is_format_supported(screen, @@ -433,7 +433,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, } if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && - util_format_is_s3tc(templ->format)) + util_format_is_compressed(templ->format)) array_mode = V_038000_ARRAY_1D_TILED_THIN1; return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, @@ -887,12 +887,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, goto out_unknown; switch (format) { - case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC1_SNORM: + word4 |= sign_bit[0]; + case PIPE_FORMAT_RGTC1_UNORM: result = FMT_BC4; goto out_word4; - case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_RGTC2_SNORM: + word4 |= sign_bit[0] | sign_bit[1]; + case PIPE_FORMAT_RGTC2_UNORM: result = FMT_BC5; goto out_word4; default: diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index a06817c5735..603e1de7982 100644 --- 
a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -249,6 +249,11 @@ softpipe_is_format_supported( struct pipe_screen *screen, return util_format_s3tc_enabled; } + /* u_format doesn't implement RGTC yet */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + return FALSE; + } + /* * Everything else should be supported by u_format. */ |