diff options
author | Christian König <[email protected]> | 2011-03-03 00:59:12 +0100 |
---|---|---|
committer | Christian König <[email protected]> | 2011-03-03 00:59:12 +0100 |
commit | 0eccb1038a620bc76ba45ac00c293b3e88427510 (patch) | |
tree | 4bd9209ac6e9b824284d47799b13a99a401c6963 /src/gallium/drivers | |
parent | ed12c29bc45b100b758c9affe2cebe8c8498e25e (diff) | |
parent | 2e756f3d6f15d61297a3bb4efe6a88c29081a5eb (diff) |
Merge remote branch 'origin/master' into pipe-video
Diffstat (limited to 'src/gallium/drivers')
42 files changed, 803 insertions, 817 deletions
diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index 6e93da76209..039c8713570 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -31,8 +31,8 @@ #include "i915_batchbuffer.h" -#define BEGIN_BATCH(dwords, relocs) \ - (i915_winsys_batchbuffer_check(i915->batch, dwords, relocs)) +#define BEGIN_BATCH(dwords) \ + (i915_winsys_batchbuffer_check(i915->batch, dwords)) #define OUT_BATCH(dword) \ i915_winsys_batchbuffer_dword(i915->batch, dword) diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index b4a91dabb37..9df82272604 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -41,11 +41,9 @@ i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch) static INLINE boolean i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch, - size_t dwords, - size_t relocs) + size_t dwords) { - return dwords * 4 <= i915_winsys_batchbuffer_space(batch) && - relocs <= (batch->max_relocs - batch->relocs); + return dwords * 4 <= i915_winsys_batchbuffer_space(batch); } static INLINE void @@ -71,7 +69,7 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch, { assert (i915_winsys_batchbuffer_space(batch) >= size); - memcpy(data, batch->ptr, size); + memcpy(batch->ptr, data, size); batch->ptr += size; } diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index f885417f8ed..baaed3767ff 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -71,9 +71,9 @@ i915_fill_blit(struct i915_context *i915, return; } - if (!BEGIN_BATCH(6, 1)) { + if (!BEGIN_BATCH(6)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(6, 1)); + assert(BEGIN_BATCH(6)); } OUT_BATCH(CMD); OUT_BATCH(BR13); @@ -143,9 +143,9 @@ i915_copy_blit(struct i915_context *i915, */ assert (dst_pitch > 0 && src_pitch > 0); - if (!BEGIN_BATCH(8, 2)) { + if (!BEGIN_BATCH(8)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(8, 2)); + assert(BEGIN_BATCH(8)); } OUT_BATCH(CMD); OUT_BATCH(BR13); diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index cbf919754e5..84c8cb54436 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -39,7 +39,7 @@ #include "pipe/p_screen.h" -DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(i915_no_vbuf, "I915_NO_VBUF", FALSE) /* diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 22a2c7b2cb4..f2044d661e3 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -59,9 +59,9 @@ static void i915_flush_pipe( struct pipe_context *pipe, if (flags & PIPE_FLUSH_TEXTURE_CACHE) flush |= FLUSH_MAP_CACHE; - if (!BEGIN_BATCH(1, 0)) { + if (!BEGIN_BATCH(1)) { FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(1, 0)); + assert(BEGIN_BATCH(1)); } OUT_BATCH( flush ); } diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index dd997e2cf48..276e33d4b9d 100644 --- a/src/gallium/drivers/i915/i915_prim_emit.c +++ b/src/gallium/drivers/i915/i915_prim_emit.c @@ -144,7 +144,7 @@ emit_prim( struct draw_stage *stage, vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ assert(vertex_size >= 12); /* never smaller than 12 bytes */ - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: @@ -152,7 +152,7 @@ emit_prim( struct draw_stage *stage, i915_update_derived( i915 ); i915_emit_hardware_state( i915 ); - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4)) { assert(0); return; } diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index 3473c863970..fb4c0516dd8 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -465,7 +465,7 @@ draw_arrays_fallback(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: @@ -474,7 +474,7 @@ draw_arrays_fallback(struct vbuf_render *render, i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { assert(0); goto out; } @@ -514,7 +514,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(2, 0)) { + if (!BEGIN_BATCH(2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: @@ -523,7 +523,7 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(2, 0)) { + if (!BEGIN_BATCH(2)) { assert(0); goto out; } @@ -635,7 +635,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, if (i915->hardware_dirty) i915_emit_hardware_state(i915); - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: @@ -644,7 +644,7 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, i915_emit_hardware_state(i915); i915->vbo_flushed = 1; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2)) { assert(0); goto out; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 0323ad940f9..15350c0a5d7 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -40,13 +40,19 @@ struct i915_tracked_hw_state { const char *name; - void (*validate)(struct i915_context *); + void (*validate)(struct i915_context *, unsigned *batch_space); void (*emit)(struct i915_context *); unsigned dirty, batch_space; }; static void +validate_flush(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->flush_dirty ? 1 : 0; +} + +static void emit_flush(struct i915_context *i915) { /* Cache handling is very cheap atm. State handling can request to flushes: @@ -61,109 +67,343 @@ emit_flush(struct i915_context *i915) OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); } +uint32_t invariant_state[] = { + _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0, + + _3DSTATE_DFLT_DIFFUSE_CMD, 0, + + _3DSTATE_DFLT_SPEC_CMD, 0, + + _3DSTATE_DFLT_Z_CMD, 0, + + _3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7), + + _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D, + + _3DSTATE_DEPTH_SUBRECT_DISABLE, + + /* disable indirect state for now + */ + _3DSTATE_LOAD_INDIRECT | 0, 0}; + static void -validate_immediate(struct i915_context *i915) +emit_invariant(struct i915_context *i915) { + i915_winsys_batchbuffer_write(i915->batch, invariant_state, + Elements(invariant_state)*sizeof(uint32_t)); +} + +static void +validate_immediate(struct i915_context *i915, unsigned *batch_space) +{ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; + + *batch_space = 1 + util_bitcount(dirty); } static void -validate_static(struct i915_context *i915) +emit_immediate(struct i915_context *i915) { - if (i915->current.cbuf_bo) + /* remove unwatned bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + OUT_BATCH(0); + } + + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1 << i)) + OUT_BATCH(i915->current.immediate[i]); + } +} + +static void +validate_dynamic(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1)); +} + +static void +emit_dynamic(struct i915_context *i915) +{ + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + if (i915->dynamic_dirty & (1 << i)) + OUT_BATCH(i915->current.dynamic[i]); + } +} + +static void +validate_static(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = 2 + 5; /* including DRAW_RECT */ + + if (i915->current.cbuf_bo) { i915->validation_buffers[i915->num_validation_buffers++] = i915->current.cbuf_bo; + *batch_space += 3; + } - if (i915->current.depth_bo) + if (i915->current.depth_bo) { i915->validation_buffers[i915->num_validation_buffers++] = i915->current.depth_bo; + *batch_space += 3; + } } static void -validate_map(struct i915_context *i915) +emit_static(struct i915_context *i915) +{ + if (i915->current.cbuf_bo) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.cbuf_flags); + OUT_RELOC(i915->current.cbuf_bo, + I915_USAGE_RENDER, + 0); + } + + /* What happens if no zbuf?? + */ + if (i915->current.depth_bo) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.depth_flags); + OUT_RELOC(i915->current.depth_bo, + I915_USAGE_RENDER, + 0); + } + + { + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(i915->current.dst_buf_vars); + } +} + +static void +validate_map(struct i915_context *i915, unsigned *batch_space) { const uint enabled = i915->current.sampler_enable_flags; uint unit; struct i915_texture *tex; + *batch_space = i915->current.sampler_enable_nr ? + 2 + 3*i915->current.sampler_enable_nr : 0; for (unit = 0; unit < I915_TEX_UNITS; unit++) { if (enabled & (1 << unit)) { - tex = i915_texture(i915->fragment_sampler_views[unit]->texture); - i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; + tex = i915_texture(i915->fragment_sampler_views[unit]->texture); + i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; } } } -const static struct i915_tracked_hw_state hw_atoms[] = { - { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 }, - { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE }, - { "static", validate_static, NULL, I915_HW_STATIC }, - { "map", validate_map, NULL, I915_HW_MAP } -}; +static void +emit_map(struct i915_context *i915) +{ + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); + struct i915_winsys_buffer *buf = texture->buffer; + assert(buf); + + count++; + + OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ + } + } + assert(count == nr); + } +} + +static void +validate_sampler(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->current.sampler_enable_nr ? + 2 + 3*i915->current.sampler_enable_nr : 0; +} + +static void +emit_sampler(struct i915_context *i915) +{ + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); + } + } + } +} + +static void +validate_constants(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->num_constants ? + 2 + 4*i915->fs->num_constants : 0; +} + +static void +emit_constants(struct i915_context *i915) +{ + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH((1 << nr) - 1); + + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; + c += 4 * i; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? "user" : "immediate")); + } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + } + } +} + +static void +validate_program(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->program_len; +} + +static void +emit_program(struct i915_context *i915) +{ + uint i; + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); + } +} + +static void +emit_draw_rect(struct i915_context *i915) +{ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); +} static boolean i915_validate_state(struct i915_context *i915, unsigned *batch_space) { - int i; + unsigned tmp; i915->num_validation_buffers = 0; - *batch_space = 0; - - for (i = 0; i < Elements(hw_atoms); i++) - if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) { - hw_atoms[i].validate(i915); - *batch_space += hw_atoms[i].batch_space; - } + if (i915->hardware_dirty & I915_HW_INVARIANT) + *batch_space = Elements(invariant_state); + else + *batch_space = 0; + +#define VALIDATE_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) { \ + validate_##atom(i915, &tmp); \ + *batch_space += tmp; } + VALIDATE_ATOM(flush, I915_HW_FLUSH); + VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE); + VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC); + VALIDATE_ATOM(static, I915_HW_STATIC); + VALIDATE_ATOM(map, I915_HW_MAP); + VALIDATE_ATOM(sampler, I915_HW_SAMPLER); + VALIDATE_ATOM(constants, I915_HW_CONSTANTS); + VALIDATE_ATOM(program, I915_HW_PROGRAM); +#undef VALIDATE_ATOM if (i915->num_validation_buffers == 0) return TRUE; if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, - i915->num_validation_buffers)) + i915->num_validation_buffers)) return FALSE; return TRUE; } -static void -emit_state(struct i915_context *i915) -{ - int i; - - for (i = 0; i < Elements(hw_atoms); i++) - if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit) - hw_atoms[i].emit(i915); -} - /* Push the state into the sarea and/or texture memory. */ void i915_emit_hardware_state(struct i915_context *i915 ) { unsigned batch_space; - /* XXX: there must be an easier way */ - const unsigned dwords = ( 14 + - 7 + - I915_MAX_DYNAMIC + - 8 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_MAX_CONSTANT*4 + -#if 0 - i915->current.program_len + -#else - i915->fs->program_len + -#endif - 6 - ) * 3/2; /* plus 50% margin */ - const unsigned relocs = ( I915_TEX_UNITS + - 3 - ) * 3/2; /* plus 50% margin */ - uintptr_t save_ptr; - size_t save_relocs; if (I915_DBG_ON(DBG_ATOMS)) i915_dump_hardware_dirty(i915, __FUNCTION__); @@ -173,262 +413,36 @@ i915_emit_hardware_state(struct i915_context *i915 ) assert(i915_validate_state(i915, &batch_space)); } - if(!BEGIN_BATCH(batch_space + dwords, relocs)) { + if(!BEGIN_BATCH(batch_space)) { FLUSH_BATCH(NULL); assert(i915_validate_state(i915, &batch_space)); - assert(BEGIN_BATCH(batch_space + dwords, relocs)); + assert(BEGIN_BATCH(batch_space)); } save_ptr = (uintptr_t)i915->batch->ptr; - save_relocs = i915->batch->relocs; - - emit_state(i915); - /* 14 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_INVARIANT) - { - OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); - - OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_Z_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)); - - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D); - - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); - - /* disable indirect state for now - */ - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); - OUT_BATCH(0); - } - - /* 7 dwords, 1 relocs */ - if (i915->hardware_dirty & I915_HW_IMMEDIATE) - { - /* remove unwatned bits and S7 */ - unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | - 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | - 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | - 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & - i915->immediate_dirty; - int i, num = util_bitcount(dirty); - assert(num && num <= I915_MAX_IMMEDIATE); - - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - dirty << 4 | (num - 1)); - - if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { - if (i915->vbo) - OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - OUT_BATCH(0); - } - - for (i = 1; i < I915_MAX_IMMEDIATE; i++) { - if (dirty & (1 << i)) - OUT_BATCH(i915->current.immediate[i]); - } - } - -#if 01 - /* I915_MAX_DYNAMIC dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_DYNAMIC) - { - int i; - for (i = 0; i < I915_MAX_DYNAMIC; i++) { - if (i915->dynamic_dirty & (1 << i)) - OUT_BATCH(i915->current.dynamic[i]); - } - } -#endif - -#if 01 - /* 8 dwords, 2 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - if (i915->current.cbuf_bo) { - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(i915->current.cbuf_flags); - OUT_RELOC(i915->current.cbuf_bo, - I915_USAGE_RENDER, - 0); - } - - /* What happens if no zbuf?? - */ - if (i915->current.depth_bo) { - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(i915->current.depth_flags); - OUT_RELOC(i915->current.depth_bo, - I915_USAGE_RENDER, - 0); - } - - { - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(i915->current.dst_buf_vars); - } - } -#endif - -#if 01 - /* texture images */ - /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ - if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) - { - const uint nr = i915->current.sampler_enable_nr; - if (nr) { - const uint enabled = i915->current.sampler_enable_flags; - uint unit; - uint count = 0; - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); - OUT_BATCH(enabled); - for (unit = 0; unit < I915_TEX_UNITS; unit++) { - if (enabled & (1 << unit)) { - struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - struct i915_winsys_buffer *buf = texture->buffer; - assert(buf); - - count++; - - OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); - OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ - OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ - } - } - assert(count == nr); - } - } -#endif - -#if 01 - /* samplers */ - /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_SAMPLER) - { - if (i915->current.sampler_enable_nr) { - int i; - - OUT_BATCH( _3DSTATE_SAMPLER_STATE | - (3 * i915->current.sampler_enable_nr) ); - - OUT_BATCH( i915->current.sampler_enable_flags ); - - for (i = 0; i < I915_TEX_UNITS; i++) { - if (i915->current.sampler_enable_flags & (1<<i)) { - OUT_BATCH( i915->current.sampler[i][0] ); - OUT_BATCH( i915->current.sampler[i][1] ); - OUT_BATCH( i915->current.sampler[i][2] ); - } - } - } - } -#endif - -#if 01 - /* constants */ - /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_CONSTANTS) - { - /* Collate the user-defined constants with the fragment shader's - * immediates according to the constant_flags[] array. - */ - const uint nr = i915->fs->num_constants; - if (nr) { - uint i; - - OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH((1 << nr) - 1); - - for (i = 0; i < nr; i++) { - const uint *c; - if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { - /* grab user-defined constant */ - c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; - c += 4 * i; - } - else { - /* emit program constant */ - c = (uint *) i915->fs->constants[i]; - } -#if 0 /* debug */ - { - float *f = (float *) c; - printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], - (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER - ? "user" : "immediate")); - } -#endif - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - } - } - } -#endif - -#if 01 - /* Fragment program */ - /* i915->current.program_len dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) - { - uint i; - /* we should always have, at least, a pass-through program */ - assert(i915->fs->program_len > 0); - for (i = 0; i < i915->fs->program_len; i++) { - OUT_BATCH(i915->fs->program[i]); - } - } -#endif - -#if 01 - /* drawing surface size */ - /* 6 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - /* XXX flush only required when the draw_offset changes! */ - OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); - OUT_BATCH(i915->current.draw_offset); - OUT_BATCH(i915->current.draw_size); - OUT_BATCH(i915->current.draw_offset); - } -#endif - I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__, +#define EMIT_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) \ + emit_##atom(i915); + EMIT_ATOM(flush, I915_HW_FLUSH); + EMIT_ATOM(invariant, I915_HW_INVARIANT); + EMIT_ATOM(immediate, I915_HW_IMMEDIATE); + EMIT_ATOM(dynamic, I915_HW_DYNAMIC); + EMIT_ATOM(static, I915_HW_STATIC); + EMIT_ATOM(map, I915_HW_MAP); + EMIT_ATOM(sampler, I915_HW_SAMPLER); + EMIT_ATOM(constants, I915_HW_CONSTANTS); + EMIT_ATOM(program, I915_HW_PROGRAM); + EMIT_ATOM(draw_rect, I915_HW_STATIC); +#undef EMIT_ATOM + + I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__, ((uintptr_t)i915->batch->ptr - save_ptr) / 4, - i915->batch->relocs - save_relocs); + batch_space); + assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space); i915->hardware_dirty = 0; i915->immediate_dirty = 0; i915->dynamic_dirty = 0; + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index 97044499990..20cd23f8f73 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -164,7 +164,7 @@ static void update_framebuffer(struct i915_context *i915) assert(ret); if (i915->current.draw_offset != draw_offset) { i915->current.draw_offset = draw_offset; - /* XXX: only emit flush on change and not always in emit */ + i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); } i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16); diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 4ac2f5b9777..21cfdc9613e 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -76,7 +76,6 @@ struct i915_winsys_batchbuffer { size_t size; size_t relocs; - size_t max_relocs; /*@}*/ }; diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 4f86db39926..6391ea7f3be 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,7 +22,6 @@ #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_texture.h" #include "r300_winsys.h" @@ -117,6 +116,14 @@ static boolean r300_fast_zclear_allowed(struct r300_context *r300) return r300_resource(fb->zsbuf->texture)->tex.zmask_dwords[fb->zsbuf->u.tex.level]; } +static boolean r300_hiz_clear_allowed(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + + return r300_resource(fb->zsbuf->texture)->tex.hiz_dwords[fb->zsbuf->u.tex.level]; +} + static uint32_t r300_depth_clear_value(enum pipe_format format, double depth, unsigned stencil) { @@ -134,6 +141,13 @@ static uint32_t r300_depth_clear_value(enum pipe_format format, } } +static uint32_t r300_hiz_clear_value(double depth) +{ + uint32_t r = (uint32_t)(CLAMP(depth, 0, 1) * 255.5); + assert(r <= 255); + return r | (r << 8) | (r << 16) | (r << 24); +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -190,8 +204,6 @@ static void r300_clear(struct pipe_context* pipe, (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_hyperz_state *hyperz = (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_resource *zstex = - fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; uint32_t width = fb->width; uint32_t height = fb->height; boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); @@ -200,20 +212,18 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fast Z clear. * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) { - hyperz_dcv = hyperz->zb_depthclearvalue = - r300_depth_clear_value(fb->zsbuf->format, depth, stencil); - if (r300_fast_zclear_allowed(r300)) { + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + r300_mark_atom_dirty(r300, &r300->zmask_clear); buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - if (zstex->hiz_mem[fb->zsbuf->u.tex.level]) + if (r300_hiz_clear_allowed(r300)) { + r300->hiz_clear_value = r300_hiz_clear_value(depth); r300_mark_atom_dirty(r300, &r300->hiz_clear); - - /* XXX Change this to r300_mark_atom_dirty(r300, &r300->hyperz_state); - * once hiz offset is constant. */ - r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG); + } } /* Enable CBZB clear. */ @@ -240,14 +250,14 @@ static void r300_clear(struct pipe_context* pipe, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); - } else if (r300->zmask_clear.dirty) { + } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) { /* Just clear zmask and hiz now, this does not use the standard draw * procedure. */ unsigned dwords; /* Calculate zmask_clear and hiz_clear atom sizes. */ r300_update_hyperz_state(r300); - dwords = r300->zmask_clear.size + + dwords = (r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) + (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) + r300_get_num_cs_end_dwords(r300); @@ -257,9 +267,11 @@ static void r300_clear(struct pipe_context* pipe, } /* Emit clear packets. */ - r300_emit_zmask_clear(r300, r300->zmask_clear.size, - r300->zmask_clear.state); - r300->zmask_clear.dirty = FALSE; + if (r300->zmask_clear.dirty) { + r300_emit_zmask_clear(r300, r300->zmask_clear.size, + r300->zmask_clear.state); + r300->zmask_clear.dirty = FALSE; + } if (r300->hiz_clear.dirty) { r300_emit_hiz_clear(r300, r300->hiz_clear.size, r300->hiz_clear.state); @@ -279,9 +291,8 @@ static void r300_clear(struct pipe_context* pipe, /* Enable fastfill and/or hiz. * * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update - * looks if zmask/hiz is in use and enables fastfill accordingly. */ - if (r300->zmask_in_use || - (zstex && zstex->hiz_in_use[fb->zsbuf->u.tex.level])) { + * looks if zmask/hiz is in use and programs hardware accordingly. */ + if (r300->zmask_in_use || r300->hiz_in_use) { r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -295,7 +306,7 @@ static void r300_clear_render_target(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); r300_blitter_begin(r300, R300_CLEAR_SURFACE); @@ -303,7 +314,7 @@ static void r300_clear_render_target(struct pipe_context *pipe, dstx, dsty, width, height); r300_blitter_end(r300); - r300->zmask_locked = FALSE; + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } @@ -320,11 +331,11 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == dst->texture) { r300_decompress_zmask(r300); } else { - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -334,8 +345,8 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, dstx, dsty, width, height); r300_blitter_end(r300); - if (r300->zmask_locked) { - r300->zmask_locked = FALSE; + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -345,7 +356,7 @@ void r300_decompress_zmask(struct r300_context *r300) struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - if (!r300->zmask_in_use || r300->zmask_locked) + if (!r300->zmask_in_use || r300->hyperz_locked) return; r300->zmask_decompress = TRUE; @@ -420,12 +431,12 @@ static void r300_resource_copy_region(struct pipe_context *pipe, util_format_description(dst->format); struct pipe_box box; - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { if (fb->zsbuf->texture == src || fb->zsbuf->texture == dst) { r300_decompress_zmask(r300); } else { - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } @@ -463,7 +474,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } /* Handle compressed formats. */ - if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC || + desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (util_format_get_blocksize(old_dst.format)) { case 8: /* 1 pixel = 4 bits, @@ -502,8 +514,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe, if (old_dst.format != new_dst.format) r300_resource_set_properties(pipe->screen, dst, 0, &old_dst); - if (r300->zmask_locked) { - r300->zmask_locked = FALSE; + if (r300->hyperz_locked) { + r300->hyperz_locked = FALSE; r300_mark_atom_dirty(r300, &r300->hyperz_state); } } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0be161fa07a..68943d561ba 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -90,8 +90,6 @@ struct r300_capabilities { boolean high_second_pipe; /* DXTC texture swizzling. */ boolean dxtc_swizzle; - /* Index bias (AKA index offset). */ - boolean index_bias_supported; }; /* Enumerations for legibility and telling which card we're running on. */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d422ffe03f8..166d965aa5b 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,7 +30,6 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -170,7 +169,6 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; @@ -203,11 +201,11 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* SC. */ R300_INIT_ATOM(scissor_state, 3); /* GB, FG, GA, SU, SC, RB3D. */ - R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0)); + R300_INIT_ATOM(invariant_state, 18 + (is_rv350 ? 4 : 0) + (is_r500 ? 4 : 0)); /* VAP. */ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); - R300_INIT_ATOM(vap_invariant_state, 9); + R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9); R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -216,7 +214,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(rs_state, 0); /* SC, US. */ - R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0)); + R300_INIT_ATOM(fb_state_pipelined, 8); /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); @@ -227,7 +225,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) if (can_hyperz) { /* HiZ Clear */ if (has_hiz_ram) - R300_INIT_ATOM(hiz_clear, 0); + R300_INIT_ATOM(hiz_clear, 4); /* zmask clear */ R300_INIT_ATOM(zmask_clear, 4); } @@ -331,7 +329,7 @@ static void r300_init_states(struct pipe_context *pipe) /* Initialize the VAP invariant state. */ { - BEGIN_CB(vap_invariant->cb, 9); + BEGIN_CB(vap_invariant->cb, r300->vap_invariant_state.size); OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); OUT_CB_32F(1.0); @@ -339,6 +337,10 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_32F(1.0); OUT_CB_32F(1.0); OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0); + } END_CB; } @@ -359,6 +361,11 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + + if (r300->screen->caps.is_r500) { + OUT_CB_REG(R500_GA_COLOR_CONTROL_PS3, 0); + OUT_CB_REG(R500_SU_TEX_WRAP_PS3, 0); + } END_CB; } @@ -447,16 +454,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, /* Render functions must be initialized after blitter. */ r300_init_render_functions(r300); + r300_init_states(&r300->context); rws->cs_set_flush(r300->cs, r300_flush_cb, r300); - /* setup hyper-z mm */ - if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - if (!r300_hyperz_init_mm(r300)) - goto fail; - - r300_init_states(&r300->context); - /* The KIL opcode needs the first texture unit to be enabled * on r3xx-r4xx. In order to calm down the CS checker, we bind this * dummy texture there. */ @@ -507,10 +508,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, } /* Print driver info. */ -#ifdef NDEBUG - if (DBG_ON(r300, DBG_INFO)) { -#else +#ifdef DEBUG { +#else + if (DBG_ON(r300, DBG_INFO)) { #endif fprintf(stderr, "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n" @@ -526,7 +527,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->get_value(rws, R300_VID_GART_SIZE) >> 20, rws->get_value(rws, R300_VID_VRAM_SIZE) >> 20, rws->get_value(rws, R300_CAN_AACOMPRESS) ? "YES" : "NO", - rws->get_value(rws, R300_CAN_HYPERZ) ? "YES" : "NO", + rws->get_value(rws, R300_CAN_HYPERZ) && + r300->screen->caps.zmask_ram ? "YES" : "NO", rws->get_value(rws, R300_CAN_HYPERZ) && r300->screen->caps.hiz_ram ? "YES" : "NO"); } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index e9c7d7bf63f..6f2aab69ab1 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -102,7 +102,6 @@ struct r300_dsa_state { }; struct r300_hyperz_state { - int current_func; /* -1 after a clear before first op */ int flush; /* This is actually a command buffer with named dwords. */ uint32_t cb_flush_begin; @@ -220,11 +219,11 @@ struct r300_vertex_stream_state { }; struct r300_invariant_state { - uint32_t cb[22]; + uint32_t cb[26]; }; struct r300_vap_invariant_state { - uint32_t cb[9]; + uint32_t cb[11]; }; struct r300_viewport_state { @@ -295,6 +294,8 @@ struct r300_surface { uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ + uint32_t pitch_zmask; /* ZMASK_PITCH */ + uint32_t pitch_hiz; /* HIZ_PITCH */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ /* Parameters dedicated to the CBZB clear. */ @@ -363,8 +364,12 @@ struct r300_texture_desc { /* Zbuffer compression info for each miplevel. */ boolean zcomp8x8[R300_MAX_TEXTURE_LEVELS]; - /* If zero, then disable compression. */ + /* If zero, then disable Z compression/HiZ. */ unsigned zmask_dwords[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_dwords[R300_MAX_TEXTURE_LEVELS]; + /* Zmask/HiZ strides for each miplevel. */ + unsigned zmask_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned hiz_stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; }; struct r300_resource @@ -390,10 +395,6 @@ struct r300_resource /* Where the texture starts in the buffer. */ unsigned tex_offset; - /* HiZ memory allocations. */ - struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; - boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS]; - /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; @@ -412,6 +413,21 @@ struct r300_vertex_element_state { struct r300_vertex_stream_state vertex_stream; }; +enum r300_hiz_func { + HIZ_FUNC_NONE, + + /* The function, when determined, is set in stone + * until the next HiZ clear. */ + + /* MAX is written to the HiZ buffer. + * Used for LESS, LEQUAL. */ + HIZ_FUNC_MAX, + + /* MIN is written to the HiZ buffer. + * Used for GREATER, GEQUAL. */ + HIZ_FUNC_MIN, +}; + struct r300_context { /* Parent class */ struct pipe_context context; @@ -545,22 +561,25 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; - + /* Whether fast color clear is enabled. */ boolean cbzb_clear; /* Whether ZMASK is enabled. */ boolean zmask_in_use; /* Whether ZMASK is being decompressed. */ boolean zmask_decompress; - /* Whether ZMASK is locked, i.e. should be disabled and cannot be taken over. */ - boolean zmask_locked; + /* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */ + boolean hyperz_locked; /* The zbuffer the ZMASK of which is locked. */ struct pipe_surface *locked_zbuffer; + /* Whether HIZ is enabled. */ + boolean hiz_in_use; + /* HiZ function. Can be either MIN or MAX. */ + enum r300_hiz_func hiz_func; + /* HiZ clear value. */ + uint32_t hiz_clear_value; void *dsa_decompress_zmask; - /* two mem block managers for hiz/zmask ram space */ - struct mem_block *hiz_mm; - struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; @@ -644,6 +663,9 @@ void r300_decompress_zmask(struct r300_context *r300); void r300_decompress_zmask_locked_unsafe(struct r300_context *r300); void r300_decompress_zmask_locked(struct r300_context *r300); +/* r300_hyperz.c */ +void r300_update_hyperz_state(struct r300_context* r300); + /* r300_query.c */ void r300_resume_query(struct r300_context *r300, struct r300_query *query); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e2e4719ec82..24c82a3efd2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -425,27 +425,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_RELOC(surf); if (can_hyperz) { - uint32_t surf_pitch; - struct r300_resource *tex; - int level = surf->base.u.tex.level; - tex = r300_resource(surf->base.texture); - - surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; - /* HiZ RAM. */ - if (r300->screen->caps.hiz_ram) { - if (tex->hiz_mem[level]) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); - OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); - } else { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); - } - } - + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); /* Z Mask RAM. (compressed zbuffer) */ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); } } @@ -484,6 +469,7 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; unsigned i, num_cbufs = fb->nr_cbufs; + unsigned mspos0, mspos1; CS_LOCALS(r300); /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be @@ -507,38 +493,36 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, /* Multisampling. Depends on framebuffer sample count. * These are pipelined regs and as such cannot be moved * to the AA state. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - unsigned mspos0 = 0x66666666; - unsigned mspos1 = 0x6666666; - - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - /* Subsample placement. These may not be optimal. */ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - } - } + mspos0 = 0x66666666; + mspos1 = 0x6666666; - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(mspos0); - OUT_CS(mspos1); + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. */ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); END_CS; } @@ -1039,56 +1023,26 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } -static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) -{ - CS_LOCALS(r300); - BEGIN_CS(4); - OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); - OUT_CS(start); - OUT_CS(count); - OUT_CS(val); - END_CS; -} - -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct r300_hyperz_state *z = - (struct r300_hyperz_state*)r300->hyperz_state.state; - struct r300_screen* r300screen = r300->screen; - uint32_t stride, offset = 0, height, offset_shift; struct r300_resource* tex; - int i; + CS_LOCALS(r300); tex = r300_resource(fb->zsbuf->texture); - offset = tex->hiz_mem[fb->zsbuf->u.tex.level]->ofs; - stride = tex->tex.stride_in_pixels[fb->zsbuf->u.tex.level]; - - /* convert from pixels to 4x4 blocks */ - stride = ALIGN_DIVUP(stride, 4); - - stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); - /* there are 4 blocks per dwords */ - stride = ALIGN_DIVUP(stride, 4); - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - - offset_shift = 2; - offset_shift += (r300screen->caps.num_frag_pipes / 2); - - for (i = 0; i < height; i++) { - offset = i * stride; - offset <<= offset_shift; - r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); - } - z->current_func = -1; + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(0); + OUT_CS(tex->tex.hiz_dwords[fb->zsbuf->u.tex.level]); + OUT_CS(r300->hiz_clear_value); + END_CS; /* Mark the current zbuffer's hiz ram as in use. */ - tex->hiz_in_use[fb->zsbuf->u.tex.level] = TRUE; + r300->hiz_in_use = TRUE; + r300->hiz_func = HIZ_FUNC_NONE; + r300_mark_atom_dirty(r300, &r300->hyperz_state); } void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) @@ -1236,7 +1190,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) /* Emitted in flush. */ dwords += 26; /* emit_query_end */ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) dwords += 2; return dwords; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index c77cc08539d..9c41a1383ce 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -57,7 +57,7 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) r500_emit_index_bias(r300, 0); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 873e0209d42..ecaadf4af8e 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -22,7 +22,6 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_context.h" -#include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" #include "r300_winsys.h" @@ -41,58 +40,74 @@ /* The HyperZ setup */ /*****************************************************************************/ -static bool r300_get_sc_hz_max(struct r300_context *r300) +static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_SC_HYPERZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; - if (func >= R300_ZS_GEQUAL && func <= R300_ZS_ALWAYS) - ret = R300_SC_HYPERZ_MAX; - return ret; + if (!dsa->dsa.depth.enabled || !dsa->dsa.depth.writemask) + return HIZ_FUNC_NONE; + + switch (dsa->dsa.depth.func) { + case PIPE_FUNC_NEVER: + case PIPE_FUNC_EQUAL: + case PIPE_FUNC_NOTEQUAL: + case PIPE_FUNC_ALWAYS: + return HIZ_FUNC_NONE; + + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + return HIZ_FUNC_MAX; + + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + return HIZ_FUNC_MIN; + + default: + assert(0); + return HIZ_FUNC_NONE; + } } -static bool r300_zfunc_same_direction(int func1, int func2) +/* Return what's used for the depth test (either minimum or maximum). */ +static unsigned r300_get_sc_hz_max(struct r300_context *r300) { - /* func1 is less/lessthan */ - if ((func1 == R300_ZS_LESS || func1 == R300_ZS_LEQUAL) && - (func2 == R300_ZS_EQUAL || func2 == R300_ZS_GEQUAL || - func2 == R300_ZS_GREATER)) - return FALSE; - - /* func1 is greater/greaterthan */ - if ((func1 == R300_ZS_GEQUAL || func1 == R300_ZS_GREATER) && - (func2 == R300_ZS_LESS || func2 == R300_ZS_LEQUAL)) - return FALSE; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; - return TRUE; + return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN; } -static int r300_get_hiz_min(struct r300_context *r300) +static boolean r300_is_hiz_func_valid(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - int ret = R300_HIZ_MIN; + struct r300_dsa_state *dsa = r300->dsa_state.state; + unsigned func = dsa->dsa.depth.func; + + if (r300->hiz_func == HIZ_FUNC_NONE) + return TRUE; + + /* func1 is less/lessthan */ + if (r300->hiz_func == HIZ_FUNC_MAX && + (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER)) + return FALSE; - if (func == R300_ZS_LESS || func == R300_ZS_LEQUAL) - ret = R300_HIZ_MAX; - return ret; + /* func1 is greater/greaterthan */ + if (r300->hiz_func == HIZ_FUNC_MIN && + (func == PIPE_FUNC_LESS || func == PIPE_FUNC_LEQUAL)) + return FALSE; + + return TRUE; } static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) { - if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || - s->zfail_op != PIPE_STENCIL_OP_KEEP)) - return TRUE; - return FALSE; + return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP); } static boolean r300_can_hiz(struct r300_context *r300) { - struct r300_dsa_state *dsa_state = r300->dsa_state.state; - struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; - struct r300_screen* r300screen = r300->screen; - struct r300_hyperz_state *z = r300->hyperz_state.state; + struct r300_dsa_state *dsa = r300->dsa_state.state; + struct r300_screen *r300screen = r300->screen; /* shader writes depth - no HiZ */ if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ @@ -100,34 +115,21 @@ static boolean r300_can_hiz(struct r300_context *r300) if (r300->query_current) return FALSE; + /* if stencil fail/zfail op is not KEEP */ - if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || - r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1])) return FALSE; - if (dsa->depth.enabled) { + if (dsa->dsa.depth.enabled) { /* if depth func is EQUAL pre-r500 */ - if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) return FALSE; + /* if depth func is NOTEQUAL */ - if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL) return FALSE; } - /* depth comparison function - if just cleared save and return okay */ - if (z->current_func == -1) { - int func = dsa_state->z_stencil_control & R300_ZS_MASK; - if (func != 0 && func != 7) - z->current_func = dsa_state->z_stencil_control & R300_ZS_MASK; - } else { - /* simple don't change */ - if (!r300_zfunc_same_direction(z->current_func, - (dsa_state->z_stencil_control & R300_ZS_MASK))) { - DBG(r300, DBG_HYPERZ, - "z func changed direction - disabling hyper-z %d -> %d\n", - z->current_func, dsa_state->z_stencil_control); - return FALSE; - } - } return TRUE; } @@ -139,7 +141,6 @@ static void r300_update_hyperz(struct r300_context* r300) (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_resource *zstex = fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL; - boolean hiz_in_use = FALSE; z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; @@ -151,16 +152,12 @@ static void r300_update_hyperz(struct r300_context* r300) return; } - if (!zstex) + if (!zstex || + !r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) return; - if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) - return; - - hiz_in_use = zstex->hiz_in_use[fb->zsbuf->u.tex.level]; - /* Zbuffer compression. */ - if (r300->zmask_in_use && !r300->zmask_locked) { + if (r300->zmask_in_use && !r300->hyperz_locked) { z->zb_bw_cntl |= R300_FAST_FILL_ENABLE | /*R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE |*/ R300_RD_COMP_ENABLE; @@ -174,16 +171,28 @@ static void r300_update_hyperz(struct r300_context* r300) z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; } - if (hiz_in_use && r300_can_hiz(r300)) { - z->zb_bw_cntl |= R300_HIZ_ENABLE | - r300_get_hiz_min(r300); - - z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | - r300_get_sc_hz_max(r300); + /* HiZ. */ + if (r300->hiz_in_use && !r300->hyperz_locked) { + /* Set the HiZ function if needed. */ + if (r300->hiz_func == HIZ_FUNC_NONE) { + r300->hiz_func = r300_get_hiz_func(r300); + } - if (r300->screen->caps.is_r500) { - z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3 | - R500_HIZ_EQUAL_REJECT_ENABLE; + /* If the depth function is inverted, HiZ must be disabled. */ + if (!r300_is_hiz_func_valid(r300)) { + r300->hiz_in_use = FALSE; + } else if (r300_can_hiz(r300)) { + /* Setup the HiZ bits. */ + z->zb_bw_cntl |= + R300_HIZ_ENABLE | + (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX); + + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE | + r300_get_sc_hz_max(r300); + + if (r300->screen->caps.is_r500) { + z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE; + } } } @@ -282,18 +291,6 @@ static void r300_update_ztop(struct r300_context* r300) r300_mark_atom_dirty(r300, &r300->ztop_state); } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - -static void r300_update_hiz_clear(struct r300_context *r300) -{ - struct pipe_framebuffer_state *fb = - (struct pipe_framebuffer_state*)r300->fb_state.state; - uint32_t height; - - height = ALIGN_DIVUP(fb->zsbuf->height, 4); - r300->hiz_clear.size = height * 4; -} - void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); @@ -301,51 +298,4 @@ void r300_update_hyperz_state(struct r300_context* r300) if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } - - if (r300->hiz_clear.dirty) { - r300_update_hiz_clear(r300); - } -} - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) -{ - struct r300_resource *tex; - uint32_t zsize, ndw; - int level = surf->base.u.tex.level; - - tex = r300_resource(surf->base.texture); - - if (tex->hiz_mem[level]) - return; - - zsize = tex->tex.layer_size_in_bytes[level]; - zsize /= util_format_get_blocksize(tex->b.b.b.format); - ndw = ALIGN_DIVUP(zsize, 64); - - tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); -} - -boolean r300_hyperz_init_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - int frag_pipes = r300screen->caps.num_frag_pipes; - - if (r300screen->caps.hiz_ram) { - r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); - if (!r300->hiz_mm) { - return FALSE; - } - } - - return TRUE; -} - -void r300_hyperz_destroy_mm(struct r300_context *r300) -{ - struct r300_screen* r300screen = r300->screen; - - if (r300screen->caps.hiz_ram) { - u_mmDestroy(r300->hiz_mm); - r300->hiz_mm = NULL; - } } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h deleted file mode 100644 index d4c8e7c60a9..00000000000 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2010 Marek Olšák <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_HYPERZ_H -#define R300_HYPERZ_H - -struct r300_context; - -void r300_update_hyperz_state(struct r300_context* r300); - -void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); - -boolean r300_hyperz_init_mm(struct r300_context *r300); -void r300_hyperz_destroy_mm(struct r300_context *r300); - -#endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 1d93dab2ca2..bb30b1ab0be 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -467,6 +467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ +#define R500_VAP_TEX_TO_COLOR_CNTL 0x2218 + #define R300_VAP_CLIP_CNTL 0x221C # define R300_VAP_UCP_ENABLE_0 (1 << 0) # define R300_VAP_UCP_ENABLE_1 (1 << 1) @@ -857,6 +859,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_TX_DIRECTION_HORIZONTAL (0<<27) # define R500_TX_DIRECTION_VERITCAL (1<<27) +#define R500_SU_TEX_WRAP_PS3 0x4114 + /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ #define R300_GA_POINT_S0 0x4200 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2ead8667bda..0ec4a225865 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -205,7 +205,7 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, if (first_draw) { cs_dwords += r300_get_num_dirty_dwords(r300); - if (r300->screen->caps.index_bias_supported) + if (r300->screen->caps.is_r500) cs_dwords += 2; /* emit_index_offset */ if (emit_vertex_arrays) @@ -257,7 +257,7 @@ static boolean r300_emit_states(struct r300_context *r300, } r300_emit_dirty_state(r300); - if (r300->screen->caps.index_bias_supported) { + if (r300->screen->caps.is_r500) { if (r300->screen->caps.has_tcl) r500_emit_index_bias(r300, index_bias); else @@ -557,7 +557,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count-1; i += 2) OUT_CS(((ptr1[i+1] + indexBias) << 16) | (ptr1[i] + indexBias)); @@ -581,7 +581,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count-1; i += 2) OUT_CS(((ptr2[i+1] + indexBias) << 16) | (ptr2[i] + indexBias)); @@ -601,7 +601,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300_translate_primitive(mode)); - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { for (i = 0; i < count; i++) OUT_CS(ptr4[i] + indexBias); } else { @@ -620,13 +620,12 @@ static void r300_draw_elements(struct r300_context *r300, int indexBias, unsigned indexSize = r300->index_buffer.index_size; struct pipe_resource* orgIndexBuffer = indexBuffer; boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + count > 65536; unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ uint16_t indices3[3]; - if (indexBias && !r300->screen->caps.index_bias_supported) { + if (indexBias && !r300->screen->caps.is_r500) { r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); } @@ -702,8 +701,7 @@ static void r300_draw_arrays(struct r300_context *r300, unsigned mode, unsigned start, unsigned count) { boolean alt_num_verts = r300->screen->caps.is_r500 && - count > 65536 && - r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + count > 65536; unsigned short_count; /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ @@ -748,6 +746,8 @@ static void r300_draw_vbo(struct pipe_context* pipe, return; } + r300_update_derived_state(r300); + /* Start the vbuf manager and update buffers if needed. */ u_vbuf_mgr_draw_begin(r300->vbuf_mgr, info, &buffers_updated, &uploader_flushed); @@ -756,8 +756,6 @@ static void r300_draw_vbo(struct pipe_context* pipe, } /* Draw. */ - r300_update_derived_state(r300); - if (indexed) { if (count <= 8 && r300_resource(r300->index_buffer.buffer)->b.user_ptr) { diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 77a9c6ad86f..52d0247fbfd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -456,10 +456,6 @@ struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; - r300screen->caps.index_bias_supported = - r300screen->caps.is_r500 && - rws->get_value(rws, R300_VID_DRM_2_3_0); - pipe_mutex_init(r300screen->num_contexts_mutex); util_slab_create(&r300screen->pool_buffers, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 09f18b3e624..b810f4081c8 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -45,7 +45,6 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" -#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -707,7 +706,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, else if (state->zsbuf) { r300->fb_state.size += 10; if (can_hyperz) - r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + r300->fb_state.size += 8; } /* The size of the rest of atoms stays the same. */ @@ -720,7 +719,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; @@ -738,28 +736,30 @@ r300_set_framebuffer_state(struct pipe_context* pipe, return; } - if (old_state->zsbuf && r300->zmask_in_use && !r300->zmask_locked) { + if (old_state->zsbuf && r300->zmask_in_use && !r300->hyperz_locked) { /* There is a zmask in use, what are we gonna do? */ if (state->zsbuf) { if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) { /* Decompress the currently bound zbuffer before we bind another one. */ r300_decompress_zmask(r300); + r300->hiz_in_use = FALSE; } } else { /* We don't bind another zbuffer, so lock the current one. */ - r300->zmask_locked = TRUE; + r300->hyperz_locked = TRUE; pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf); } - } else if (r300->zmask_locked && r300->locked_zbuffer) { + } else if (r300->hyperz_locked && r300->locked_zbuffer) { /* We have a locked zbuffer now, what are we gonna do? */ if (state->zsbuf) { if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) { /* We are binding some other zbuffer, so decompress the locked one, * it gets unlocked automatically. */ r300_decompress_zmask_locked_unsafe(r300); + r300->hiz_in_use = FALSE; } else { /* We are binding the locked zbuffer again, so unlock it. */ - r300->zmask_locked = FALSE; + r300->hyperz_locked = FALSE; } } } @@ -778,7 +778,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe, util_copy_framebuffer_state(r300->fb_state.state, state); - if (!r300->zmask_locked) { + if (!r300->hyperz_locked) { pipe_surface_reference(&r300->locked_zbuffer, NULL); } @@ -794,20 +794,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, break; } - /* Setup Hyper-Z. */ - if (can_hyperz) { - struct r300_surface *zs_surf = r300_surface(state->zsbuf); - struct r300_resource *tex = r300_resource(zs_surf->base.texture); - int level = zs_surf->base.u.tex.level; - - /* work out whether we can support hiz on this buffer */ - r300_hiz_alloc_block(r300, zs_surf); - - DBG(r300, DBG_HYPERZ, - "hyper-z features: hiz: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0, - tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef); - } - /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; @@ -818,27 +804,25 @@ r300_set_framebuffer_state(struct pipe_context* pipe, } /* Set up AA config. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { - aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; - - switch (state->cbufs[0]->texture->nr_samples) { - case 2: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - break; - case 3: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; - break; - case 4: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; - break; - case 6: - aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; - break; - } - } else { - aa->aa_config = 0; + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; } + } else { + aa->aa_config = 0; } if (DBG_ON(r300, DBG_FB)) { diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 003fe9a58cd..a1e116f4b61 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -29,7 +29,6 @@ #include "r300_context.h" #include "r300_fs.h" -#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" #include "r300_state_inlines.h" @@ -642,8 +641,25 @@ static uint32_t r300_get_border_color(enum pipe_format format, /* Compressed formats. */ if (util_format_is_compressed(format)) { - util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - return uc.ui; + switch (format) { + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC1_UNORM: + /* Add 1/32 to round the border color instead of truncating. */ + /* The Y component is used for the border color. */ + border_swizzled[1] = border_swizzled[2] + 1.0f/32; + util_pack_color(border_swizzled, PIPE_FORMAT_B4G4R4A4_UNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_SNORM: + border_swizzled[0] = border_swizzled[2]; + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_SNORM, &uc); + return uc.ui; + case PIPE_FORMAT_RGTC2_UNORM: + util_pack_color(border_swizzled, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + return uc.ui; + default: + util_pack_color(border_swizzled, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + return uc.ui; + } } switch (desc->channel[0].size) { @@ -937,7 +953,7 @@ static void r300_decompress_depth_textures(struct r300_context *r300) state->sampler_state_count); unsigned i; - if (!r300->zmask_locked || !r300->locked_zbuffer) { + if (!r300->hyperz_locked || !r300->locked_zbuffer) { return; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index b97c45ac198..86ad0b8b8e0 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -171,8 +171,16 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, - util_format_is_compressed(format) && dxtc_swizzle); + if (util_format_is_compressed(format) && + dxtc_swizzle && + format != PIPE_FORMAT_RGTC2_UNORM && + format != PIPE_FORMAT_RGTC2_SNORM) { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + dxtc_swizzle); + } else { + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + FALSE); + } /* S3TC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { @@ -197,41 +205,36 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - /* Add sign. */ - for (i = 0; i < desc->nr_channels; i++) { - if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { - result |= sign_bit[i]; - } - } - - /* This is truly a special format. - * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2) - * in the sampler unit. Also known as D3DFMT_CxV8U8. */ - if (format == PIPE_FORMAT_R8G8Bx_SNORM) { - return R300_TX_FORMAT_CxV8U8 | result; - } - /* RGTC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { switch (format) { case PIPE_FORMAT_RGTC1_SNORM: - result |= sign_bit[0]; + result |= sign_bit[1]; case PIPE_FORMAT_RGTC1_UNORM: - result &= ~(0xfff << 9); /* mask off swizzle */ - result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT; return R500_TX_FORMAT_ATI1N | result; case PIPE_FORMAT_RGTC2_SNORM: - result |= sign_bit[0] | sign_bit[1]; + result |= sign_bit[2] | sign_bit[3]; case PIPE_FORMAT_RGTC2_UNORM: - result &= ~(0xfff << 9); /* mask off swizzle */ - result |= R300_TX_FORMAT_Y << R300_TX_FORMAT_R_SHIFT | - R300_TX_FORMAT_X << R300_TX_FORMAT_G_SHIFT; return R400_TX_FORMAT_ATI2N | result; default: return ~0; /* Unsupported/unknown. */ } } + /* This is truly a special format. + * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2) + * in the sampler unit. Also known as D3DFMT_CxV8U8. */ + if (format == PIPE_FORMAT_R8G8Bx_SNORM) { + return R300_TX_FORMAT_CxV8U8 | result; + } + + /* Add sign. */ + for (i = 0; i < desc->nr_channels; i++) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { + result |= sign_bit[i]; + } + } + /* See whether the components are of the same size. */ for (i = 1; i < desc->nr_channels; i++) { uniform = uniform && desc->channel[0].size == desc->channel[i].size; @@ -676,6 +679,8 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf) R300_DEPTHMACROTILE(tex->tex.macrotile[level]) | R300_DEPTHMICROTILE(tex->tex.microtile); surf->format = r300_translate_zsformat(surf->base.format); + surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level]; + surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level]; } else { surf->pitch = tex->tex.stride_in_pixels[level] | @@ -713,14 +718,8 @@ static void r300_texture_destroy(struct pipe_screen *screen, struct pipe_resource* texture) { struct r300_resource* tex = (struct r300_resource*)texture; - int i; r300_winsys_bo_reference(&tex->buf, NULL); - for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { - if (tex->hiz_mem[i]) - u_mmFreeMem(tex->hiz_mem[i]); - } - FREE(tex); } @@ -868,8 +867,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_DRM_2_1_0)) - microtile = R300_BUFFER_SQUARETILED; + microtile = R300_BUFFER_SQUARETILED; break; } } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 2cfeec7d751..3846fb8b6b3 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -207,29 +207,6 @@ static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, return util_format_get_nblocksy(tex->b.b.b.format, height); } -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_resource *tex) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - tex->b.b.b.target == PIPE_TEXTURE_3D && - tex->b.b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= tex->b.b.b.last_level; i++) { - size += tex->tex.stride_in_bytes[i] * - r300_texture_get_nblocksy(tex, i, FALSE); - } - - size *= tex->tex.depth0; - tex->tex.size_in_bytes = size; - } -} - /* Get a width in pixels from a stride in bytes. */ static unsigned stride_to_width(enum pipe_format format, unsigned stride_in_bytes) @@ -334,12 +311,17 @@ static void r300_setup_cbzb_flags(struct r300_screen *rscreen, tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; } -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) +static unsigned r300_pixels_to_dwords(unsigned stride, + unsigned height, + unsigned xblock, unsigned yblock) +{ + return (align(stride, xblock) * align(height, yblock)) / (xblock * yblock); +} -static void r300_setup_zmask_flags(struct r300_screen *screen, - struct r300_resource *tex) +static void r300_setup_hyperz_properties(struct r300_screen *screen, + struct r300_resource *tex) { - /* The tile size of 1 DWORD is: + /* The tile size of 1 DWORD in ZMASK RAM is: * * GPU Pipes 4x4 mode 8x8 mode * ------------------------------------------ @@ -348,8 +330,31 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, * RV530 1P/2Z 32x16 64x32 * 1P/1Z 16x16 32x32 */ - static unsigned num_blocks_x_per_dw[4] = {4, 8, 12, 8}; - static unsigned num_blocks_y_per_dw[4] = {4, 4, 4, 8}; + static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8}; + static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8}; + + /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels), + * but the blocks have very weird ordering. + * + * With 2 pipes and an image of size 8xY, where Y >= 1, + * clearing 4 dwords clears blocks like this: + * + * 01012323 + * + * where numbers correspond to dword indices. The blocks are interleaved + * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels). + * + * With 4 pipes and an image of size 8xY, where Y >= 4, + * clearing 8 dwords clears blocks like this: + * 01012323 + * 45456767 + * 01012323 + * 45456767 + * where numbers correspond to dword indices. The blocks are interleaved + * in both directions, so the alignment must be 4x4 blocks (32x32 pixels) + */ + static unsigned hiz_align_x[4] = {8, 32, 48, 32}; + static unsigned hiz_align_y[4] = {8, 8, 8, 32}; if (util_format_is_depth_or_stencil(tex->b.b.b.format) && util_format_get_blocksizebits(tex->b.b.b.format) == 32 && @@ -363,30 +368,49 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, } for (i = 0; i <= tex->b.b.b.last_level; i++) { - unsigned numdw, compsize; + unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height; + + stride = align(tex->tex.stride_in_pixels[i], 16); + height = u_minify(tex->b.b.b.height0, i); /* The 8x8 compression mode needs macrotiling. */ - compsize = screen->caps.z_compress == R300_ZCOMP_8X8 && + zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 && tex->tex.macrotile[i] && tex->b.b.b.nr_samples <= 1 ? 8 : 4; - /* Get the zbuffer size (with the aligned width and height). */ - numdw = align(tex->tex.stride_in_pixels[i], - num_blocks_x_per_dw[pipes-1] * compsize) * - align(u_minify(tex->b.b.b.height0, i), - num_blocks_y_per_dw[pipes-1] * compsize); + /* Get the ZMASK buffer size in dwords. */ + zcomp_numdw = r300_pixels_to_dwords(stride, height, + zmask_blocks_x_per_dw[pipes-1] * zcompsize, + zmask_blocks_y_per_dw[pipes-1] * zcompsize); - /* Convert pixels -> dwords. */ - numdw = ALIGN_DIVUP(numdw, num_blocks_x_per_dw[pipes-1] * compsize * - num_blocks_y_per_dw[pipes-1] * compsize); + /* Check whether we have enough ZMASK memory. */ + if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 && + zcomp_numdw <= screen->caps.zmask_ram * pipes) { + tex->tex.zmask_dwords[i] = zcomp_numdw; + tex->tex.zcomp8x8[i] = zcompsize == 8; - /* Check that we have enough ZMASK memory. */ - if (numdw <= screen->caps.zmask_ram * pipes) { - tex->tex.zmask_dwords[i] = numdw; - tex->tex.zcomp8x8[i] = compsize == 8; + tex->tex.zmask_stride_in_pixels[i] = + align(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); } else { tex->tex.zmask_dwords[i] = 0; tex->tex.zcomp8x8[i] = FALSE; + tex->tex.zmask_stride_in_pixels[i] = 0; + } + + /* Now setup HIZ. */ + stride = align(stride, hiz_align_x[pipes-1]); + height = align(height, hiz_align_y[pipes-1]); + + /* Get the HIZ buffer size in dwords. */ + hiz_numdw = (stride * height) / (8*8 * pipes); + + /* Check whether we have enough HIZ memory. */ + if (hiz_numdw <= screen->caps.hiz_ram * pipes) { + tex->tex.hiz_dwords[i] = hiz_numdw; + tex->tex.hiz_stride_in_pixels[i] = stride; + } else { + tex->tex.hiz_dwords[i] = 0; + tex->tex.hiz_stride_in_pixels[i] = 0; } } } @@ -395,7 +419,6 @@ static void r300_setup_zmask_flags(struct r300_screen *screen, static void r300_setup_tiling(struct r300_screen *screen, struct r300_resource *tex) { - struct r300_winsys_screen *rws = screen->rws; enum pipe_format format = tex->b.b.b.format; boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; boolean is_zb = util_format_is_depth_or_stencil(format); @@ -422,9 +445,7 @@ static void r300_setup_tiling(struct r300_screen *screen, break; case 2: - if (rws->get_value(rws, R300_VID_DRM_2_1_0)) { - tex->tex.microtile = R300_BUFFER_SQUARETILED; - } + tex->tex.microtile = R300_BUFFER_SQUARETILED; break; } @@ -494,8 +515,7 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen, r300_setup_miptree(rscreen, tex, FALSE); } - r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_setup_zmask_flags(rscreen, tex); + r300_setup_hyperz_properties(rscreen, tex); if (tex->buf_size) { /* Make sure the buffer we got is large enough. */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index d5c73585c81..c0b66899f8b 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -60,10 +60,8 @@ enum r300_value_id { R300_VID_DRM_PATCHLEVEL, /* These should probably go away: */ - R300_VID_DRM_2_1_0, /* Square tiling. */ - R300_VID_DRM_2_3_0, /* R500 VAP regs, MSPOS regs, fixed tex3D size checking */ R300_VID_DRM_2_6_0, /* Hyper-Z, GB_Z_PEQ_CONFIG on rv350->r4xx, R500 FG_ALPHA_VALUE */ - R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask */ + R300_VID_DRM_2_8_0, /* R500 US_FORMAT regs, R500 ARGB2101010 colorbuffer, CMask, R16F/RG16F */ R300_CAN_HYPERZ, /* ZMask + HiZ */ R300_CAN_AACOMPRESS, /* CMask */ diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index b5fcc7106fe..cae3888051b 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -503,9 +503,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { - return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 3efdbaba0c3..4206b4a201d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -305,11 +305,16 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, { struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); union util_color uc; + uint32_t coord_trunc = 0; if (rstate == NULL) { return NULL; } + if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) || + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST)) + coord_trunc = 1; + rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -328,6 +333,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | + S_03C008_MC_COORD_TRUNCATE(coord_trunc) | S_03C008_TYPE(1), 0xFFFFFFFF, NULL); @@ -370,7 +376,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(state->format, + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index f0a1ee0cd02..c51a163bd06 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -108,8 +108,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate)) /* Registers */ #define R_008C00_SQ_CONFIG 0x00008C00 diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 1b76f0098dd..0b7d6f70968 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -114,6 +114,8 @@ enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); unsigned r600_get_clock_crystal_freq(struct radeon *radeon); +unsigned r600_get_minor_version(struct radeon *radeon); +unsigned r600_get_num_backends(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; @@ -251,6 +253,7 @@ struct r600_context { unsigned num_query_running; struct list_head fenced_bo; unsigned max_db; /* for OQ */ + boolean predicate_drawing; }; struct r600_draw { @@ -283,6 +286,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query); void r600_query_end(struct r600_context *ctx, struct r600_query *query); void r600_context_queries_suspend(struct r600_context *ctx); void r600_context_queries_resume(struct r600_context *ctx); +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, + int flag_wait); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3e478382801..d7a7928386f 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -3061,7 +3061,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; - vtx.mega_fetch_count = 16; + vtx.mega_fetch_count = 0x1F; vtx.dst_gpr = i + 1; vtx.dst_sel_x = desc->swizzle[0]; vtx.dst_sel_y = desc->swizzle[1]; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 3fd6668f718..651f994a502 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -431,7 +431,7 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, return FALSE; if ((usage & PIPE_BIND_SAMPLER_VIEW) && - r600_is_sampler_format_supported(format)) { + r600_is_sampler_format_supported(screen, format)) { retval |= PIPE_BIND_SAMPLER_VIEW; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 8dc1f4ad5c3..5f701d87e8f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -227,7 +227,7 @@ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); void r600_init_surface_functions(struct r600_pipe_context *r600); -uint32_t r600_translate_texformat(enum pipe_format format, +uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 726668260cc..343403f92f3 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r600_pipe.h" +#include "r600d.h" static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { @@ -66,6 +67,30 @@ static boolean r600_get_query_result(struct pipe_context *ctx, return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } +static void r600_render_condition(struct pipe_context *ctx, + struct pipe_query *query, + uint mode) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + int wait_flag = 0; + + if (!query) { + rctx->ctx.predicate_drawing = false; + r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); + return; + } + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + wait_flag = 1; + } + + rctx->ctx.predicate_drawing = true; + r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag); + +} + void r600_init_query_functions(struct r600_pipe_context *rctx) { rctx->context.create_query = r600_create_query; @@ -73,4 +98,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx) rctx->context.begin_query = r600_begin_query; rctx->context.end_query = r600_end_query; rctx->context.get_query_result = r600_get_query_result; + + if (r600_get_num_backends(rctx->screen->radeon) > 0) + rctx->context.render_condition = r600_render_condition; } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 576067ae81e..1be641798f7 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -358,11 +358,16 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, { struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); union util_color uc; + uint32_t coord_trunc = 0; if (rstate == NULL) { return NULL; } + if ((state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) || + (state->min_img_filter == PIPE_TEX_FILTER_NEAREST)) + coord_trunc = 1; + rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -379,7 +384,9 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, + S_03C008_MC_COORD_TRUNCATE(coord_trunc) | + S_03C008_TYPE(1), 0xFFFFFFFF, NULL); if (uc.ui) { r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); @@ -420,7 +427,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c swizzle[1] = state->swizzle_g; swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - format = r600_translate_texformat(state->format, + format = r600_translate_texformat(ctx->screen, state->format, swizzle, &word4, &yuv_format); if (format == ~0) { diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 3dd54f45202..9e6ae295239 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -500,9 +500,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } -static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format) +static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { - return r600_translate_texformat(format, NULL, NULL, NULL) != ~0; + return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; } static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 048d0b61e3b..7d3d022d973 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -226,7 +226,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, w = mip_minify(ptex->width0, level); h = mip_minify(ptex->height0, level); - if (w < tile_width || h < tile_height) + if (w <= tile_width || h <= tile_height) rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1; else rtex->array_mode[level] = array_mode; @@ -422,8 +422,13 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, /* Would like some magic "get_bool_option_once" routine. */ - if (force_tiling == -1) - force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE); + if (force_tiling == -1) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + if (r600_get_minor_version(rscreen->radeon) >= 9) + force_tiling = debug_get_bool_option("R600_TILING", TRUE); + else + force_tiling = debug_get_bool_option("R600_TILING", FALSE); + } if (force_tiling && permit_hardware_blit(screen, templ)) { if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && @@ -813,7 +818,8 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, } /* texture format translate */ -uint32_t r600_translate_texformat(enum pipe_format format, +uint32_t r600_translate_texformat(struct pipe_screen *screen, + enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p) { @@ -879,8 +885,13 @@ uint32_t r600_translate_texformat(enum pipe_format format, break; } - if (r600_enable_s3tc == -1) - r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + if (r600_enable_s3tc == -1) { + struct r600_screen *rscreen = (struct r600_screen *)screen; + if (r600_get_minor_version(rscreen->radeon) >= 9) + r600_enable_s3tc = 1; + else + r600_enable_s3tc = debug_get_bool_option("R600_ENABLE_S3TC", FALSE); + } if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { if (!r600_enable_s3tc) diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index e8558c49a7c..df70e2889e2 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -67,6 +67,10 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define PREDICATION_OP_CLEAR 0x0 +#define PREDICATION_OP_ZPASS 0x1 +#define PREDICATION_OP_PRIMCOUNT 0x2 + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 603e1de7982..a06817c5735 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -249,11 +249,6 @@ softpipe_is_format_supported( struct pipe_screen *screen, return util_format_s3tc_enabled; } - /* u_format doesn't implement RGTC yet */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { - return FALSE; - } - /* * Everything else should be supported by u_format. */ diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c index 66ddc565722..3dec5de3cc4 100644 --- a/src/gallium/drivers/softpipe/sp_state_shader.c +++ b/src/gallium/drivers/softpipe/sp_state_shader.c @@ -91,8 +91,6 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); - draw_flush(softpipe->draw); - if (softpipe->fs == fs) return; diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 76a3803224a..b7d54605e66 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -40,7 +40,7 @@ #include "svga_debug.h" -#define MAX_DMA_SIZE (8 * 1024 * 1024) +#define MAX_DMA_SIZE (4 * 1024 * 1024) /** diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index daf1024fd02..6c3275e74c0 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -40,9 +40,12 @@ /* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* */ -static int svga_shader_type( int unit ) +static int svga_shader_type( int shader ) { - return unit + 1; + assert(PIPE_SHADER_VERTEX + 1 == SVGA3D_SHADERTYPE_VS); + assert(PIPE_SHADER_FRAGMENT + 1 == SVGA3D_SHADERTYPE_PS); + assert(shader <= PIPE_SHADER_FRAGMENT); + return shader + 1; } |