diff options
author | Eric Anholt <[email protected]> | 2010-07-26 17:47:59 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2010-07-26 17:53:27 -0700 |
commit | afe125e0a18ac3886c45c7e6b02b122fb2d327b5 (patch) | |
tree | 78621707e71154c0b388b0baacffc26432b7e992 /src/gallium/drivers/r300 | |
parent | d64343f1ae84979bd154475badf11af8a9bfc2eb (diff) | |
parent | 5403ca79b225605c79f49866a6497c97da53be3b (diff) |
Merge remote branch 'origin/master' into glsl2
This pulls in multiple i965 driver fixes which will help ensure better
testing coverage during development, and also gets past the conflicts
of the src/mesa/shader -> src/mesa/program move.
Conflicts:
src/mesa/Makefile
src/mesa/main/shaderapi.c
src/mesa/main/shaderobj.h
Diffstat (limited to 'src/gallium/drivers/r300')
37 files changed, 2285 insertions, 1372 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index dd897f6072f..728bc40a5bb 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -21,10 +21,10 @@ C_SOURCES = \ r300_screen_buffer.c \ r300_state.c \ r300_state_derived.c \ - r300_state_invariant.c \ r300_vs.c \ r300_vs_draw.c \ r300_texture.c \ + r300_texture_desc.c \ r300_tgsi_to_rc.c \ r300_transfer.c diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index ee19e9d2783..bf023daaa56 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -31,10 +31,10 @@ r300 = env.ConvenienceLibrary( 'r300_screen_buffer.c', 'r300_state.c', 'r300_state_derived.c', - 'r300_state_invariant.c', 'r300_vs.c', 'r300_vs_draw.c', 'r300_texture.c', + 'r300_texture_desc.c', 'r300_tgsi_to_rc.c', 'r300_transfer.c', ] + r300compiler) + r300compiler diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 2a477012912..d125196b6dc 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -24,12 +24,13 @@ #include "r300_texture.h" #include "util/u_format.h" +#include "util/u_pack_color.h" -enum r300_blitter_op +enum r300_blitter_op /* bitmask */ { - R300_CLEAR, - R300_CLEAR_SURFACE, - R300_COPY + R300_CLEAR = 1, + R300_CLEAR_SURFACE = 2, + R300_COPY = 4 }; static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op) @@ -79,6 +80,31 @@ static void r300_blitter_end(struct r300_context *r300) } } +static uint32_t r300_depth_clear_cb_value(enum pipe_format format, + const float* rgba) +{ + union util_color uc; + util_pack_color(rgba, format, &uc); + + if (util_format_get_blocksizebits(format) == 32) + return uc.ui; + else + return uc.us | (uc.us << 16); +} + +static boolean r300_cbzb_clear_allowed(struct r300_context *r300, + unsigned clear_buffers) +{ + struct pipe_framebuffer_state *fb = + (struct 
pipe_framebuffer_state*)r300->fb_state.state; + + /* Only color clear allowed, and only one colorbuffer. */ + if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) + return FALSE; + + return r300_surface(fb->cbufs[0])->cbzb_allowed; +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -86,39 +112,81 @@ static void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil) { - /* XXX Implement fastfill. + /* My notes about fastfill: + * + * 1) Only the zbuffer is cleared. + * + * 2) The zbuffer must be micro-tiled and whole microtiles must be + * written. If microtiling is disabled, it locks up. * - * If fastfill is enabled, a few facts should be considered: + * 3) There is Z Mask RAM which contains a compressed zbuffer and + * it interacts with fastfill. We should figure out how to use it + * to get more performance. + * This is what we know about the Z Mask: * - * 1) Zbuffer must be micro-tiled and whole microtiles must be - * written. + * Each dword of the Z Mask contains compression information + * for 16 4x4 pixel blocks, that is 2 bits for each block. + * On chips with 2 Z pipes, every other dword maps to a different + * pipe. * - * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be - * equal to 0. + * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must + * be equal to 0. (clear the Z Mask RAM with zeros) * - * 3) For 16-bit integer buffering, compression causes a hung with one or + * 5) For 16-bit zbuffer, compression causes a hung with one or * two samples and should not be used. * - * 4) Fastfill must not be used if reading of compressed Z data is disabled + * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears + * to avoid needless decompression. + * + * 7) Fastfill must not be used if reading of compressed Z data is disabled * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), * i.e. 
it cannot be used to compress the zbuffer. - * (what the hell does that mean and how does it fit in clearing - * the buffers?) + * + * 8) ZB_CB_CLEAR does not interact with fastfill in any way. * * - Marek */ struct r300_context* r300 = r300_context(pipe); - struct pipe_framebuffer_state* fb = + struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + uint32_t width = fb->width; + uint32_t height = fb->height; + + /* Enable CBZB clear. */ + if (r300_cbzb_clear_allowed(r300, buffers)) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + hyperz->zb_depthclearvalue = + r300_depth_clear_cb_value(surf->base.format, rgba); + + width = surf->cbzb_width; + height = surf->cbzb_height; + + r300->cbzb_clear = TRUE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + } + /* Clear. */ r300_blitter_begin(r300, R300_CLEAR); util_blitter_clear(r300->blitter, - fb->width, - fb->height, + width, + height, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); + + /* Disable CBZB clear. */ + if (r300->cbzb_clear) { + r300->cbzb_clear = FALSE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + } + + /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */ + if (r300->flush_counter == 0) + pipe->flush(pipe, 0, NULL); } /* Clear a region of a color surface to a constant value. 
*/ @@ -185,14 +253,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - if (dst->format != src->format) { - debug_printf("r300: Implementation error: Format mismatch in %s\n" - " : src: %s dst: %s\n", __FUNCTION__, - util_format_short_name(src->format), - util_format_short_name(dst->format)); - debug_assert(0); - } - if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h index 69874712442..9d3d4fc1b19 100644 --- a/src/gallium/drivers/r300/r300_cb.h +++ b/src/gallium/drivers/r300/r300_cb.h @@ -89,9 +89,6 @@ CB_DEBUG(cs_count = size;) \ } while (0) -#define BEGIN_CS_AS_CB(r300, size) \ - BEGIN_CB(r300->rws->get_cs_pointer(r300->rws, dwords), dwords) - #define END_CB do { \ CB_DEBUG(if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index e6dca66d4a0..21f3b9d2610 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,6 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? 
FALSE : TRUE; + caps->has_hiz = TRUE; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -76,6 +77,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; + caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; break; @@ -106,6 +108,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; + caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; break; @@ -201,24 +204,28 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index ab649c38573..65750f54e71 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -42,6 +42,8 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Some chipsets do not have HiZ RAM. */ + boolean has_hiz; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. 
The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 16a75aa612b..df903590583 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -32,23 +32,72 @@ #include "r300_emit.h" #include "r300_screen.h" #include "r300_screen_buffer.h" -#include "r300_state_invariant.h" #include "r300_winsys.h" #include <inttypes.h> -static void r300_destroy_context(struct pipe_context* context) +static void r300_update_num_contexts(struct r300_screen *r300screen, + int diff) { - struct r300_context* r300 = r300_context(context); + if (diff > 0) { + p_atomic_inc(&r300screen->num_contexts); + + if (r300screen->num_contexts > 1) + util_mempool_set_thread_safety(&r300screen->pool_buffers, + UTIL_MEMPOOL_MULTITHREADED); + } else { + p_atomic_dec(&r300screen->num_contexts); + + if (r300screen->num_contexts <= 1) + util_mempool_set_thread_safety(&r300screen->pool_buffers, + UTIL_MEMPOOL_SINGLETHREADED); + } +} + +static void r300_release_referenced_objects(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_textures_state *textures = + (struct r300_textures_state*)r300->textures_state.state; struct r300_query *query, *temp; - struct r300_atom *atom; + unsigned i; + /* Framebuffer state. */ + util_assign_framebuffer_state(fb, NULL); + + /* Textures. */ + for (i = 0; i < textures->sampler_view_count; i++) + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&textures->sampler_views[i], NULL); + + /* The special dummy texture for texkill. */ if (r300->texkill_sampler) { pipe_sampler_view_reference( (struct pipe_sampler_view**)&r300->texkill_sampler, NULL); } + /* The SWTCL VBO. */ + pipe_resource_reference(&r300->vbo, NULL); + + /* Vertex buffers. 
*/ + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); + } + + /* If there are any queries pending or not destroyed, remove them now. */ + foreach_s(query, temp, &r300->query_list) { + remove_from_list(query); + FREE(query); + } +} + +static void r300_destroy_context(struct pipe_context* context) +{ + struct r300_context* r300 = r300_context(context); + struct r300_atom *atom; + util_blitter_destroy(r300->blitter); draw_destroy(r300->draw); @@ -62,23 +111,30 @@ static void r300_destroy_context(struct pipe_context* context) } } - /* If there are any queries pending or not destroyed, remove them now. */ - foreach_s(query, temp, &r300->query_list) { - remove_from_list(query); - FREE(query); - } - u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); translate_cache_destroy(r300->tran.translate_cache); + r300_release_referenced_objects(r300); + + r300->rws->cs_destroy(r300->cs); + + util_mempool_destroy(&r300->pool_transfers); + + r300_update_num_contexts(r300->screen, -1); + + FREE(r300->aa_state.state); FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); FREE(r300->fb_state.state); + FREE(r300->gpu_flush.state); + FREE(r300->hyperz_state.state); + FREE(r300->invariant_state.state); FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); + FREE(r300->vap_invariant_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300->fs_constants.state); @@ -89,7 +145,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300); } -static void r300_flush_cb(void *data) +void r300_flush_cb(void *data) { struct r300_context* const cs_context_copy = data; @@ -106,8 +162,10 @@ static void r300_flush_cb(void *data) static void r300_setup_atoms(struct r300_context* r300) { + boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = 
r300->screen->caps.has_tcl; + boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); /* Create the actual atom list. * @@ -115,44 +173,75 @@ static void r300_setup_atoms(struct r300_context* r300) * can affect performance and conformance if not handled with care. * * Some atoms never change size, others change every emit - those have - * the size of 0 here. */ + * the size of 0 here. + * + * NOTE: The framebuffer state is split into these atoms: + * - gpu_flush (unpipelined regs) + * - aa_state (unpipelined regs) + * - fb_state (unpipelined regs) + * - hyperz_state (unpipelined regs followed by pipelined ones) + * - fb_state_pipelined (pipelined regs) + * The motivation behind this is to be able to emit a strict + * subset of the regs, and to have reasonable register ordering. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(invariant_state, 71); + /* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */ + R300_INIT_ATOM(gpu_flush, 9); + R300_INIT_ATOM(aa_state, 4); + R300_INIT_ATOM(fb_state, 0); + /* ZB (unpipelined), SC. */ + R300_INIT_ATOM(hyperz_state, 6); R300_INIT_ATOM(ztop_state, 2); - R300_INIT_ATOM(query_start, 4); + /* ZB, FG. */ + R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); + /* RB3D. */ R300_INIT_ATOM(blend_state, 8); R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); - R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); - R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); - R300_INIT_ATOM(fb_state, 0); - R300_INIT_ATOM(rs_state, 0); + /* SC. */ R300_INIT_ATOM(scissor_state, 3); + /* GB, FG, GA, SU, SC, RB3D. */ + R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0)); + /* VAP. 
*/ R300_INIT_ATOM(viewport_state, 9); - R300_INIT_ATOM(rs_block_state, 0); - R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(pvs_flush, 2); + R300_INIT_ATOM(vap_invariant_state, 9); + R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); - R300_INIT_ATOM(texture_cache_inval, 2); - R300_INIT_ATOM(textures_state, 0); + R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); + /* VAP, RS, GA, GB, SU, SC. */ + R300_INIT_ATOM(rs_block_state, 0); + R300_INIT_ATOM(rs_state, 0); + /* SC, US. */ + R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0)); + /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); R300_INIT_ATOM(fs_constants, 0); + /* TX. */ + R300_INIT_ATOM(texture_cache_inval, 2); + R300_INIT_ATOM(textures_state, 0); + /* ZB (unpipelined), SU. */ + R300_INIT_ATOM(query_start, 4); /* Replace emission functions for r500. */ - if (r300->screen->caps.is_r500) { + if (is_r500) { r300->fs.emit = r500_emit_fs; r300->fs_rc_constant_state.emit = r500_emit_fs_rc_constant_state; r300->fs_constants.emit = r500_emit_fs_constants; } /* Some non-CSO atoms need explicit space to store the state locally. 
*/ + r300->aa_state.state = CALLOC_STRUCT(r300_aa_state); r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); + r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); + r300->vap_invariant_state.state = CALLOC_STRUCT(r300_vap_invariant_state); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); @@ -162,27 +251,45 @@ static void r300_setup_atoms(struct r300_context* r300) } /* Some non-CSO atoms don't use the state pointer. */ - r300->invariant_state.allow_null_state = TRUE; + r300->fb_state_pipelined.allow_null_state = TRUE; r300->fs_rc_constant_state.allow_null_state = TRUE; r300->pvs_flush.allow_null_state = TRUE; r300->query_start.allow_null_state = TRUE; r300->texture_cache_inval.allow_null_state = TRUE; + + /* Some states must be marked as dirty here to properly set up + * hardware in the first command stream. */ + r300->invariant_state.dirty = TRUE; + r300->pvs_flush.dirty = TRUE; + r300->vap_invariant_state.dirty = TRUE; + r300->texture_cache_inval.dirty = TRUE; + r300->textures_state.dirty = TRUE; } /* Not every state tracker calls every driver function before the first draw * call and we must initialize the command buffers somehow. 
*/ static void r300_init_states(struct pipe_context *pipe) { + struct r300_context *r300 = r300_context(pipe); struct pipe_blend_color bc = {{0}}; struct pipe_clip_state cs = {{{0}}}; struct pipe_scissor_state ss = {0}; struct r300_clip_state *clip = - (struct r300_clip_state*)r300_context(pipe)->clip_state.state; + (struct r300_clip_state*)r300->clip_state.state; + struct r300_gpu_flush *gpuflush = + (struct r300_gpu_flush*)r300->gpu_flush.state; + struct r300_vap_invariant_state *vap_invariant = + (struct r300_vap_invariant_state*)r300->vap_invariant_state.state; + struct r300_invariant_state *invariant = + (struct r300_invariant_state*)r300->invariant_state.state; + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; CB_LOCALS; pipe->set_blend_color(pipe, &bc); pipe->set_scissor_state(pipe, &ss); + /* Initialize the clip state. */ if (r300_context(pipe)->screen->caps.has_tcl) { pipe->set_clip_state(pipe, &cs); } else { @@ -190,6 +297,66 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CB; } + + /* Initialize the GPU flush. */ + { + BEGIN_CB(gpuflush->cb_flush_clean, 6); + + /* Flush and free renderbuffer caches. */ + OUT_CB_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + + /* Wait until the GPU is idle. + * This fixes random pixels sometimes appearing probably caused + * by incomplete rendering. */ + OUT_CB_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); + END_CB; + } + + /* Initialize the VAP invariant state. 
*/ + { + BEGIN_CB(vap_invariant->cb, 9); + OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); + OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + END_CB; + } + + /* Initialize the invariant state. */ + { + BEGIN_CB(invariant->cb, r300->invariant_state.size); + OUT_CB_REG(R300_GB_SELECT, 0); + OUT_CB_REG(R300_FG_FOG_BLEND, 0); + OUT_CB_REG(R300_GA_ROUND_MODE, 1); + OUT_CB_REG(R300_GA_OFFSET, 0); + OUT_CB_REG(R300_SU_TEX_WRAP, 0); + OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); + OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); + OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); + + if (r300->screen->caps.is_rv350) { + OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); + OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); + } + END_CB; + } + + /* Initialize the hyperz state. */ + { + BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); + OUT_CB_REG(R300_ZB_BW_CNTL, 0); + OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); + OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); + END_CB; + } } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -202,6 +369,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300) return NULL; + r300_update_num_contexts(r300screen, 1); + r300->rws = rws; r300->screen = r300screen; @@ -211,6 +380,12 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.destroy = r300_destroy_context; + r300->cs = rws->cs_create(rws); + + util_mempool_create(&r300->pool_transfers, + sizeof(struct pipe_transfer), 64, + UTIL_MEMPOOL_SINGLETHREADED); + if (!r300screen->caps.has_tcl) { /* Create a Draw. This is used for SW TCL. 
*/ r300->draw = draw_create(&r300->context); @@ -230,16 +405,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_blit_functions(r300); r300_init_flush_functions(r300); r300_init_query_functions(r300); - r300_init_render_functions(r300); r300_init_state_functions(r300); r300_init_resource_functions(r300); - r300->invariant_state.dirty = TRUE; + r300->blitter = util_blitter_create(&r300->context); - rws->set_flush_cb(r300->rws, r300_flush_cb, r300); - r300->dirty_hw++; + /* Render functions must be initialized after blitter. */ + r300_init_render_functions(r300); - r300->blitter = util_blitter_create(&r300->context); + rws->cs_set_flush(r300->cs, r300_flush_cb, r300); r300->upload_ib = u_upload_create(&r300->context, 32 * 1024, 16, @@ -280,11 +454,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.create_sampler_view(&r300->context, tex, &vtempl); pipe_resource_reference(&tex, NULL); - - /* This will make sure that the dummy texture is set up - * from the beginning even if an application does not use - * textures. 
*/ - r300->textures_state.dirty = TRUE; } return &r300->context; @@ -296,11 +465,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, return NULL; } -boolean r300_check_cs(struct r300_context *r300, unsigned size) -{ - return size <= r300->rws->get_cs_free_dwords(r300->rws); -} - void r300_finish(struct r300_context *r300) { struct pipe_framebuffer_state *fb; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 8d0b4bb3d37..b4256c62786 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -61,6 +61,13 @@ struct r300_atom { boolean allow_null_state; }; +struct r300_aa_state { + struct r300_surface *dest; + + uint32_t aa_config; + uint32_t aaresolve_ctl; +}; + struct r300_blend_state { uint32_t cb[8]; uint32_t cb_no_readwrite[8]; @@ -98,40 +105,39 @@ struct r300_dsa_state { boolean two_sided_stencil_ref; }; +struct r300_hyperz_state { + /* This is actually a command buffer with named dwords. */ + uint32_t cb_begin; + uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ + uint32_t cb_reg1; + uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ + uint32_t cb_reg2; + uint32_t sc_hyperz; /* R300_SC_HYPERZ */ +}; + +struct r300_gpu_flush { + uint32_t cb_flush_clean[6]; +}; + struct r300_rs_state { /* Original rasterizer state. */ struct pipe_rasterizer_state rs; /* Draw-specific rasterizer state. 
*/ struct pipe_rasterizer_state rs_draw; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ - uint32_t multisample_position_0;/* R300_GB_MSPOS0: 0x4010 */ - uint32_t multisample_position_1;/* R300_GB_MSPOS1: 0x4014 */ - uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ - uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ - uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ - uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ - float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ - /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ - float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ - /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ - uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ - uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ - uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ - uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ + /* Command buffers. */ + uint32_t cb_main[25]; + uint32_t cb_poly_offset_zb16[5]; + uint32_t cb_poly_offset_zb24[5]; + + /* The index to cb_main where the cull_mode register value resides. */ + unsigned cull_mode_index; + + /* Whether polygon offset is enabled. */ + boolean polygon_offset_enable; + + /* This is emitted in the draw function. */ uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */ - uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ - uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ - - /* Specifies top of Raster pipe specific enable controls, - * i.e. 
texture coordinates stuffing for points, lines, triangles */ - uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */ - - /* Point sprites texture coordinates, 0: lower left, 1: upper right */ - float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */ - float point_texcoord_bottom; /* R300_GA_POINT_T0: 0x4204 */ - float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */ - float point_texcoord_top; /* R300_GA_POINT_T1: 0x420c */ }; struct r300_rs_block { @@ -214,6 +220,14 @@ struct r300_vertex_stream_state { unsigned count; }; +struct r300_invariant_state { + uint32_t cb[20]; +}; + +struct r300_vap_invariant_state { + uint32_t cb[9]; +}; + struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -233,8 +247,8 @@ struct r300_ztop_state { struct r300_constant_buffer { /* Buffer of constants */ - uint32_t constants[256][4]; - /* Total number of constants */ + uint32_t *ptr; + /* Total number of vec4s */ unsigned count; }; @@ -294,32 +308,48 @@ struct r300_surface { enum r300_buffer_domain domain; - uint32_t offset; + uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ - uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT. */ + uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ + + /* Parameters dedicated to the CBZB clear. */ + uint32_t cbzb_width; /* Aligned width. */ + uint32_t cbzb_height; /* Half of the height. */ + uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */ + uint32_t cbzb_pitch; /* DEPTHPITCH. */ + uint32_t cbzb_format; /* ZB_FORMAT. */ + + /* Whether the CBZB clear is allowed on the surface. */ + boolean cbzb_allowed; }; -struct r300_texture { - /* Parent class */ +struct r300_texture_desc { + /* Parent class. */ struct u_resource b; - enum r300_buffer_domain domain; + /* Buffer tiling. + * Macrotiling is specified per-level because small mipmaps cannot + * be macrotiled. 
*/ + enum r300_buffer_tiling microtile; + enum r300_buffer_tiling macrotile[R300_MAX_TEXTURE_LEVELS]; /* Offsets into the buffer. */ - unsigned offset[R300_MAX_TEXTURE_LEVELS]; + unsigned offset_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch for each mip-level */ - unsigned pitch[R300_MAX_TEXTURE_LEVELS]; + /* Strides for each mip-level. */ + unsigned stride_in_pixels[R300_MAX_TEXTURE_LEVELS]; + unsigned stride_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* A pitch multiplied by blockwidth as hardware wants - * the number of pixels instead of the number of blocks. */ - unsigned hwpitch[R300_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face or 2D image based on the texture target. */ + unsigned layer_size_in_bytes[R300_MAX_TEXTURE_LEVELS]; - /* Size of one zslice or face based on the texture target */ - unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; + /* Total size of this texture, in bytes, + * derived from the texture properties. */ + unsigned size_in_bytes; - /* Whether the mipmap level is macrotiled. */ - enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS]; + /* Total size of the buffer backing this texture, in bytes. + * It must be >= size. */ + unsigned buffer_size_in_bytes; /** * If non-zero, override the natural texture layout with @@ -329,16 +359,24 @@ struct r300_texture { * * \sa r300_texture_get_stride */ - unsigned stride_override; + unsigned stride_in_bytes_override; - /* Total size of this texture, in bytes. */ - unsigned size; + /* Whether this texture has non-power-of-two dimensions. + * It can be either a regular texture or a rectangle one. */ + boolean is_npot; - /* Whether this texture has non-power-of-two dimensions - * or a user-specified pitch. - * It can be either a regular texture or a rectangle one. - */ - boolean uses_pitch; + /* This flag says that hardware must use the stride for addressing + * instead of the width. */ + boolean uses_stride_addressing; + + /* Whether CBZB fast color clear is allowed on the miplevel. 
*/ + boolean cbzb_allowed[R300_MAX_TEXTURE_LEVELS]; +}; + +struct r300_texture { + struct r300_texture_desc desc; + + enum r300_buffer_domain domain; /* Pipe buffer backing this texture. */ struct r300_winsys_buffer *buffer; @@ -349,8 +387,9 @@ struct r300_texture { /* All bits should be filled in. */ struct r300_texture_fb_state fb_state; - /* Buffer tiling */ - enum r300_buffer_tiling microtile, macrotile; + /* This is the level tiling flags were last time set for. + * It's used to prevent redundant tiling-flags changes from happening.*/ + unsigned surface_level; }; struct r300_vertex_element_state { @@ -391,6 +430,8 @@ struct r300_context { /* The interface to the windowing system, etc. */ struct r300_winsys_screen *rws; + /* The command stream. */ + struct r300_winsys_cs *cs; /* Screen. */ struct r300_screen *screen; /* Draw module. Used mostly for SW TCL. */ @@ -421,6 +462,8 @@ struct r300_context { /* Various CSO state objects. */ /* Beginning of atom list. */ struct r300_atom atom_list; + /* Anti-aliasing (MSAA) state. */ + struct r300_atom aa_state; /* Blend state. */ struct r300_atom blend_state; /* Blend color state. */ @@ -437,6 +480,10 @@ struct r300_context { struct r300_atom fs_constants; /* Framebuffer state. */ struct r300_atom fb_state; + /* Framebuffer state (pipelined regs). */ + struct r300_atom fb_state_pipelined; + /* HyperZ state (various SC/ZB bits). */ + struct r300_atom hyperz_state; /* Occlusion query. */ struct r300_atom query_start; /* Rasterizer state. */ @@ -459,8 +506,12 @@ struct r300_context { struct r300_atom ztop_state; /* PVS flush. */ struct r300_atom pvs_flush; + /* VAP invariant state. */ + struct r300_atom vap_invariant_state; /* Texture cache invalidate. */ struct r300_atom texture_cache_inval; + /* GPU flush. */ + struct r300_atom gpu_flush; /* Invariant state. This must be emitted to get the engine started. 
*/ struct r300_atom invariant_state; @@ -497,10 +548,13 @@ struct r300_context { /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; + boolean cbzb_clear; /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; + struct util_mempool pool_transfers; + /* Stat counter. */ uint64_t flush_counter; }; @@ -534,8 +588,8 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); -boolean r300_check_cs(struct r300_context *r300, unsigned size); void r300_finish(struct r300_context *r300); +void r300_flush_cb(void *data); /* Context initialization. */ struct draw_stage* r300_draw_stage(struct r300_context* r300); @@ -563,6 +617,13 @@ void r300_translate_index_buffer(struct r300_context *r300, void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ +enum r300_fb_state_change { + R300_CHANGED_FB_STATE = 0, + R300_CHANGED_CBZB_FLAG +}; + +void r300_mark_fb_state_dirty(struct r300_context *r300, + enum r300_fb_state_change change); void r300_mark_fs_code_dirty(struct r300_context *r300); /* r300_debug.c */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 1db7da642bd..c194d6a1b08 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -46,12 +46,12 @@ */ #define CS_LOCALS(context) \ - struct r300_context* const cs_context_copy = (context); \ - struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \ - CS_DEBUG(int cs_count = 0; (void) cs_count;) + struct r300_winsys_cs *cs_copy = (context)->cs; \ + struct r300_winsys_screen *cs_winsys = (context)->rws; \ + int cs_count = 0; (void) cs_count; (void) cs_winsys; #define BEGIN_CS(size) do { \ - assert(r300_check_cs(cs_context_copy, (size))); \ + assert(size <= (cs_copy->ndw - cs_copy->cdw)); \ CS_DEBUG(cs_count = size;) \ } while (0) 
@@ -66,49 +66,39 @@ #define END_CS #endif + /** * Writing pure DWORDs. */ #define OUT_CS(value) do { \ - cs_winsys->write_cs_dword(cs_winsys, (value)); \ + cs_copy->ptr[cs_copy->cdw++] = (value); \ CS_DEBUG(cs_count--;) \ } while (0) -#define OUT_CS_32F(value) do { \ - cs_winsys->write_cs_dword(cs_winsys, fui(value)); \ - CS_DEBUG(cs_count--;) \ -} while (0) +#define OUT_CS_32F(value) \ + OUT_CS(fui(value)) #define OUT_CS_REG(register, value) do { \ - assert(register); \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \ - cs_winsys->write_cs_dword(cs_winsys, value); \ - CS_DEBUG(cs_count -= 2;) \ + OUT_CS(CP_PACKET0(register, 0)); \ + OUT_CS(value); \ } while (0) /* Note: This expects count to be the number of registers, * not the actual packet0 count! */ -#define OUT_CS_REG_SEQ(register, count) do { \ - assert(register); \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \ - CS_DEBUG(cs_count--;) \ -} while (0) +#define OUT_CS_REG_SEQ(register, count) \ + OUT_CS(CP_PACKET0((register), ((count) - 1))) -#define OUT_CS_TABLE(values, count) do { \ - cs_winsys->write_cs_table(cs_winsys, values, count); \ - CS_DEBUG(cs_count -= count;) \ -} while (0) +#define OUT_CS_ONE_REG(register, count) \ + OUT_CS(CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR) -#define OUT_CS_ONE_REG(register, count) do { \ - assert(register); \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \ - CS_DEBUG(cs_count--;) \ -} while (0) +#define OUT_CS_PKT3(op, count) \ + OUT_CS(CP_PACKET3(op, count)) -#define OUT_CS_PKT3(op, count) do { \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \ - CS_DEBUG(cs_count--;) \ +#define OUT_CS_TABLE(values, count) do { \ + memcpy(cs_copy->ptr + cs_copy->cdw, values, count * 4); \ + cs_copy->cdw += count; \ + CS_DEBUG(cs_count -= count;) \ } while (0) @@ -116,26 +106,26 @@ * Writing relocations. 
*/ -#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ +#define OUT_CS_RELOC(bo, offset, rd, wd) do { \ assert(bo); \ - cs_winsys->write_cs_dword(cs_winsys, offset); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ - CS_DEBUG(cs_count -= 3;) \ + OUT_CS(offset); \ + cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \ + CS_DEBUG(cs_count -= 2;) \ } while (0) -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ +#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \ assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd, flags); \ + OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd); \ } while (0) -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ +#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \ assert(tex); \ - OUT_CS_RELOC(tex->buffer, offset, rd, wd, flags); \ + OUT_CS_RELOC(tex->buffer, offset, rd, wd); \ } while (0) -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \ assert(bo); \ - cs_winsys->write_cs_reloc(cs_winsys, r300_buffer(bo)->buf, rd, wd, flags); \ + cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->buf, rd, wd); \ CS_DEBUG(cs_count -= 2;) \ } while (0) @@ -146,7 +136,8 @@ #define WRITE_CS_TABLE(values, count) do { \ CS_DEBUG(assert(cs_count == 0);) \ - cs_winsys->write_cs_table(cs_winsys, values, count); \ + memcpy(cs_copy->ptr + cs_copy->cdw, (values), (count) * 4); \ + cs_copy->cdw += (count); \ } while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index a6cd86e3920..053a64ea6d7 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -29,17 +29,21 @@ static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Fragment program handling (for debugging)" }, { "vp", DBG_VP, "Vertex program handling (for debugging)" }, - { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, + { "draw", DBG_DRAW, "Draw calls (for 
debugging)" }, + { "swtcl", DBG_SWTCL, "SWTCL-specific info (for debugging)" }, + { "rsblock", DBG_RS_BLOCK, "Rasterizer registers (for debugging)" }, + { "psc", DBG_PSC, "Vertex stream registers (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "fb", DBG_FB, "Framebuffer (for debugging)" }, + { "cbzb", DBG_CBZB, "Fast color clear info (for debugging)" }, + { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, - { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for lulz)" }, - { "stats", DBG_STATS, "Gather statistics (for lulz)" }, + { "stats", DBG_STATS, "Gather statistics" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index d510d80a7bb..896aeef395d 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -36,7 +36,10 @@ enum r300_buffer_tiling { R300_BUFFER_LINEAR = 0, R300_BUFFER_TILED, - R300_BUFFER_SQUARETILED + R300_BUFFER_SQUARETILED, + + R300_BUFFER_UNKNOWN, + R300_BUFFER_SELECT_LAYOUT = R300_BUFFER_UNKNOWN }; enum r300_buffer_domain { /* bitfield */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e2c40d823d4..36a26a78717 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -170,15 +170,18 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat { struct r300_fragment_shader *fs = r300_fs(r300); struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned 
count = fs->shader->externals_count * 4; + unsigned count = fs->shader->externals_count; + unsigned i, j; CS_LOCALS(r300); if (count == 0) return; BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count); - OUT_CS_TABLE(buf->constants, count); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4); + for (i = 0; i < count; i++) + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j])); END_CS; } @@ -190,7 +193,6 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo unsigned count = fs->shader->rc_state_count; unsigned first = fs->shader->externals_count; unsigned end = constants->Count; - uint32_t cdata[4]; unsigned j; CS_LOCALS(r300); @@ -203,11 +205,9 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo const float *data = get_rc_constant_state(r300, &constants->Constants[i]); - for (j = 0; j < 4; j++) - cdata[j] = pack_float24(data[j]); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); - OUT_CS_TABLE(cdata, 4); + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(data[j])); } } END_CS; @@ -234,7 +234,7 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat BEGIN_CS(size); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count); - OUT_CS_TABLE(buf->constants, count); + OUT_CS_TABLE(buf->ptr, count); END_CS; } @@ -267,13 +267,22 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo END_CS; } -void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) +void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) { - struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; - struct r300_surface* surf; - unsigned i; + struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height = fb->height; + uint32_t width 
= fb->width; CS_LOCALS(r300); + if (r300->cbzb_clear) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + height = surf->cbzb_height; + width = surf->cbzb_width; + } + BEGIN_CS(size); /* Set up scissors. @@ -281,27 +290,48 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); if (r300->screen->caps.is_r500) { OUT_CS(0); - OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | - ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) | + ((height - 1) << R300_SCISSORS_Y_SHIFT)); } else { OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | (1440 << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | - ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) | + ((height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + } + + /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ + OUT_CS_TABLE(gpuflush->cb_flush_clean, 6); + END_CS; +} + +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) +{ + struct r300_aa_state *aa = (struct r300_aa_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(size); + OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); + + if (aa->dest) { + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain); + + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain); } - /* Flush and free renderbuffer caches. */ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); + END_CS; +} - /* Wait until the GPU is idle. 
- * This fixes random pixels sometimes appearing probably caused - * by incomplete rendering. */ - OUT_CS_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); +void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) +{ + struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; + struct r300_surface* surf; + unsigned i; + CS_LOCALS(r300); + + BEGIN_CS(size); /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not * what we usually want. */ @@ -317,28 +347,123 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) surf = r300_surface(fb->cbufs[i]); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); - - OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), surf->format); - } - for (; i < 4; i++) { - OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); } - /* Set up a zbuffer. */ - if (fb->zsbuf) { - surf = r300_surface(fb->zsbuf); + /* Set up the ZB part of the CBZB clear. */ + if (r300->cbzb_clear) { + surf = r300_surface(fb->cbufs[0]); + + OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain); + + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); + OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + } + /* Set up a zbuffer. 
*/ + else if (fb->zsbuf) { + surf = r300_surface(fb->zsbuf); OUT_CS_REG(R300_ZB_FORMAT, surf->format); + OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain); + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); + + /* HiZ RAM. */ + if (r300->screen->caps.has_hiz) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + + /* Z Mask RAM. (compressed zbuffer) */ + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } + + END_CS; +} + +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + +void r300_emit_hyperz_end(struct r300_context *r300) +{ + struct r300_hyperz_state z = + *(struct r300_hyperz_state*)r300->hyperz_state.state; + + z.zb_bw_cntl = 0; + z.zb_depthclearvalue = 0; + z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + + r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); +} + +void r300_emit_fb_state_pipelined(struct r300_context *r300, + unsigned size, void *state) +{ + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + unsigned i; + CS_LOCALS(r300); + + BEGIN_CS(size); + + /* Colorbuffer format in the US block. + * (must be written after unpipelined regs) */ + OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); + for (i = 0; i < fb->nr_cbufs; i++) { + OUT_CS(r300_surface(fb->cbufs[i])->format); + } + for (; i < 4; i++) { + OUT_CS(R300_US_OUT_FMT_UNUSED); + } + + /* Multisampling. Depends on framebuffer sample count. + * These are pipelined regs and as such cannot be moved + * to the AA state. */ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + unsigned mspos0 = 0x66666666; + unsigned mspos1 = 0x6666666; + + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. 
*/ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } + } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); } END_CS; } @@ -387,13 +512,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 3) * 4, - 0, query->domain, 0); + 0, query->domain); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 2) * 4, - 0, query->domain, 0); + 0, query->domain); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ @@ -401,13 +526,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, 1 << (caps->high_second_pipe ? 
3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 1) * 4, - 0, query->domain, 0); + 0, query->domain); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 0) * 4, - 0, query->domain, 0); + 0, query->domain); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -429,7 +554,7 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -443,10 +568,10 @@ static void rv530_emit_query_end_double_z(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -480,102 +605,27 @@ void r300_emit_query_end(struct r300_context* r300) } } +void r300_emit_invariant_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_rs_state* rs = state; - struct pipe_framebuffer_state* fb = r300->fb_state.state; - float scale, offset; - unsigned mspos0, mspos1, aa_config; CS_LOCALS(r300); 
BEGIN_CS(size); - OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); - - /* Multisampling. Depends on framebuffer sample count. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - aa_config = R300_GB_AA_CONFIG_AA_ENABLE; - /* Subsample placement. These may not be optimal. */ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - mspos0 = rs->multisample_position_0; - mspos1 = rs->multisample_position_1; - break; - } - - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(mspos0); - OUT_CS(mspos1); - - OUT_CS_REG(R300_GB_AA_CONFIG, aa_config); - } else { - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(rs->multisample_position_0); - OUT_CS(rs->multisample_position_1); - - OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); - } - } - - OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); - OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); - OUT_CS(rs->point_minmax); - OUT_CS(rs->line_control); - + OUT_CS_TABLE(rs->cb_main, 25); if (rs->polygon_offset_enable) { - scale = rs->depth_scale * 12; - offset = rs->depth_offset; - - switch (r300->zbuffer_bpp) { - case 16: - offset *= 4; - break; - case 24: - offset *= 2; - break; + if (r300->zbuffer_bpp == 16) { + OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5); + } else { + OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5); } - - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); - OUT_CS_32F(scale); - OUT_CS_32F(offset); - OUT_CS_32F(scale); - 
OUT_CS_32F(offset); } - - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); - OUT_CS(rs->polygon_offset_enable); - OUT_CS(rs->cull_mode); - OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); - OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); - OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode); - OUT_CS_REG(R300_SC_CLIP_RULE, rs->clip_rule); - OUT_CS_REG(R300_GB_ENABLE, rs->stuffing_enable); - OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); - OUT_CS_32F(rs->point_texcoord_left); - OUT_CS_32F(rs->point_texcoord_bottom); - OUT_CS_32F(rs->point_texcoord_right); - OUT_CS_32F(rs->point_texcoord_top); END_CS; } @@ -588,11 +638,20 @@ void r300_emit_rs_block_state(struct r300_context* r300, unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1; CS_LOCALS(r300); - if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) { + if (DBG_ON(r300, DBG_RS_BLOCK)) { r500_dump_rs_block(rs); - } - DBG(r300, DBG_DRAW, "r300: RS emit:\n"); + fprintf(stderr, "r300: RS emit:\n"); + + for (i = 0; i < count; i++) + fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]); + + for (i = 0; i < count; i++) + fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]); + + fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n", + rs->count, rs->inst_count); + } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); @@ -608,9 +667,6 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RS_IP_0, count); } OUT_CS_TABLE(rs->ip, count); - for (i = 0; i < count; i++) { - DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); - } OUT_CS_REG_SEQ(R300_RS_COUNT, 2); OUT_CS(rs->count); @@ -622,13 +678,6 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RS_INST_0, count); } OUT_CS_TABLE(rs->inst, count); - for (i = 0; i < count; i++) { - DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); - } - - DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n", - rs->count, rs->inst_count); - END_CS; } @@ -682,7 +731,7 @@ void 
r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, - 0, 0); + 0); } } END_CS; @@ -725,7 +774,7 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) for (i = 0; i < aos_count; i++) { buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0, 0); + OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0); } END_CS; } @@ -734,7 +783,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " + DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.size); /* Set the pointer to our vertex buffer. The emitted values are this: @@ -750,7 +799,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0, 0); + OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0); END_CS; } @@ -762,21 +811,25 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned i; CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: PSC emit:\n"); + if (DBG_ON(r300, DBG_PSC)) { + fprintf(stderr, "r300: PSC emit:\n"); + + for (i = 0; i < streams->count; i++) { + fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", i, + streams->vap_prog_stream_cntl[i]); + } + + for (i = 0; i < streams->count; i++) { + fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", i, + streams->vap_prog_stream_cntl_ext[i]); + } + } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); - for (i = 0; i < streams->count; i++) { - DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - 
streams->vap_prog_stream_cntl[i]); - } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count); - for (i = 0; i < streams->count; i++) { - DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - streams->vap_prog_stream_cntl_ext[i]); - } END_CS; } @@ -789,6 +842,13 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) END_CS; } +void r300_emit_vap_invariant_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state; @@ -813,6 +873,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); BEGIN_CS(size); + /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -865,7 +926,7 @@ void r300_emit_vs_constants(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_CONST_START : R300_PVS_CONST_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - OUT_CS_TABLE(buf->constants, count * 4); + OUT_CS_TABLE(buf->ptr, count * 4); END_CS; } @@ -924,27 +985,22 @@ void r300_emit_buffer_validate(struct r300_context *r300, } /* Clean out BOs. */ - r300->rws->reset_bos(r300->rws); + r300->rws->cs_reset_buffers(r300->cs); validate: /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { tex = r300_texture(fb->cbufs[i]->texture); assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - if (!r300_add_texture(r300->rws, tex, 0, tex->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0, + r300_surface(fb->cbufs[i])->domain); } /* ...depth buffer... 
*/ if (fb->zsbuf) { tex = r300_texture(fb->zsbuf->texture); assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - if (!r300_add_texture(r300->rws, tex, - 0, tex->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0, + r300_surface(fb->zsbuf)->domain); } /* ...textures... */ for (i = 0; i < texstate->count; i++) { @@ -953,48 +1009,31 @@ validate: } tex = r300_texture(texstate->sampler_views[i]->base.texture); - if (!r300_add_texture(r300->rws, tex, tex->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, tex->domain, 0); } /* ...occlusion query buffer... */ - if (r300->query_current) { - if (!r300->rws->add_buffer(r300->rws, r300->query_current->buffer, - 0, r300->query_current->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } + if (r300->query_current) + r300->rws->cs_add_buffer(r300->cs, r300->query_current->buffer, + 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ - if (r300->vbo) { - if (!r300_add_buffer(r300->rws, r300->vbo, - r300_buffer(r300->vbo)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } + if (r300->vbo) + r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->buf, + r300_buffer(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers) { for (i = 0; i < r300->velems->count; i++) { pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - if (!r300_add_buffer(r300->rws, pbuf, - r300_buffer(pbuf)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->buf, + r300_buffer(pbuf)->domain, 0); } } /* ...and index buffer for HWTCL path. 
*/ - if (index_buffer) { - if (!r300_add_buffer(r300->rws, index_buffer, - r300_buffer(index_buffer)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } - if (!r300->rws->validate(r300->rws)) { + if (index_buffer) + r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->buf, + r300_buffer(index_buffer)->domain, 0); + + if (!r300->rws->cs_validate(r300->cs)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. */ diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 36a29894d01..5d05039669f 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state); + +void r300_emit_hyperz_end(struct r300_context *r300); + void r300_emit_fs(struct r300_context* r300, unsigned size, void *state); void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); @@ -59,6 +64,13 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_fb_state_pipelined(struct r300_context *r300, + unsigned size, void *state); + +void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state); + +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state); + void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state); void r300_emit_query_end(struct r300_context* r300); @@ -76,6 +88,9 @@ void r300_emit_textures_state(struct r300_context *r300, void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed); +void r300_emit_vap_invariant_state(struct r300_context *r300, + unsigned size, void *state); + void 
r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state); @@ -94,6 +109,9 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state); void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state); +void r300_emit_invariant_state(struct r300_context *r300, + unsigned size, void *state); + unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index ba840bfff81..ae7b5759e78 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -25,6 +25,7 @@ #include "draw/draw_private.h" #include "util/u_simple_list.h" +#include "util/u_upload_mgr.h" #include "r300_context.h" #include "r300_cs.h" @@ -39,6 +40,9 @@ static void r300_flush(struct pipe_context* pipe, struct r300_atom *atom; struct r300_fence **rfence = (struct r300_fence**)fence; + u_upload_flush(r300->upload_vb); + u_upload_flush(r300->upload_ib); + /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -48,12 +52,11 @@ static void r300_flush(struct pipe_context* pipe, } if (r300->dirty_hw) { + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); - if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { - r300->flush_counter++; - } - r300->rws->flush_cs(r300->rws); + r300->flush_counter++; + r300->rws->cs_flush(r300->cs); r300->dirty_hw = 0; /* New kitchen sink, baby. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index e585394304e..db5269912e2 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -173,7 +173,7 @@ static void get_external_state( t = (struct r300_texture*)texstate->sampler_views[i]->base.texture; /* XXX this should probably take into account STR, not just S. 
*/ - if (t->uses_pitch) { + if (t->desc.is_npot) { switch (s->state.wrap_s) { case PIPE_TEX_WRAP_REPEAT: state->unit[i].wrap_mode = RC_WRAP_REPEAT; @@ -246,13 +246,14 @@ static void r300_emit_fs_code_to_buffer( if (r300->screen->caps.is_r500) { struct r500_fragment_program_code *code = &generic_code->code.r500; - shader->cb_code_size = 17 + + shader->cb_code_size = 19 + ((code->inst_end + 1) * 6) + imm_count * 7; NEW_CB(shader->cb_code, shader->cb_code_size); OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); + OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl); OUT_CB_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); OUT_CB_REG(R500_US_CODE_OFFSET, 0); @@ -288,11 +289,16 @@ static void r300_emit_fs_code_to_buffer( struct r300_fragment_program_code *code = &generic_code->code.r300; shader->cb_code_size = 19 + + (r300->screen->caps.is_r400 ? 2 : 0) + code->alu.length * 4 + (code->tex.length ? (1 + code->tex.length) : 0) + imm_count * 5; NEW_CB(shader->cb_code, shader->cb_code_size); + + if (r300->screen->caps.is_r400) + OUT_CB_REG(R400_US_CODE_BANK, 0); + OUT_CB_REG(R300_US_CONFIG, code->config); OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e5c76589528..e9528956019 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -21,13 +21,28 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "r300_hyperz.h" #include "r300_context.h" +#include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" /*****************************************************************************/ +/* The HyperZ setup */ +/*****************************************************************************/ + +static void r300_update_hyperz(struct r300_context* r300) +{ + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + + z->zb_bw_cntl = 0; + z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + + if (r300->cbzb_clear) + z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; +} + +/*****************************************************************************/ /* The ZTOP state */ /*****************************************************************************/ @@ -119,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { + r300_update_hyperz(r300); + } } diff --git a/src/gallium/drivers/r300/r300_public.h b/src/gallium/drivers/r300/r300_public.h new file mode 100644 index 00000000000..8e7a963c55d --- /dev/null +++ b/src/gallium/drivers/r300/r300_public.h @@ -0,0 +1,9 @@ + +#ifndef R300_PUBLIC_H +#define R300_PUBLIC_H + +struct r300_winsys_screen; + +struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws); + +#endif diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 10cb468dfcc..5b0121ce9e1 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -37,7 +37,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, struct r300_screen *r300screen = r300->screen; struct r300_query *q; - assert(query_type == PIPE_QUERY_OCCLUSION_COUNTER); + if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) { + return NULL; + } q = CALLOC_STRUCT(r300_query); if (!q) @@ -55,7 +57,9 @@ static struct pipe_query 
*r300_create_query(struct pipe_context *pipe, insert_at_tail(&r300->query_list, q); /* Open up the occlusion query buffer. */ - q->buffer = r300->rws->buffer_create(r300->rws, 4096, 0, q->domain, q->buffer_size); + q->buffer = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096, + PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, + q->domain); return (struct pipe_query*)q; } @@ -132,7 +136,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0); - map = r300->rws->buffer_map(r300->rws, q->buffer, flags); + map = r300->rws->buffer_map(r300->rws, q->buffer, r300->cs, flags); if (!map) return FALSE; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index c783998c78d..2acc1a903e8 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2617,7 +2617,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_WR_COMP_DISABLE (0 << 4) # define R300_WR_COMP_ENABLE (1 << 4) # define R300_ZB_CB_CLEAR_RMW (0 << 5) -# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) +# define R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY (1 << 5) # define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) # define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) @@ -2673,6 +2673,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Z Buffer Clear Value */ #define R300_ZB_DEPTHCLEARVALUE 0x4f28 +/* Z Mask RAM is a Z compression buffer. + * Each dword of the Z Mask contains compression info for 16 4x4 pixel blocks, + * that is 2 bits for each block. + * On chips with 2 Z pipes, every other dword maps to a different pipe. + */ + +/* The dword offset into Z mask RAM (bits 18:4) */ +#define R300_ZB_ZMASK_OFFSET 0x4f30 + +/* Z Mask Pitch. */ +#define R300_ZB_ZMASK_PITCH 0x4f34 + +/* Access to Z Mask RAM in a manner similar to HiZ RAM. + * The indices are autoincrementing. 
*/ +#define R300_ZB_ZMASK_WRINDEX 0x4f38 +#define R300_ZB_ZMASK_DWORD 0x4f3c +#define R300_ZB_ZMASK_RDINDEX 0x4f40 + /* Hierarchical Z Memory Offset */ #define R300_ZB_HIZ_OFFSET 0x4f44 @@ -3264,8 +3282,8 @@ enum { # define R500_FC_B_OP0_NONE (0 << 24) # define R500_FC_B_OP0_DECR (1 << 24) # define R500_FC_B_OP0_INCR (2 << 24) -# define R500_FC_B_OP1_DECR (0 << 26) -# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_NONE (0 << 26) +# define R500_FC_B_OP1_DECR (1 << 26) # define R500_FC_B_OP1_INCR (2 << 26) # define R500_FC_IGNORE_UNCOVERED (1 << 28) #define R500_US_FC_INT_CONST_0 0x4c00 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 4afd124c0eb..bae02135da9 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -35,7 +35,6 @@ #include "util/u_prim.h" #include "r300_cs.h" -#include "r300_cb.h" #include "r300_context.h" #include "r300_screen_buffer.h" #include "r300_emit.h" @@ -224,11 +223,12 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ + end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ cs_dwords += end_dwords; /* Reserve requested CS space. */ - if (!r300_check_cs(r300, cs_dwords)) { + if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) { r300->context.flush(&r300->context, 0, NULL); flushed = TRUE; } @@ -278,7 +278,6 @@ static boolean immd_is_good_idea(struct r300_context *r300, /* We shouldn't map buffers referenced by CS, busy buffers, * and ones placed in VRAM. */ - /* XXX Check for VRAM buffers. 
*/ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; vbi = velem->vertex_buffer_index; @@ -286,6 +285,10 @@ static boolean immd_is_good_idea(struct r300_context *r300, if (!checked[vbi]) { vbuf = &r300->vertex_buffer[vbi]; + if (!(r300_buffer(vbuf->buffer)->domain & R300_DOMAIN_GTT)) { + return FALSE; + } + if (r300_buffer_is_referenced(&r300->context, vbuf->buffer, R300_REF_CS | R300_REF_HW)) { @@ -299,8 +302,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, } /***************************************************************************** - * The emission of draw packets for r500. Older GPUs may use these functions * - * after resolving fallback issues (e.g. stencil ref two-sided). * + * The HWTCL draw functions. * ****************************************************************************/ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, @@ -316,74 +318,70 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Size of the vertex, in dwords. */ unsigned vertex_size = r300->velems->vertex_size_dwords; - /* Offsets of the attribute, in dwords, from the start of the vertex. */ - unsigned offset[PIPE_MAX_ATTRIBS]; - /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; /* Stride to the same attrib in the next vertex in the vertex buffer, * in dwords. */ - unsigned stride[PIPE_MAX_ATTRIBS] = {0}; + unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ - uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; - struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL}; + uint32_t* map[PIPE_MAX_ATTRIBS]; + uint32_t* mapelem[PIPE_MAX_ATTRIBS]; + struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0}; - CB_LOCALS; + CS_LOCALS(r300); /* Calculate the vertex size, offsets, strides etc. and map the buffers. 
*/ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; - offset[i] = velem->src_offset / 4; size[i] = r300->velems->hw_format_size[i] / 4; vbi = velem->vertex_buffer_index; + vbuf = &r300->vertex_buffer[vbi]; + stride[i] = vbuf->stride / 4; /* Map the buffer. */ - if (!map[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; + if (!transfer[vbi]) { map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, vbuf->buffer, PIPE_TRANSFER_READ, &transfer[vbi]); - stride[vbi] = vbuf->stride / 4; - map[vbi] += vbuf->buffer_offset / 4 + stride[vbi] * start; + map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; } + mapelem[i] = map[vbi] + (velem->src_offset / 4); } dwords = 9 + count * vertex_size; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); - BEGIN_CS_AS_CB(r300, dwords); - OUT_CB_REG(R300_GA_COLOR_CONTROL, + BEGIN_CS(dwords); + OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CB(count - 1); - OUT_CB(0); - OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); - OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(count - 1); + OUT_CS(0); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); /* Emit vertices. */ for (v = 0; v < count; v++) { for (i = 0; i < vertex_element_count; i++) { - vbi = r300->velems->velem[i].vertex_buffer_index; - - OUT_CB_TABLE(&map[vbi][offset[i] + stride[vbi] * v], size[i]); + OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]); } } - END_CB; + END_CS; /* Unmap buffers. 
*/ for (i = 0; i < vertex_element_count; i++) { vbi = r300->velems->velem[i].vertex_buffer_index; - if (map[vbi]) { + if (transfer[vbi]) { vbuf = &r300->vertex_buffer[vbi]; pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]); - map[vbi] = NULL; + transfer[vbi] = NULL; } } } @@ -475,7 +473,7 @@ static void r300_emit_draw_elements(struct r300_context *r300, (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); OUT_CS_BUF_RELOC(indexBuffer, count_dwords, - r300_buffer(indexBuffer)->domain, 0, 0); + r300_buffer(indexBuffer)->domain, 0); END_CS; } @@ -499,6 +497,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ boolean translate = FALSE; + unsigned new_offset; if (r300->skip_rendering) { return; @@ -508,6 +507,12 @@ static void r300_draw_range_elements(struct pipe_context* pipe, return; } + /* Index buffer range checking. */ + if ((start + count) * indexSize > indexBuffer->width0) { + fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); + return; + } + /* Set up fallback for incompatible vertex layout if needed. 
*/ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { r300_begin_vertex_translate(r300); @@ -522,18 +527,17 @@ static void r300_draw_range_elements(struct pipe_context* pipe, &start, count); r300_update_derived_state(r300); - r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count); + r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count, &new_offset); + start = new_offset; /* 15 dwords for emit_draw_elements */ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, indexBuffer, 15, buffer_offset, indexBias, NULL); - u_upload_flush(r300->upload_vb); - u_upload_flush(r300->upload_ib); if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, - minIndex, maxIndex, mode, start, count); + minIndex, maxIndex, mode, start, count); } else { do { short_count = MIN2(count, 65534); @@ -865,13 +869,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render, unsigned dwords = 6; CS_LOCALS(r300); - (void) i; (void) ptr; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, NULL, dwords, 0, 0, NULL); - DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); + DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count); /* Uncomment to dump all VBOs rendered through this interface. * Slow and noisy! @@ -914,6 +917,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, unsigned free_dwords; CS_LOCALS(r300); + DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count); /* Reserve at least 256 dwords. 
* @@ -924,7 +928,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, NULL, 256, 0, 0, &end_cs_dwords); while (count) { - free_dwords = r300->rws->get_cs_free_dwords(r300->rws); + free_dwords = r300->cs->ndw - r300->cs->cdw; short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2); @@ -1015,6 +1019,88 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300) * End of SW TCL functions * ***************************************************************************/ +/* If we used a quad to draw a rectangle, the pixels on the main diagonal + * would be computed and stored twice, which makes the clear/copy codepaths + * somewhat inefficient. Instead we use a rectangular point sprite. */ +static void r300_blitter_draw_rectangle(struct blitter_context *blitter, + unsigned x1, unsigned y1, + unsigned x2, unsigned y2, + float depth, + enum blitter_attrib_type type, + const float attrib[4]) +{ + struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter)); + unsigned last_sprite_coord_enable = r300->sprite_coord_enable; + unsigned width = x2 - x1; + unsigned height = y2 - y1; + unsigned vertex_size = + type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4; + unsigned dwords = 13 + vertex_size + + (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0); + const float zeros[4] = {0, 0, 0, 0}; + CS_LOCALS(r300); + + if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) + r300->sprite_coord_enable = 1; + + r300_update_derived_state(r300); + + /* Mark some states we don't care about as non-dirty. */ + r300->clip_state.dirty = FALSE; + r300->viewport_state.dirty = FALSE; + + r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); + + DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); + + BEGIN_CS(dwords); + /* Set up GA. */ + OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16)); + + if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { + /* Set up the GA to generate texcoords. 
*/ + OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | + (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT)); + OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); + OUT_CS_32F(attrib[0]); + OUT_CS_32F(attrib[3]); + OUT_CS_32F(attrib[2]); + OUT_CS_32F(attrib[1]); + } + + /* Set up VAP controls. */ + OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(1); + OUT_CS(0); + + /* Draw. */ + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) | + R300_VAP_VF_CNTL__PRIM_POINTS); + + OUT_CS_32F(x1 + width * 0.5f); + OUT_CS_32F(y1 + height * 0.5f); + OUT_CS_32F(depth); + OUT_CS_32F(1); + + if (vertex_size == 8) { + if (!attrib) + attrib = zeros; + OUT_CS_TABLE(attrib, 4); + } + END_CS; + + /* Restore the state. */ + r300->clip_state.dirty = TRUE; + r300->rs_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; + + r300->sprite_coord_enable = last_sprite_coord_enable; +} + static void r300_resource_resolve(struct pipe_context* pipe, struct pipe_resource* dest, struct pipe_subresource subdest, @@ -1022,33 +1108,35 @@ static void r300_resource_resolve(struct pipe_context* pipe, struct pipe_subresource subsrc) { struct r300_context* r300 = r300_context(pipe); - struct r300_surface* destsurf = r300_surface( - dest->screen->get_tex_surface(dest->screen, - dest, subdest.face, subdest.level, 0, 0)); + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen, src, subsrc.face, subsrc.level, 0, 0); float color[] = {0, 0, 0, 0}; - CS_LOCALS(r300); DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(destsurf->buffer, destsurf->offset, 0, destsurf->domain, 0); - - OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - 
OUT_CS_RELOC(destsurf->buffer, destsurf->pitch, 0, destsurf->domain, 0); + /* Enable AA resolve. */ + aa->dest = r300_surface( + dest->screen->get_tex_surface(dest->screen, dest, subdest.face, + subdest.level, 0, 0)); - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, + aa->aaresolve_ctl = R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | - R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE); + R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; + r300->aa_state.size = 12; + r300->aa_state.dirty = TRUE; + /* Resolve the surface. */ r300->context.clear_render_target(pipe, srcsurf, color, 0, 0, src->width0, src->height0); - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x0); + /* Disable AA resolve. */ + aa->aaresolve_ctl = 0; + r300->aa_state.size = 4; + r300->aa_state.dirty = TRUE; pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); - pipe_surface_reference((struct pipe_surface**)&destsurf, NULL); + pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL); } void r300_init_render_functions(struct r300_context *r300) @@ -1066,6 +1154,7 @@ void r300_init_render_functions(struct r300_context *r300) } r300->context.resource_resolve = r300_resource_resolve; + r300->blitter->draw_rectangle = r300_blitter_draw_rectangle; /* Plug in the two-sided stencil reference value fallback if needed. */ if (!r300->screen->caps.is_r500) diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index d509ded3ec8..9a6b4e12ff1 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -64,12 +64,12 @@ static void r300_stencilref_begin(struct r300_context *r300) struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; /* Save state. 
*/ - sr->rs_cull_mode = rs->cull_mode; + sr->rs_cull_mode = rs->cb_main[rs->cull_mode_index]; sr->zb_stencilrefmask = dsa->stencil_ref_mask; sr->ref_value_front = r300->stencil_ref.ref_value[0]; /* We *cull* pixels, therefore no need to mask out the bits. */ - rs->cull_mode |= R300_CULL_BACK; + rs->cb_main[rs->cull_mode_index] |= R300_CULL_BACK; r300->rs_state.dirty = TRUE; } @@ -81,7 +81,7 @@ static void r300_stencilref_switch_side(struct r300_context *r300) struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT; + rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode | R300_CULL_FRONT; dsa->stencil_ref_mask = dsa->stencil_ref_bf; r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; @@ -97,7 +97,7 @@ static void r300_stencilref_end(struct r300_context *r300) struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; /* Restore state. */ - rs->cull_mode = sr->rs_cull_mode; + rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode; dsa->stencil_ref_mask = sr->zb_stencilrefmask; r300->stencil_ref.ref_value[0] = sr->ref_value_front; diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8f7c96b829c..676430f5fee 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -30,6 +30,7 @@ #include "r300_screen_buffer.h" #include "r300_state_inlines.h" #include "r300_winsys.h" +#include "r300_public.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. 
@@ -114,6 +115,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_DEPTH_CLAMP: return 1; /* Unsupported features (boolean caps). */ @@ -206,6 +208,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 1; /* XXX guessed */ case PIPE_CAP_MAX_VS_PREDS: return is_r500 ? 4 : 0; /* XXX guessed. */ + case PIPE_CAP_GEOMETRY_SHADER4: + return 0; default: fprintf(stderr, "r300: Implementation error: Bad param %d\n", @@ -253,9 +257,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; - boolean is_rv350 = r300_screen(screen)->caps.is_rv350; - boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || - format == PIPE_FORMAT_S8_USCALED_Z24_UNORM; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_R10G10B10X2_SNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || @@ -269,12 +270,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_R16G16B16_FLOAT || format == PIPE_FORMAT_R16G16B16A16_FLOAT; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - fprintf(stderr, "r300: Implementation error: Received bogus texture " - "target %d in %s\n", target, __FUNCTION__); - return FALSE; - } - + /* Check multisampling support. */ switch (sample_count) { case 0: case 1: @@ -295,8 +291,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check sampler format support. */ if ((usage & PIPE_BIND_SAMPLER_VIEW) && - /* Z24 cannot be sampled from on non-r5xx. */ - (is_r500 || !is_z24) && /* ATI1N is r5xx-only. */ (is_r500 || !is_ati1n) && /* ATI2N is supported on r4xx-r5xx. 
*/ @@ -329,7 +323,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check vertex buffer format support. */ if (usage & PIPE_BIND_VERTEX_BUFFER && /* Half float is supported on >= RV350. */ - (is_rv350 || !is_half_float) && + (is_r400 || is_r500 || !is_half_float) && r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) { retval |= PIPE_BIND_VERTEX_BUFFER; } @@ -348,6 +342,8 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) struct r300_screen* r300screen = r300_screen(pscreen); struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); + util_mempool_destroy(&r300screen->pool_buffers); + if (rws) rws->destroy(rws); @@ -387,7 +383,7 @@ static int r300_fence_finish(struct pipe_screen *screen, return 0; /* 0 == success */ } -struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) +struct pipe_screen* r300_screen_create(struct r300_winsys_screen *rws) { struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen); @@ -403,6 +399,10 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) r300_init_debug(r300screen); r300_parse_chipset(&r300screen->caps); + util_mempool_create(&r300screen->pool_buffers, + sizeof(struct r300_buffer), 64, + UTIL_MEMPOOL_SINGLETHREADED); + r300screen->rws = rws; r300screen->screen.winsys = (struct pipe_winsys*)rws; r300screen->screen.destroy = r300_destroy_screen; @@ -423,9 +423,3 @@ struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) return &r300screen->screen; } - -struct r300_winsys_screen * -r300_winsys_screen(struct pipe_screen *screen) -{ - return r300_screen(screen)->rws; -} diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 29cd5dbe267..18745b83a09 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -28,8 +28,12 @@ #include "r300_chipset.h" +#include "util/u_mempool.h" + #include <stdio.h> +struct r300_winsys_screen; + struct r300_screen 
{ /* Parent class */ struct pipe_screen screen; @@ -39,16 +43,28 @@ struct r300_screen { /* Chipset capabilities */ struct r300_capabilities caps; + /* Memory pools. */ + struct util_mempool pool_buffers; + /** Combination of DBG_xxx flags */ unsigned debug; + + /* The number of created contexts to know whether we have multiple + * contexts or not. */ + int num_contexts; }; -/* Convenience cast wrapper. */ +/* Convenience cast wrappers. */ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { return (struct r300_screen*)screen; } +static INLINE struct r300_winsys_screen * +r300_winsys_screen(struct pipe_screen *screen) { + return r300_screen(screen)->rws; +} + /* Debug functionality. */ /** @@ -61,17 +77,20 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { * those changes. */ /*@{*/ -#define DBG_HELP (1 << 0) + /* Logging. */ +#define DBG_PSC (1 << 0) #define DBG_FP (1 << 1) #define DBG_VP (1 << 2) -/* The bit (1 << 3) is unused. */ +#define DBG_SWTCL (1 << 3) #define DBG_DRAW (1 << 4) #define DBG_TEX (1 << 5) #define DBG_TEXALLOC (1 << 6) #define DBG_RS (1 << 7) #define DBG_FALL (1 << 8) #define DBG_FB (1 << 9) +#define DBG_RS_BLOCK (1 << 10) +#define DBG_CBZB (1 << 11) /* Features. 
*/ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 7959e6a2f9e..37a080ba48b 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -43,7 +43,7 @@ unsigned r300_buffer_is_referenced(struct pipe_context *context, if (r300_buffer_is_user_buffer(buf)) return PIPE_UNREFERENCED; - if (r300->rws->is_buffer_referenced(r300->rws, rbuf->buf, domain)) + if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->buf, domain)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return PIPE_UNREFERENCED; @@ -62,7 +62,8 @@ int r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned start, - unsigned count) + unsigned count, + unsigned *out_offset) { struct pipe_resource *upload_buffer = NULL; unsigned index_offset = start * index_size; @@ -79,7 +80,10 @@ int r300_upload_index_buffer(struct r300_context *r300, goto done; } *index_buffer = upload_buffer; - } + *out_offset = index_offset / index_size; + } else + *out_offset = start; + done: // if (upload_buffer) // pipe_resource_reference(&upload_buffer, NULL); @@ -119,31 +123,59 @@ int r300_upload_user_buffers(struct r300_context *r300) return ret; } -static void r300_winsys_buffer_destroy(struct r300_screen *r300screen, - struct r300_buffer *rbuf) +static void r300_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *buf) { + struct r300_screen *r300screen = r300_screen(screen); + struct r300_buffer *rbuf = r300_buffer(buf); struct r300_winsys_screen *rws = r300screen->rws; - if (rbuf->buf) { - rws->buffer_reference(rws, &rbuf->buf, NULL); - rbuf->buf = NULL; - } + if (rbuf->constant_buffer) + FREE(rbuf->constant_buffer); + + if (rbuf->buf) + rws->buffer_reference(rws, &rbuf->buf, NULL); + + util_mempool_free(&r300screen->pool_buffers, rbuf); } -static void 
r300_buffer_destroy(struct pipe_screen *screen, - struct pipe_resource *buf) +static struct pipe_transfer* +r300_default_get_transfer(struct pipe_context *context, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) { - struct r300_screen *r300screen = r300_screen(screen); - struct r300_buffer *rbuf = r300_buffer(buf); + struct r300_context *r300 = r300_context(context); + struct pipe_transfer *transfer = + util_mempool_malloc(&r300->pool_transfers); + + transfer->resource = resource; + transfer->sr = sr; + transfer->usage = usage; + transfer->box = *box; + transfer->stride = 0; + transfer->slice_stride = 0; + transfer->data = NULL; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. + */ + return transfer; +} - r300_winsys_buffer_destroy(r300screen, rbuf); - FREE(rbuf); +static void r300_default_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct r300_context *r300 = r300_context(pipe); + util_mempool_free(&r300->pool_transfers, transfer); } static void * r300_buffer_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { + struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300_screen(pipe->screen); struct r300_winsys_screen *rws = r300screen->rws; struct r300_buffer *rbuf = r300_buffer(transfer->resource); @@ -153,10 +185,8 @@ r300_buffer_transfer_map( struct pipe_context *pipe, if (rbuf->user_buffer) return (uint8_t *) rbuf->user_buffer + transfer->box.x; - - if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) { - goto just_map; - } + if (rbuf->constant_buffer) + return (uint8_t *) rbuf->constant_buffer + transfer->box.x; /* check if the mapping is to a range we already flushed */ if (transfer->usage & PIPE_TRANSFER_DISCARD) { @@ -170,16 +200,18 @@ r300_buffer_transfer_map( struct pipe_context *pipe, rws->buffer_reference(rws, &rbuf->buf, NULL); rbuf->num_ranges = 0; - 
rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, 16, - rbuf->b.b.bind, - rbuf->domain, - rbuf->b.b.width0); + rbuf->buf = + r300screen->rws->buffer_create(r300screen->rws, + rbuf->b.b.width0, 16, + rbuf->b.b.bind, + rbuf->b.b.usage, + rbuf->domain); break; } } } -just_map: - map = rws->buffer_map(rws, rbuf->buf, transfer->usage); + + map = rws->buffer_map(rws, rbuf->buf, r300->cs, transfer->usage); if (map == NULL) return NULL; @@ -204,9 +236,8 @@ static void r300_buffer_transfer_flush_region( struct pipe_context *pipe, if (rbuf->user_buffer) return; - - if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) - return; + if (rbuf->constant_buffer) + return; /* mark the range as used */ for(i = 0; i < rbuf->num_ranges; ++i) { @@ -237,14 +268,14 @@ static void r300_buffer_transfer_unmap( struct pipe_context *pipe, struct u_resource_vtbl r300_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ - r300_buffer_destroy, /* resource_destroy */ - r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ - r300_buffer_transfer_map, /* transfer_map */ + r300_buffer_destroy, /* resource_destroy */ + r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */ + r300_default_get_transfer, /* get_transfer */ + r300_default_transfer_destroy, /* transfer_destroy */ + r300_buffer_transfer_map, /* transfer_map */ r300_buffer_transfer_flush_region, /* transfer_flush_region */ - r300_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ + r300_buffer_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ }; struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, @@ -254,9 +285,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, struct r300_buffer *rbuf; unsigned alignment = 16; - rbuf = CALLOC_STRUCT(r300_buffer); - if (!rbuf) - goto error1; + 
rbuf = util_mempool_malloc(&r300screen->pool_buffers); rbuf->magic = R300_BUFFER_MAGIC; @@ -265,21 +294,29 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, pipe_reference_init(&rbuf->b.b.reference, 1); rbuf->b.b.screen = screen; rbuf->domain = R300_DOMAIN_GTT; + rbuf->num_ranges = 0; + rbuf->buf = NULL; + rbuf->constant_buffer = NULL; + rbuf->user_buffer = NULL; + + /* Alloc constant buffers in RAM. */ + if (templ->bind & PIPE_BIND_CONSTANT_BUFFER) { + rbuf->constant_buffer = MALLOC(templ->width0); + return &rbuf->b.b; + } - rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, - alignment, - rbuf->b.b.bind, - rbuf->domain, - rbuf->b.b.width0); + rbuf->buf = + r300screen->rws->buffer_create(r300screen->rws, + rbuf->b.b.width0, alignment, + rbuf->b.b.bind, rbuf->b.b.usage, + rbuf->domain); - if (!rbuf->buf) - goto error2; + if (!rbuf->buf) { + util_mempool_free(&r300screen->pool_buffers, rbuf); + return NULL; + } return &rbuf->b.b; -error2: - FREE(rbuf); -error1: - return NULL; } struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, @@ -287,28 +324,28 @@ struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen, unsigned bytes, unsigned bind) { + struct r300_screen *r300screen = r300_screen(screen); struct r300_buffer *rbuf; - rbuf = CALLOC_STRUCT(r300_buffer); - if (!rbuf) - goto no_rbuf; + rbuf = util_mempool_malloc(&r300screen->pool_buffers); rbuf->magic = R300_BUFFER_MAGIC; pipe_reference_init(&rbuf->b.b.reference, 1); rbuf->b.vtbl = &r300_buffer_vtbl; rbuf->b.b.screen = screen; + rbuf->b.b.target = PIPE_BUFFER; rbuf->b.b.format = PIPE_FORMAT_R8_UNORM; rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE; rbuf->b.b.bind = bind; rbuf->b.b.width0 = bytes; rbuf->b.b.height0 = 1; rbuf->b.b.depth0 = 1; + rbuf->b.b.flags = 0; rbuf->domain = R300_DOMAIN_GTT; - + rbuf->num_ranges = 0; + rbuf->buf = NULL; + rbuf->constant_buffer = NULL; rbuf->user_buffer = ptr; return &rbuf->b.b; - -no_rbuf: - return NULL; } diff --git 
a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index ff355858704..cafa9f96f20 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -55,6 +55,7 @@ struct r300_buffer enum r300_buffer_domain domain; void *user_buffer; + void *constant_buffer; struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; unsigned num_ranges; }; @@ -67,7 +68,7 @@ int r300_upload_index_buffer(struct r300_context *r300, struct pipe_resource **index_buffer, unsigned index_size, unsigned start, - unsigned count); + unsigned count, unsigned *out_offset); struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); @@ -97,23 +98,4 @@ static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer) return r300_buffer(buffer)->user_buffer ? true : false; } -static INLINE boolean r300_add_buffer(struct r300_winsys_screen *rws, - struct pipe_resource *buffer, - int rd, int wr) -{ - struct r300_buffer *buf = r300_buffer(buffer); - - if (!buf->buf) - return true; - - return rws->add_buffer(rws, buf->buf, rd, wr); -} - -static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws, - struct r300_texture *tex, - int rd, int wr) -{ - return rws->add_buffer(rws, tex->buffer, rd, wr); -} - #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bc2b62ba541..3e221f2e02d 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -23,6 +23,7 @@ #include "draw/draw_context.h" +#include "util/u_blitter.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" @@ -428,14 +429,19 @@ static void r300_set_clip_state(struct pipe_context* pipe, clip->clip = *state; if (r300->screen->caps.has_tcl) { - BEGIN_CB(clip->cb, 29); - OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG, - (r300->screen->caps.is_r500 ? 
- R500_PVS_UCP_START : R300_PVS_UCP_START)); - OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); - OUT_CB_TABLE(state->ucp, 6 * 4); + r300->clip_state.size = 2 + !!state->nr * 3 + state->nr * 4; + + BEGIN_CB(clip->cb, r300->clip_state.size); + if (state->nr) { + OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300->screen->caps.is_r500 ? + R500_PVS_UCP_START : R300_PVS_UCP_START)); + OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, state->nr * 4); + OUT_CB_TABLE(state->ucp, state->nr * 4); + } OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN); + R300_PS_UCP_MODE_CLIP_AS_TRIFAN | + (state->depth_clamp ? R300_CLIP_DISABLE : 0)); END_CB; r300->clip_state.dirty = TRUE; @@ -608,32 +614,43 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, r300->dsa_state.dirty = TRUE; } +static void r300_tex_set_tiling_flags(struct r300_context *r300, + struct r300_texture *tex, unsigned level) +{ + /* Check if the macrotile flag needs to be changed. + * Skip changing the flags otherwise. */ + if (tex->desc.macrotile[tex->surface_level] != + tex->desc.macrotile[level]) { + /* Tiling determines how DRM treats the buffer data. + * We must flush CS when changing it if the buffer is referenced. */ + if (r300->rws->cs_is_buffer_referenced(r300->cs, + tex->buffer, R300_REF_CS)) + r300->context.flush(&r300->context, 0, NULL); + + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, + tex->desc.microtile, tex->desc.macrotile[level], + tex->desc.stride_in_bytes[0]); + + tex->surface_level = level; + } +} + /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ static void r300_fb_set_tiling_flags(struct r300_context *r300, - const struct pipe_framebuffer_state *old_state, - const struct pipe_framebuffer_state *new_state) + const struct pipe_framebuffer_state *state) { - struct r300_texture *tex; - unsigned i, level; + unsigned i; /* Set tiling flags for new surfaces. 
*/ - for (i = 0; i < new_state->nr_cbufs; i++) { - tex = r300_texture(new_state->cbufs[i]->texture); - level = new_state->cbufs[i]->level; - - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), - tex->microtile, - tex->mip_macrotile[level]); + for (i = 0; i < state->nr_cbufs; i++) { + r300_tex_set_tiling_flags(r300, + r300_texture(state->cbufs[i]->texture), + state->cbufs[i]->level); } - if (new_state->zsbuf) { - tex = r300_texture(new_state->zsbuf->texture); - level = new_state->zsbuf->level; - - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), - tex->microtile, - tex->mip_macrotile[level]); + if (state->zsbuf) { + r300_tex_set_tiling_flags(r300, + r300_texture(state->zsbuf->texture), + state->zsbuf->level); } } @@ -654,26 +671,49 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, surf->zslice, surf->face, surf->level, util_format_short_name(surf->format), - rtex->macrotile ? "YES" : " NO", rtex->microtile ? "YES" : " NO", - rtex->hwpitch[0], tex->width0, tex->height0, tex->depth0, + rtex->desc.macrotile[0] ? "YES" : " NO", + rtex->desc.microtile ? "YES" : " NO", + rtex->desc.stride_in_pixels[0], + tex->width0, tex->height0, tex->depth0, tex->last_level, util_format_short_name(tex->format)); } +void r300_mark_fb_state_dirty(struct r300_context *r300, + enum r300_fb_state_change change) +{ + struct pipe_framebuffer_state *state = r300->fb_state.state; + + /* What is marked as dirty depends on the enum r300_fb_state_change. */ + r300->gpu_flush.dirty = TRUE; + r300->fb_state.dirty = TRUE; + r300->hyperz_state.dirty = TRUE; + + if (change == R300_CHANGED_FB_STATE) { + r300->aa_state.dirty = TRUE; + r300->fb_state_pipelined.dirty = TRUE; + } + + /* Now compute the fb_state atom size. 
*/ + r300->fb_state.size = 2 + (8 * state->nr_cbufs); + + if (r300->cbzb_clear) + r300->fb_state.size += 10; + else if (state->zsbuf) + r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; + + /* The size of the rest of atoms stays the same. */ +} + static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - if (state->nr_cbufs > 4) { - fprintf(stderr, "r300: Implementation error: Too many MRTs in %s, " - "refusing to bind framebuffer state!\n", __FUNCTION__); - return; - } - if (r300->screen->caps.is_r500) { max_width = max_height = 4096; } else if (r300->screen->caps.is_r400) { @@ -692,8 +732,6 @@ static void draw_flush(r300->draw); } - r300->fb_state.dirty = TRUE; - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300->blend_state.dirty = TRUE; @@ -704,12 +742,11 @@ static void } /* The tiling flags are dependent on the surface miplevel, unfortunately. */ - r300_fb_set_tiling_flags(r300, r300->fb_state.state, state); + r300_fb_set_tiling_flags(r300, state); - memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); + util_assign_framebuffer_state(r300->fb_state.state, state); - r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) + - (state->zsbuf ? 10 : 0) + 11; + r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); /* Polygon offset depends on the zbuffer bit depth. */ if (state->zsbuf && r300->polygon_offset_enabled) { @@ -728,6 +765,30 @@ static void } } + /* Set up AA config. 
*/ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; + } + } else { + aa->aa_config = 0; + } + } + if (DBG_ON(r300, DBG_FB)) { fprintf(stderr, "r300: set_framebuffer_state:\n"); for (i = 0; i < state->nr_cbufs; i++) { @@ -826,6 +887,27 @@ static void* r300_create_rs_state(struct pipe_context* pipe, struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state); int i; float psiz; + uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ + uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ + uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ + uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ + uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ + uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ + uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ + uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ + uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ + uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ + + /* Specifies top of Raster pipe specific enable controls, + * i.e. 
texture coordinates stuffing for points, lines, triangles */ + uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */ + + /* Point sprites texture coordinates, 0: lower left, 1: upper right */ + float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */ + float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */ + float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */ + float point_texcoord_top = 0; /* R300_GA_POINT_T1: 0x420c */ + CB_LOCALS; /* Copy rasterizer state. */ rs->rs = *state; @@ -835,18 +917,18 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */ #ifdef PIPE_ARCH_LITTLE_ENDIAN - rs->vap_control_status = R300_VC_NO_SWAP; + vap_control_status = R300_VC_NO_SWAP; #else - rs->vap_control_status = R300_VC_32BIT_SWAP; + vap_control_status = R300_VC_32BIT_SWAP; #endif /* If no TCL engine is present, turn off the HW TCL. */ if (!r300_screen(pipe->screen)->caps.has_tcl) { - rs->vap_control_status |= R300_VAP_TCL_BYPASS; + vap_control_status |= R300_VAP_TCL_BYPASS; } /* Point size width and height. */ - rs->point_size = + point_size = pack_float_16_6x(state->point_size) | (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); @@ -856,68 +938,70 @@ static void* r300_create_rs_state(struct pipe_context* pipe, * Clamp to [0, max FB size] */ psiz = pipe->screen->get_paramf(pipe->screen, PIPE_CAP_MAX_POINT_WIDTH); - rs->point_minmax = + point_minmax = pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT; } else { /* We cannot disable the point-size vertex output, * so clamp it. */ psiz = state->point_size; - rs->point_minmax = + point_minmax = (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) | (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT); } /* Line control. 
*/ - rs->line_control = pack_float_16_6x(state->line_width) | + line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; /* Enable polygon mode */ + polygon_mode = 0; if (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL) { - rs->polygon_mode = R300_GA_POLY_MODE_DUAL; + polygon_mode = R300_GA_POLY_MODE_DUAL; } /* Front face */ if (state->front_ccw) - rs->cull_mode = R300_FRONT_FACE_CCW; + cull_mode = R300_FRONT_FACE_CCW; else - rs->cull_mode = R300_FRONT_FACE_CW; + cull_mode = R300_FRONT_FACE_CW; /* Polygon offset */ + polygon_offset_enable = 0; if (util_get_offset(state, state->fill_front)) { - rs->polygon_offset_enable |= R300_FRONT_ENABLE; + polygon_offset_enable |= R300_FRONT_ENABLE; } if (util_get_offset(state, state->fill_back)) { - rs->polygon_offset_enable |= R300_BACK_ENABLE; + polygon_offset_enable |= R300_BACK_ENABLE; } + rs->polygon_offset_enable = polygon_offset_enable != 0; + /* Polygon mode */ - if (rs->polygon_mode) { - rs->polygon_mode |= + if (polygon_mode) { + polygon_mode |= r300_translate_polygon_mode_front(state->fill_front); - rs->polygon_mode |= + polygon_mode |= r300_translate_polygon_mode_back(state->fill_back); } if (state->cull_face & PIPE_FACE_FRONT) { - rs->cull_mode |= R300_CULL_FRONT; + cull_mode |= R300_CULL_FRONT; } if (state->cull_face & PIPE_FACE_BACK) { - rs->cull_mode |= R300_CULL_BACK; - } - - if (rs->polygon_offset_enable) { - rs->depth_offset = state->offset_units; - rs->depth_scale = state->offset_scale; + cull_mode |= R300_CULL_BACK; } if (state->line_stipple_enable) { - rs->line_stipple_config = + line_stipple_config = R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE | (fui((float)state->line_stipple_factor) & R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK); /* XXX this might need to be scaled up */ - rs->line_stipple_value = state->line_stipple_pattern; + line_stipple_value = state->line_stipple_pattern; + } else { + line_stipple_config = 0; + 
line_stipple_value = 0; } if (state->flatshade) { @@ -926,35 +1010,78 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->color_control = R300_SHADE_MODEL_SMOOTH; } - rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; /* Point sprites */ + stuffing_enable = 0; if (state->sprite_coord_enable) { - rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE; + stuffing_enable = R300_GB_POINT_STUFF_ENABLE; for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) - rs->stuffing_enable |= + stuffing_enable |= R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); } - rs->point_texcoord_left = 0.0f; - rs->point_texcoord_right = 1.0f; + point_texcoord_left = 0.0f; + point_texcoord_right = 1.0f; switch (state->sprite_coord_mode) { case PIPE_SPRITE_COORD_UPPER_LEFT: - rs->point_texcoord_top = 0.0f; - rs->point_texcoord_bottom = 1.0f; + point_texcoord_top = 0.0f; + point_texcoord_bottom = 1.0f; break; case PIPE_SPRITE_COORD_LOWER_LEFT: - rs->point_texcoord_top = 1.0f; - rs->point_texcoord_bottom = 0.0f; + point_texcoord_top = 1.0f; + point_texcoord_bottom = 0.0f; break; } } - if (state->gl_rasterization_rules) { - rs->multisample_position_0 = 0x66666666; - rs->multisample_position_1 = 0x6666666; + /* Build the main command buffer. 
*/ + BEGIN_CB(rs->cb_main, 25); + OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status); + OUT_CB_REG(R300_GA_POINT_SIZE, point_size); + OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2); + OUT_CB(point_minmax); + OUT_CB(line_control); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); + OUT_CB(polygon_offset_enable); + rs->cull_mode_index = 9; + OUT_CB(cull_mode); + OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config); + OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value); + OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode); + OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule); + OUT_CB_REG(R300_GB_ENABLE, stuffing_enable); + OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); + OUT_CB_32F(point_texcoord_left); + OUT_CB_32F(point_texcoord_bottom); + OUT_CB_32F(point_texcoord_right); + OUT_CB_32F(point_texcoord_top); + END_CB; + + /* Build the two command buffers for polygon offset setup. */ + if (polygon_offset_enable) { + float scale = state->offset_scale * 12; + float offset = state->offset_units * 4; + + BEGIN_CB(rs->cb_poly_offset_zb16, 5); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + END_CB; + + offset = state->offset_units * 2; + + BEGIN_CB(rs->cb_poly_offset_zb24, 5); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + END_CB; } return (void*)rs; @@ -986,8 +1113,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } UPDATE_STATE(state, r300->rs_state); - r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0) + - (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0) ? 5 : 0); + r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 
5 : 0); if (last_sprite_coord_enable != r300->sprite_coord_enable || last_two_sided_color != r300->two_sided_color) { @@ -1056,7 +1182,7 @@ static void* lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1); - sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; + sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; /* This is very high quality anisotropic filtering for R5xx. * It's good for benchmarking the performance of texturing but @@ -1170,7 +1296,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, /* Set the texrect factor in the fragment shader. * Needed for RECT and NPOT fallback. */ texture = r300_texture(views[i]->texture); - if (texture->uses_pitch) { + if (texture->desc.is_npot) { r300->fs_rc_constant_state.dirty = TRUE; } @@ -1204,6 +1330,7 @@ r300_create_sampler_view(struct pipe_context *pipe, { struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); struct r300_texture *tex = r300_texture(texture); + boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; if (view) { view->base = *templ; @@ -1219,8 +1346,9 @@ r300_create_sampler_view(struct pipe_context *pipe, view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, - view->swizzle); - if (r300_screen(pipe->screen)->caps.is_r500) { + view->swizzle, + is_r500); + if (is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } } @@ -1544,7 +1672,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe, const struct pipe_shader_state* shader) { struct r300_context* r300 = r300_context(pipe); - struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); /* Copy state directly into shader. 
*/ @@ -1621,8 +1748,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; - struct pipe_transfer *tr; - float *mapped; + uint32_t *mapped = r300_buffer(buf)->user_buffer; int max_size = 0, max_size_bytes = 0, clamped_size = 0; switch (shader) { @@ -1645,8 +1771,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, max_size_bytes = max_size * 4 * sizeof(float); if (buf == NULL || buf->width0 == 0 || - (mapped = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &tr)) == NULL) - { + (mapped = r300_buffer(buf)->constant_buffer) == NULL) { cbuf->count = 0; return; } @@ -1664,17 +1789,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, clamped_size = MIN2(buf->width0, max_size_bytes); cbuf->count = clamped_size / (4 * sizeof(float)); - - if (shader == PIPE_SHADER_FRAGMENT && !r300->screen->caps.is_r500) { - unsigned i,j; - - /* Convert constants to float24. */ - for (i = 0; i < cbuf->count; i++) - for (j = 0; j < 4; j++) - cbuf->constants[i][j] = pack_float24(mapped[i*4+j]); - } else { - memcpy(cbuf->constants, mapped, clamped_size); - } + cbuf->ptr = mapped; } if (shader == PIPE_SHADER_VERTEX) { @@ -1690,8 +1805,6 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, } else if (shader == PIPE_SHADER_FRAGMENT) { r300->fs_constants.dirty = TRUE; } - - pipe_buffer_unmap(pipe, buf, tr); } void r300_init_state_functions(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 3aa8deb63c8..a85db27064c 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -102,7 +102,8 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) * they won't be rasterized. 
*/ gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { + if (vs_outputs->generic[i] != ATTR_UNUSED && + !(r300->sprite_coord_enable & (1 << i))) { r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->generic[i]); gen_count++; @@ -118,7 +119,7 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) /* WPOS. */ if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) { - DBG(r300, DBG_DRAW, "draw_emit_attrib: WPOS, index: %i\n", + DBG(r300, DBG_SWTCL, "draw_emit_attrib: WPOS, index: %i\n", vs_outputs->wpos); r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->wpos); @@ -140,18 +141,19 @@ static void r300_swtcl_vertex_psc(struct r300_context *r300) /* For each Draw attribute, route it to the fragment shader according * to the vs_output_tab. */ attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + DBG(r300, DBG_SWTCL, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: index %d, interp %d, emit %d," - " vs_output_tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - vs_output_tab[i]); - - /* Make sure we have a proper destination for our attribute. */ - assert(vs_output_tab[i] != -1); + if (vs_output_tab[i] == -1) { + assert(0); + abort(); + } format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + DBG(r300, DBG_SWTCL, + "r300: swtcl_vertex_psc [%i] <- %s\n", + vs_output_tab[i], util_format_short_name(format)); + /* Obtain the type of data in this attribute. 
*/ type = r300_translate_vertex_data_type(format); if (type == R300_INVALID_FORMAT) { @@ -526,15 +528,9 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) struct r300_sampler_state *sampler; struct r300_sampler_view *view; struct r300_texture *tex; - unsigned min_level, max_level, i, size; + unsigned min_level, max_level, i, j, size; unsigned count = MIN2(state->sampler_view_count, state->sampler_state_count); - unsigned char depth_swizzle[4] = { - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X, - UTIL_FORMAT_SWIZZLE_X - }; /* The KIL opcode fix, see below. */ if (!count && !r300->screen->caps.is_r500) @@ -561,14 +557,29 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* Assign a texture cache region. */ texstate->format.format1 |= view->texcache_region; - /* If compare mode is disabled, the sampler view swizzles - * are stored in the format. - * Otherwise, swizzles must be applied after the compare mode - * in the fragment shader. */ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { + /* Depth textures are kinda special. */ + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + unsigned char depth_swizzle[4]; + + if (!r300->screen->caps.is_r500 && + util_format_get_blocksizebits(tex->desc.b.b.format) == 32) { + /* X24x8 is sampled as Y16X16 on r3xx-r4xx. + * The depth here is at the Y component. */ + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_Y; + } else { + for (j = 0; j < 4; j++) + depth_swizzle[j] = UTIL_FORMAT_SWIZZLE_X; + } + + /* If compare mode is disabled, sampler view swizzles + * are stored in the format. + * Otherwise, the swizzles must be applied after the compare + * mode in the fragment shader. 
*/ if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= - r300_get_swizzle_combined(depth_swizzle, view->swizzle); + r300_get_swizzle_combined(depth_swizzle, + view->swizzle); } else { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, 0); @@ -576,12 +587,12 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) } /* to emulate 1D textures through 2D ones correctly */ - if (tex->b.b.target == PIPE_TEXTURE_1D) { + if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } - if (tex->uses_pitch) { + if (tex->desc.is_npot) { /* NPOT textures don't support mip filter, unfortunately. * This prevents incorrect rendering. */ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; @@ -608,7 +619,7 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) /* determine min/max levels */ /* the MAX_MIP level is the largest (finest) one */ max_level = MIN3(sampler->max_lod + view->base.first_level, - tex->b.b.last_level, view->base.last_level); + tex->desc.b.b.last_level, view->base.last_level); min_level = MIN2(sampler->min_lod + view->base.first_level, max_level); texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level); diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c deleted file mode 100644 index e67a0ae2444..00000000000 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright 2009 Joakim Sindholt <[email protected]> - * Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies 
of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_reg.h" -#include "r300_screen.h" -#include "r300_state_invariant.h" - -/* Calculate and emit invariant state. This is data that the 3D engine - * will probably want at the beginning of every CS, but it's not currently - * handled by any CSO setup, and in addition it doesn't really change much. - * - * Note that eventually this should be empty, but it's useful for development - * and general unduplication of code. */ -void r300_emit_invariant_state(struct r300_context* r300, - unsigned size, void* state) -{ - CS_LOCALS(r300); - - BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 
2 : 0)); - - /*** Graphics Backend (GB) ***/ - /* Source of fog depth */ - OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); - - /*** Fog (FG) ***/ - OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - - /*** VAP ***/ - /* Sign/normalize control */ - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); - /* TCL-only stuff */ - if (r300->screen->caps.has_tcl) { - /* Amount of time to wait for vertex fetches in PVS */ - OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); - } - - END_CS; - - /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(38 + (r300->screen->caps.has_tcl ? 7 : 0) + - (r300->screen->caps.is_rv350 ? 4 : 0) + - (r300->screen->caps.is_r400 ? 2 : 0)); - - if (r300->screen->caps.has_tcl) { - /*Flushing PVS is required before the VAP_GB registers can be changed*/ - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); - OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - } - /* XXX line tex stuffing */ - OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1); - OUT_CS_32F(0.0); - OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1); - OUT_CS_32F(1.0); - OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | - (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); - /* XXX this big chunk should be refactored into rs_state */ - OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); - OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); - OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); - OUT_CS_REG(R300_GA_OFFSET, 0x00000000); - OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); - OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000); - OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000); - OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); - OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000); - OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); - OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - - if (r300->screen->caps.is_rv350) { - 
OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); - } - - OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); - OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); - if (r300->screen->caps.is_r400) - OUT_CS_REG(R400_US_CODE_BANK, 0); - END_CS; -} diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h deleted file mode 100644 index 83d031c7fe9..00000000000 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef R300_STATE_INVARIANT_H -#define R300_STATE_INVARIANT_H - -struct r300_context; - -void r300_emit_invariant_state(struct r300_context* r300, - unsigned size, void* state); - -#endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ddb66000561..fcdca5605e9 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -26,6 +26,7 @@ #include "r300_context.h" #include "r300_reg.h" +#include "r300_texture_desc.h" #include "r300_transfer.h" #include "r300_screen.h" #include "r300_winsys.h" @@ -36,12 +37,6 @@ #include "util/u_memory.h" #include "pipe/p_screen.h" -#include "state_tracker/drm_api.h" - -enum r300_dim { - DIM_WIDTH = 0, - DIM_HEIGHT = 1 -}; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view) @@ -110,7 +105,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, * The FORMAT specifies how the texture sampler will treat the texture, and * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view) + const unsigned char *swizzle_view, + boolean is_r500) { uint32_t result = 0; const struct util_format_description *desc; @@ -135,7 +131,10 @@ uint32_t r300_translate_texformat(enum pipe_format format, return R300_TX_FORMAT_X16; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return R500_TX_FORMAT_Y8X24; + if (is_r500) + return R500_TX_FORMAT_Y8X24; + else + return R300_TX_FORMAT_Y16X16; default: return ~0; /* Unsupported. 
*/ } @@ -538,26 +537,27 @@ boolean r300_is_zs_format_supported(enum pipe_format format) boolean r300_is_sampler_format_supported(enum pipe_format format) { - return r300_translate_texformat(format, 0) != ~0; + return r300_translate_texformat(format, 0, TRUE) != ~0; } static void r300_texture_setup_immutable_state(struct r300_screen* screen, struct r300_texture* tex) { struct r300_texture_format_state* f = &tex->tx_format; - struct pipe_resource *pt = &tex->b.b; + struct pipe_resource *pt = &tex->desc.b.b; boolean is_r500 = screen->caps.is_r500; /* Set sampler state. */ f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); - if (tex->uses_pitch) { + if (tex->desc.uses_stride_addressing) { /* rectangles love this */ f->format0 |= R300_TX_PITCH_EN; - f->format2 = (tex->hwpitch[0] - 1) & 0x1fff; + f->format2 = (tex->desc.stride_in_pixels[0] - 1) & 0x1fff; } else { - /* power of two textures (3D, mipmaps, and no pitch) */ + /* Power of two textures (3D, mipmaps, and no pitch), + * also NPOT textures with a width being POT. */ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); } @@ -580,8 +580,8 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, } } - f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | - R300_TXO_MICRO_TILE(tex->microtile); + f->tile_config = R300_TXO_MACRO_TILE(tex->desc.macrotile[0]) | + R300_TXO_MICRO_TILE(tex->desc.microtile); } static void r300_texture_setup_fb_state(struct r300_screen* screen, @@ -590,23 +590,23 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, unsigned i; /* Set framebuffer state. 
*/ - if (util_format_is_depth_or_stencil(tex->b.b.format)) { - for (i = 0; i <= tex->b.b.last_level; i++) { + if (util_format_is_depth_or_stencil(tex->desc.b.b.format)) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | - R300_DEPTHMICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + R300_DEPTHMACROTILE(tex->desc.macrotile[i]) | + R300_DEPTHMICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_zsformat(tex->b.b.format); + tex->fb_state.format = r300_translate_zsformat(tex->desc.b.b.format); } else { - for (i = 0; i <= tex->b.b.last_level; i++) { + for (i = 0; i <= tex->desc.b.b.last_level; i++) { tex->fb_state.pitch[i] = - tex->hwpitch[i] | - r300_translate_colorformat(tex->b.b.format) | - R300_COLOR_TILE(tex->mip_macrotile[i]) | - R300_COLOR_MICROTILE(tex->microtile); + tex->desc.stride_in_pixels[i] | + r300_translate_colorformat(tex->desc.b.b.format) | + R300_COLOR_TILE(tex->desc.macrotile[i]) | + R300_COLOR_MICROTILE(tex->desc.microtile); } - tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format); + tex->fb_state.format = r300_translate_out_fmt(tex->desc.b.b.format); } } @@ -626,282 +626,6 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex)); } -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face) -{ - unsigned offset = tex->offset[level]; - - switch (tex->b.b.target) { - case PIPE_TEXTURE_3D: - assert(face == 0); - return offset + zslice * tex->layer_size[level]; - - case PIPE_TEXTURE_CUBE: - assert(zslice == 0); - return offset + face * tex->layer_size[level]; - - default: - assert(zslice == 0 && face == 0); - return offset; - } -} - -/* Returns the number of pixels that the texture should be aligned to - * in the given dimension. 
*/ -static unsigned r300_get_pixel_alignment(struct r300_texture *tex, - enum r300_buffer_tiling macrotile, - enum r300_dim dim) -{ - static const unsigned table[2][5][3][2] = - { - { - /* Macro: linear linear linear - Micro: linear tiled square-tiled */ - {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ - {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ - {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ - {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - }, - { - /* Macro: tiled tiled tiled - Micro: linear tiled square-tiled */ - {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ - {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ - {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ - {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ - {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ - } - }; - static const unsigned aa_block[2] = {4, 8}; - unsigned res = 0; - unsigned pixsize = util_format_get_blocksize(tex->b.b.format); - - assert(macrotile <= R300_BUFFER_TILED); - assert(tex->microtile <= R300_BUFFER_SQUARETILED); - assert(pixsize <= 16); - assert(dim <= DIM_HEIGHT); - - if (tex->b.b.nr_samples > 1) { - /* Multisampled textures have their own alignment scheme. */ - if (pixsize == 4) - res = aa_block[dim]; - } else { - /* Standard alignment. */ - res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim]; - } - - assert(res); - return res; -} - -/* Return true if macrotiling should be enabled on the miplevel. */ -static boolean r300_texture_macro_switch(struct r300_texture *tex, - unsigned level, - boolean rv350_mode, - enum r300_dim dim) -{ - unsigned tile, texdim; - - tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim); - if (dim == DIM_WIDTH) { - texdim = u_minify(tex->b.b.width0, level); - } else { - texdim = u_minify(tex->b.b.height0, level); - } - - /* See TX_FILTER1_n.MACRO_SWITCH. 
*/ - if (rv350_mode) { - return texdim >= tile; - } else { - return texdim > tile; - } -} - -/** - * Return the stride, in bytes, of the texture images of the given texture - * at the given level. - */ -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level) -{ - unsigned tile_width, width, stride; - - if (tex->stride_override) - return tex->stride_override; - - /* Check the level. */ - if (level > tex->b.b.last_level) { - SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", - __FUNCTION__, level, tex->b.b.last_level); - return 0; - } - - width = u_minify(tex->b.b.width0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_WIDTH); - width = align(width, tile_width); - - stride = util_format_get_stride(tex->b.b.format, width); - - /* Some IGPs need a minimum stride of 64 bytes, hmm... - * This doesn't seem to apply to tiled textures, according to r300c. */ - if (!tex->microtile && !tex->mip_macrotile[level] && - (screen->caps.family == CHIP_FAMILY_RS600 || - screen->caps.family == CHIP_FAMILY_RS690 || - screen->caps.family == CHIP_FAMILY_RS740)) { - return stride < 64 ? 64 : stride; - } - - /* The alignment to 32 bytes is sort of implied by the layout... */ - return stride; - } else { - return align(util_format_get_stride(tex->b.b.format, width), 32); - } -} - -static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, - unsigned level) -{ - unsigned height, tile_height; - - height = u_minify(tex->b.b.height0, level); - - if (util_format_is_plain(tex->b.b.format)) { - tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], - DIM_HEIGHT); - height = align(height, tile_height); - - /* This is needed for the kernel checker, unfortunately. 
*/ - height = util_next_power_of_two(height); - } - - return util_format_get_nblocksy(tex->b.b.format, height); -} - -static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, - struct r300_texture *tex) -{ - /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures - * incorrectly. This is a workaround to prevent CS from being rejected. */ - - unsigned i, size; - - if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && - tex->b.b.target == PIPE_TEXTURE_3D && - tex->b.b.last_level > 0) { - size = 0; - - for (i = 0; i <= tex->b.b.last_level; i++) { - size += r300_texture_get_stride(screen, tex, i) * - r300_texture_get_nblocksy(tex, i); - } - - size *= tex->b.b.depth0; - tex->size = size; - } -} - -static void r300_setup_miptree(struct r300_screen* screen, - struct r300_texture* tex) -{ - struct pipe_resource* base = &tex->b.b; - unsigned stride, size, layer_size, nblocksy, i; - boolean rv350_mode = screen->caps.is_rv350; - - SCREEN_DBG(screen, DBG_TEXALLOC, - "r300: Making miptree for texture, format %s\n", - util_format_short_name(base->format)); - - for (i = 0; i <= base->last_level; i++) { - /* Let's see if this miplevel can be macrotiled. */ - tex->mip_macrotile[i] = - (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? 
- R300_BUFFER_TILED : R300_BUFFER_LINEAR; - - stride = r300_texture_get_stride(screen, tex, i); - nblocksy = r300_texture_get_nblocksy(tex, i); - layer_size = stride * nblocksy; - - if (base->nr_samples) { - layer_size *= base->nr_samples; - } - - if (base->target == PIPE_TEXTURE_CUBE) - size = layer_size * 6; - else - size = layer_size * u_minify(base->depth0, i); - - tex->offset[i] = tex->size; - tex->size = tex->offset[i] + size; - tex->layer_size[i] = layer_size; - tex->pitch[i] = stride / util_format_get_blocksize(base->format); - tex->hwpitch[i] = - tex->pitch[i] * util_format_get_blockwidth(base->format); - - SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", - i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride, tex->size, - tex->mip_macrotile[i] ? "TRUE" : "FALSE"); - } -} - -static void r300_setup_flags(struct r300_texture* tex) -{ - tex->uses_pitch = !util_is_power_of_two(tex->b.b.width0) || - !util_is_power_of_two(tex->b.b.height0) || - tex->stride_override; -} - -static void r300_setup_tiling(struct pipe_screen *screen, - struct r300_texture *tex) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - enum pipe_format format = tex->b.b.format; - boolean rv350_mode = r300_screen(screen)->caps.is_rv350; - boolean is_zb = util_format_is_depth_or_stencil(format); - boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING); - - if (!util_format_is_plain(format)) { - return; - } - - /* If height == 1, disable microtiling except for zbuffer. */ - if (!is_zb && (tex->b.b.height0 == 1 || dbg_no_tiling)) { - return; - } - - /* Set microtiling. 
*/ - switch (util_format_get_blocksize(format)) { - case 1: - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - case 8: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - } - break; - } - - if (dbg_no_tiling) { - return; - } - - /* Set macrotiling. */ - if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && - r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { - tex->macrotile = R300_BUFFER_TILED; - } -} - static unsigned r300_texture_is_referenced(struct pipe_context *context, struct pipe_resource *texture, unsigned face, unsigned level) @@ -909,7 +633,8 @@ static unsigned r300_texture_is_referenced(struct pipe_context *context, struct r300_context *r300 = r300_context(context); struct r300_texture *rtex = (struct r300_texture *)texture; - if (r300->rws->is_buffer_referenced(r300->rws, rtex->buffer, R300_REF_CS)) + if (r300->rws->cs_is_buffer_referenced(r300->cs, + rtex->buffer, R300_REF_CS)) return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; return PIPE_UNREFERENCED; @@ -936,12 +661,11 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, return FALSE; } - whandle->stride = r300_texture_get_stride(r300_screen(screen), tex, 0); - - return rws->buffer_get_handle(rws, tex->buffer, whandle); + return rws->buffer_get_handle(rws, tex->buffer, + tex->desc.stride_in_bytes[0], whandle); } -struct u_resource_vtbl r300_texture_vtbl = +struct u_resource_vtbl r300_texture_vtbl = { r300_texture_get_handle, /* get_handle */ r300_texture_destroy, /* resource_destroy */ @@ -954,17 +678,69 @@ struct u_resource_vtbl r300_texture_vtbl = u_default_transfer_inline_write /* transfer_inline_write */ }; -/* Create a new texture. */ -struct pipe_resource* r300_texture_create(struct pipe_screen* screen, - const struct pipe_resource* base) +/* The common texture constructor. 
*/ +static struct r300_texture* +r300_texture_create_object(struct r300_screen *rscreen, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size, + struct r300_winsys_buffer *buffer) { - struct r300_texture* tex = CALLOC_STRUCT(r300_texture); - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; - + struct r300_winsys_screen *rws = rscreen->rws; + struct r300_texture *tex = CALLOC_STRUCT(r300_texture); if (!tex) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + return NULL; + } + + /* Initialize the descriptor. */ + if (!r300_texture_desc_init(rscreen, &tex->desc, base, + microtile, macrotile, + stride_in_bytes_override, + max_buffer_size)) { + if (buffer) + rws->buffer_reference(rws, &buffer, NULL); + FREE(tex); return NULL; } + /* Initialize the hardware state. */ + r300_texture_setup_immutable_state(rscreen, tex); + r300_texture_setup_fb_state(rscreen, tex); + + tex->desc.b.vtbl = &r300_texture_vtbl; + pipe_reference_init(&tex->desc.b.b.reference, 1); + tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? + R300_DOMAIN_GTT : + R300_DOMAIN_VRAM | R300_DOMAIN_GTT; + tex->buffer = buffer; + + /* Create the backing buffer if needed. */ + if (!tex->buffer) { + tex->buffer = rws->buffer_create(rws, tex->desc.size_in_bytes, 2048, + base->bind, base->usage, tex->domain); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + rws->buffer_set_tiling(rws, tex->buffer, + tex->desc.microtile, tex->desc.macrotile[0], + tex->desc.stride_in_bytes[0]); + + return tex; +} + +/* Create a new texture. */ +struct pipe_resource *r300_texture_create(struct pipe_screen *screen, + const struct pipe_resource *base) +{ + struct r300_screen *rscreen = r300_screen(screen); + enum r300_buffer_tiling microtile, macrotile; /* Refuse to create a texture with size 0. 
*/ if (!base->width0 || @@ -974,58 +750,70 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, fprintf(stderr, "r300: texture_create: " "Got invalid texture dimensions: %ix%ix%i\n", base->width0, base->height0, base->depth0); - FREE(tex); return NULL; } - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; + if ((base->flags & R300_RESOURCE_FLAG_TRANSFER) || + (base->bind & PIPE_BIND_SCANOUT)) { + microtile = R300_BUFFER_LINEAR; + macrotile = R300_BUFFER_LINEAR; + } else { + microtile = R300_BUFFER_SELECT_LAYOUT; + macrotile = R300_BUFFER_SELECT_LAYOUT; + } + + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + 0, 0, NULL); +} + +struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *base, + struct winsys_handle *whandle) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; + struct r300_screen *rscreen = r300_screen(screen); + struct r300_winsys_buffer *buffer; + enum r300_buffer_tiling microtile, macrotile; + unsigned stride, size; - r300_setup_flags(tex); - if (!(base->flags & R300_RESOURCE_FLAG_TRANSFER) && - !(base->bind & PIPE_BIND_SCANOUT)) { - r300_setup_tiling(screen, tex); + /* Support only 2D textures without mipmaps */ + if (base->target != PIPE_TEXTURE_2D || + base->depth0 != 1 || + base->last_level != 0) { + return NULL; } - r300_setup_miptree(rscreen, tex); - r300_texture_3d_fix_mipmapping(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, " - "Dim: %ix%ix%i, LastLevel: %i, Size: %i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? 
"YES" : " NO", - tex->hwpitch[0], - base->width0, base->height0, base->depth0, base->last_level, - tex->size, - util_format_short_name(base->format)); + buffer = rws->buffer_from_handle(rws, whandle, &stride, &size); + if (!buffer) + return NULL; - tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT : - R300_DOMAIN_VRAM; + rws->buffer_get_tiling(rws, buffer, &microtile, &macrotile); - tex->buffer = rws->buffer_create(rws, 2048, base->bind, tex->domain, - tex->size); + /* Enforce a microtiled zbuffer. */ + if (util_format_is_depth_or_stencil(base->format) && + microtile == R300_BUFFER_LINEAR) { + switch (util_format_get_blocksize(base->format)) { + case 4: + microtile = R300_BUFFER_TILED; + break; - if (!tex->buffer) { - FREE(tex); - return NULL; + case 2: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) + microtile = R300_BUFFER_SQUARETILED; + break; + } } - rws->buffer_set_tiling(rws, tex->buffer, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), - tex->microtile, - tex->macrotile); - - return (struct pipe_resource*)tex; + return (struct pipe_resource*) + r300_texture_create_object(rscreen, base, microtile, macrotile, + stride, size, buffer); } /* Not required to implement u_resource_vtbl, consider moving to another file: */ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, - struct pipe_resource* texture, + struct pipe_resource* texture, unsigned face, unsigned level, unsigned zslice, @@ -1035,6 +823,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if (surface) { + uint32_t offset, tile_height; + pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); surface->base.format = texture->format; @@ -1046,10 +836,49 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, surface->base.level = level; surface->buffer = tex->buffer; + + /* Prefer VRAM if there are 
multiple domains to choose from. */ surface->domain = tex->domain; - surface->offset = r300_texture_get_offset(tex, level, zslice, face); + if (surface->domain & R300_DOMAIN_VRAM) + surface->domain &= ~R300_DOMAIN_GTT; + + surface->offset = r300_texture_get_offset(&tex->desc, + level, zslice, face); surface->pitch = tex->fb_state.pitch[level]; surface->format = tex->fb_state.format; + + /* Parameters for the CBZB clear. */ + surface->cbzb_allowed = tex->desc.cbzb_allowed[level]; + surface->cbzb_width = align(surface->base.width, 64); + + /* Height must be aligned to the size of a tile. */ + tile_height = r300_get_pixel_alignment(tex->desc.b.b.format, + tex->desc.b.b.nr_samples, + tex->desc.microtile, + tex->desc.macrotile[level], + DIM_HEIGHT); + + surface->cbzb_height = align((surface->base.height + 1) / 2, + tile_height); + + /* Offset must be aligned to 2K and must point at the beginning + * of a scanline. */ + offset = surface->offset + + tex->desc.stride_in_bytes[level] * surface->cbzb_height; + surface->cbzb_midpoint_offset = offset & ~2047; + + surface->cbzb_pitch = surface->pitch & 0x1ffffc; + + if (util_format_get_blocksizebits(surface->base.format) == 32) + surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + else + surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; + + SCREEN_DBG(r300_screen(screen), DBG_CBZB, + "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", + surface->cbzb_width, surface->cbzb_height, + offset & 2047, + tex->desc.macrotile[level] ? 
"YES" : " NO"); } return &surface->base; @@ -1062,88 +891,3 @@ void r300_tex_surface_destroy(struct pipe_surface* s) pipe_resource_reference(&s->texture, NULL); FREE(s); } - -struct pipe_resource* -r300_texture_from_handle(struct pipe_screen* screen, - const struct pipe_resource* base, - struct winsys_handle *whandle) -{ - struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; - struct r300_screen* rscreen = r300_screen(screen); - struct r300_winsys_buffer *buffer; - struct r300_texture* tex; - boolean override_zb_flags; - - /* Support only 2D textures without mipmaps */ - if (base->target != PIPE_TEXTURE_2D || - base->depth0 != 1 || - base->last_level != 0) { - return NULL; - } - - buffer = rws->buffer_from_handle(rws, whandle->handle); - if (!buffer) { - return NULL; - } - - tex = CALLOC_STRUCT(r300_texture); - if (!tex) { - return NULL; - } - - tex->b.b = *base; - tex->b.vtbl = &r300_texture_vtbl; - pipe_reference_init(&tex->b.b.reference, 1); - tex->b.b.screen = screen; - tex->domain = R300_DOMAIN_VRAM; - - tex->stride_override = whandle->stride; - - /* one ref already taken */ - tex->buffer = buffer; - - rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); - r300_setup_flags(tex); - SCREEN_DBG(rscreen, DBG_TEX, - "r300: texture_from_handle: Macro: %s, Micro: %s, " - "Pitch: % 4i, Dim: %ix%i, Format: %s\n", - tex->macrotile ? "YES" : " NO", - tex->microtile ? "YES" : " NO", - whandle->stride / util_format_get_blocksize(base->format), - base->width0, base->height0, - util_format_short_name(base->format)); - - /* Enforce microtiled zbuffer. 
*/ - override_zb_flags = util_format_is_depth_or_stencil(base->format) && - tex->microtile == R300_BUFFER_LINEAR; - - if (override_zb_flags) { - switch (util_format_get_blocksize(base->format)) { - case 4: - tex->microtile = R300_BUFFER_TILED; - break; - - case 2: - if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { - tex->microtile = R300_BUFFER_SQUARETILED; - break; - } - /* Pass through. */ - - default: - override_zb_flags = FALSE; - } - } - - r300_setup_miptree(rscreen, tex); - r300_texture_setup_immutable_state(rscreen, tex); - r300_texture_setup_fb_state(rscreen, tex); - - if (override_zb_flags) { - rws->buffer_set_tiling(rws, tex->buffer, - tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), - tex->microtile, - tex->macrotile); - } - return (struct pipe_resource*)tex; -} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 99e7694254e..a4524320fda 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -35,16 +35,11 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, const unsigned char *swizzle_view); uint32_t r300_translate_texformat(enum pipe_format format, - const unsigned char *swizzle_view); + const unsigned char *swizzle_view, + boolean is_r500); uint32_t r500_tx_format_msb_bit(enum pipe_format format); -unsigned r300_texture_get_stride(struct r300_screen* screen, - struct r300_texture* tex, unsigned level); - -unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, - unsigned zslice, unsigned face); - void r300_texture_reinterpret_format(struct pipe_screen *screen, struct pipe_resource *tex, enum pipe_format new_format); diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c new file mode 100644 index 00000000000..343089bf2c5 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -0,0 +1,465 @@ +/* + * Copyright 2008 Corbin Simpson <[email 
protected]> + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_texture_desc.h" + +#include "r300_context.h" +#include "r300_winsys.h" + +#include "util/u_format.h" + +/* Returns the number of pixels that the texture should be aligned to + * in the given dimension. 
*/ +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim) +{ + static const unsigned table[2][5][3][2] = + { + { + /* Macro: linear linear linear + Micro: linear tiled square-tiled */ + {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ + {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + }, + { + /* Macro: tiled tiled tiled + Micro: linear tiled square-tiled */ + {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ + {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ + {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ + {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + } + }; + static const unsigned aa_block[2] = {4, 8}; + unsigned tile = 0; + unsigned pixsize = util_format_get_blocksize(format); + + assert(macrotile <= R300_BUFFER_TILED); + assert(microtile <= R300_BUFFER_SQUARETILED); + assert(pixsize <= 16); + assert(dim <= DIM_HEIGHT); + + if (num_samples > 1) { + /* Multisampled textures have their own alignment scheme. */ + if (pixsize == 4) + tile = aa_block[dim]; + /* XXX FP16 AA. */ + } else { + /* Standard alignment. */ + tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; + } + + assert(tile); + return tile; +} + +/* Return true if macrotiling should be enabled on the miplevel. 
*/ +static boolean r300_texture_macro_switch(struct r300_texture_desc *desc, + unsigned level, + boolean rv350_mode, + enum r300_dim dim) +{ + unsigned tile, texdim; + + tile = r300_get_pixel_alignment(desc->b.b.format, desc->b.b.nr_samples, + desc->microtile, R300_BUFFER_TILED, dim); + if (dim == DIM_WIDTH) { + texdim = u_minify(desc->b.b.width0, level); + } else { + texdim = u_minify(desc->b.b.height0, level); + } + + /* See TX_FILTER1_n.MACRO_SWITCH. */ + if (rv350_mode) { + return texdim >= tile; + } else { + return texdim > tile; + } +} + +/** + * Return the stride, in bytes, of the texture image of the given texture + * at the given level. + */ +static unsigned r300_texture_get_stride(struct r300_screen *screen, + struct r300_texture_desc *desc, + unsigned level) +{ + unsigned tile_width, width, stride; + + if (desc->stride_in_bytes_override) + return desc->stride_in_bytes_override; + + /* Check the level. */ + if (level > desc->b.b.last_level) { + SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", + __FUNCTION__, level, desc->b.b.last_level); + return 0; + } + + width = u_minify(desc->b.b.width0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_width = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_WIDTH); + width = align(width, tile_width); + + stride = util_format_get_stride(desc->b.b.format, width); + + /* Some IGPs need a minimum stride of 64 bytes, hmm... + * This doesn't seem to apply to tiled textures, according to r300c. */ + if (!desc->microtile && !desc->macrotile[level] && + (screen->caps.family == CHIP_FAMILY_RS600 || + screen->caps.family == CHIP_FAMILY_RS690 || + screen->caps.family == CHIP_FAMILY_RS740)) { + return stride < 64 ? 64 : stride; + } + + /* The alignment to 32 bytes is sort of implied by the layout... 
*/ + return stride; + } else { + return align(util_format_get_stride(desc->b.b.format, width), 32); + } +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture_desc *desc, + unsigned level, + boolean *out_aligned_for_cbzb) +{ + unsigned height, tile_height; + + height = u_minify(desc->b.b.height0, level); + + if (util_format_is_plain(desc->b.b.format)) { + tile_height = r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + height = align(height, tile_height); + + /* This is needed for the kernel checker, unfortunately. */ + if ((desc->b.b.target != PIPE_TEXTURE_1D && + desc->b.b.target != PIPE_TEXTURE_2D) || + desc->b.b.last_level != 0) { + height = util_next_power_of_two(height); + } + + /* See if the CBZB clear can be used on the buffer, + * taking the texture size into account. */ + if (out_aligned_for_cbzb) { + if (desc->macrotile[level]) { + /* When clearing, the layer (width*height) is horizontally split + * into two, and the upper and lower halves are cleared by the CB + * and ZB units, respectively. Therefore, the number of macrotiles + * in the Y direction must be even. */ + + /* Align the height so that there is an even number of macrotiles. + * Do so for 3 or more macrotiles in the Y direction. */ + if (level == 0 && desc->b.b.last_level == 0 && + (desc->b.b.target == PIPE_TEXTURE_1D || + desc->b.b.target == PIPE_TEXTURE_2D) && + height >= tile_height * 3) { + height = align(height, tile_height * 2); + } + + *out_aligned_for_cbzb = height % (tile_height * 2) == 0; + } else { + *out_aligned_for_cbzb = FALSE; + } + } + } + + return util_format_get_nblocksy(desc->b.b.format, height); +} + +static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures + * incorrectly. This is a workaround to prevent CS from being rejected. 
*/ + + unsigned i, size; + + if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) && + desc->b.b.target == PIPE_TEXTURE_3D && + desc->b.b.last_level > 0) { + size = 0; + + for (i = 0; i <= desc->b.b.last_level; i++) { + size += desc->stride_in_bytes[i] * + r300_texture_get_nblocksy(desc, i, FALSE); + } + + size *= desc->b.b.depth0; + desc->size_in_bytes = size; + } +} + +/* Get a width in pixels from a stride in bytes. */ +static unsigned stride_to_width(enum pipe_format format, + unsigned stride_in_bytes) +{ + return (stride_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + +static void r300_setup_miptree(struct r300_screen *screen, + struct r300_texture_desc *desc, + boolean align_for_cbzb) +{ + struct pipe_resource *base = &desc->b.b; + unsigned stride, size, layer_size, nblocksy, i; + boolean rv350_mode = screen->caps.is_rv350; + boolean aligned_for_cbzb; + + desc->size_in_bytes = 0; + + SCREEN_DBG(screen, DBG_TEXALLOC, + "r300: Making miptree for texture, format %s\n", + util_format_short_name(base->format)); + + for (i = 0; i <= base->last_level; i++) { + /* Let's see if this miplevel can be macrotiled. */ + desc->macrotile[i] = + (desc->macrotile[0] == R300_BUFFER_TILED && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, i, rv350_mode, DIM_HEIGHT)) ? + R300_BUFFER_TILED : R300_BUFFER_LINEAR; + + stride = r300_texture_get_stride(screen, desc, i); + + /* Compute the number of blocks in Y, see if the CBZB clear can be + * used on the texture. 
*/ + aligned_for_cbzb = FALSE; + if (align_for_cbzb && desc->cbzb_allowed[i]) + nblocksy = r300_texture_get_nblocksy(desc, i, &aligned_for_cbzb); + else + nblocksy = r300_texture_get_nblocksy(desc, i, NULL); + + layer_size = stride * nblocksy; + + if (base->nr_samples) { + layer_size *= base->nr_samples; + } + + if (base->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * u_minify(base->depth0, i); + + desc->offset_in_bytes[i] = desc->size_in_bytes; + desc->size_in_bytes = desc->offset_in_bytes[i] + size; + desc->layer_size_in_bytes[i] = layer_size; + desc->stride_in_bytes[i] = stride; + desc->stride_in_pixels[i] = stride_to_width(desc->b.b.format, stride); + desc->cbzb_allowed[i] = desc->cbzb_allowed[i] && aligned_for_cbzb; + + SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride, desc->size_in_bytes, + desc->macrotile[i] ? "TRUE" : "FALSE"); + } +} + +static void r300_setup_flags(struct r300_texture_desc *desc) +{ + desc->uses_stride_addressing = + !util_is_power_of_two(desc->b.b.width0) || + !util_is_power_of_two(desc->b.b.height0) || + (desc->stride_in_bytes_override && + stride_to_width(desc->b.b.format, + desc->stride_in_bytes_override) != desc->b.b.width0); + + desc->is_npot = + desc->uses_stride_addressing || + !util_is_power_of_two(desc->b.b.height0); +} + +static void r300_setup_cbzb_flags(struct r300_screen *rscreen, + struct r300_texture_desc *desc) +{ + unsigned i, bpp; + boolean first_level_valid; + + bpp = util_format_get_blocksizebits(desc->b.b.format); + + /* 1) The texture must be point-sampled, + * 2) The depth must be 16 or 32 bits. + * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage + * with certain texture sizes. Macrotiling ensures the alignment. 
*/ + first_level_valid = desc->b.b.nr_samples <= 1 && + (bpp == 16 || bpp == 32) && + desc->macrotile[0]; + + for (i = 0; i <= desc->b.b.last_level; i++) + desc->cbzb_allowed[i] = first_level_valid && desc->macrotile[i]; +} + +static void r300_setup_tiling(struct r300_screen *screen, + struct r300_texture_desc *desc) +{ + struct r300_winsys_screen *rws = screen->rws; + enum pipe_format format = desc->b.b.format; + boolean rv350_mode = screen->caps.is_rv350; + boolean is_zb = util_format_is_depth_or_stencil(format); + boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); + + if (!util_format_is_plain(format)) { + return; + } + + /* If height == 1, disable microtiling except for zbuffer. */ + if (!is_zb && (desc->b.b.height0 == 1 || dbg_no_tiling)) { + return; + } + + /* Set microtiling. */ + switch (util_format_get_blocksize(format)) { + case 1: + case 4: + desc->microtile = R300_BUFFER_TILED; + break; + + case 2: + case 8: + if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) { + desc->microtile = R300_BUFFER_SQUARETILED; + } + break; + } + + if (dbg_no_tiling) { + return; + } + + /* Set macrotiling. */ + if (r300_texture_macro_switch(desc, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(desc, 0, rv350_mode, DIM_HEIGHT)) { + desc->macrotile[0] = R300_BUFFER_TILED; + } +} + +static void r300_tex_print_info(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const char *func) +{ + fprintf(stderr, + "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " + "LastLevel: %i, Size: %i, Format: %s\n", + func, + desc->macrotile[0] ? "YES" : " NO", + desc->microtile ? 
"YES" : " NO", + desc->stride_in_pixels[0], + desc->b.b.width0, desc->b.b.height0, desc->b.b.depth0, + desc->b.b.last_level, desc->size_in_bytes, + util_format_short_name(desc->b.b.format)); +} + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size) +{ + desc->b.b = *base; + desc->b.b.screen = &rscreen->screen; + + desc->stride_in_bytes_override = stride_in_bytes_override; + + if (microtile == R300_BUFFER_SELECT_LAYOUT || + macrotile == R300_BUFFER_SELECT_LAYOUT) { + r300_setup_tiling(rscreen, desc); + } else { + desc->microtile = microtile; + desc->macrotile[0] = macrotile; + assert(desc->b.b.last_level == 0); + } + + r300_setup_flags(desc); + r300_setup_cbzb_flags(rscreen, desc); + + /* Setup the miptree description. */ + r300_setup_miptree(rscreen, desc, TRUE); + /* If the required buffer size is larger the given max size, + * try again without the alignment for the CBZB clear. */ + if (max_buffer_size && desc->size_in_bytes > max_buffer_size) { + r300_setup_miptree(rscreen, desc, FALSE); + } + + r300_texture_3d_fix_mipmapping(rscreen, desc); + + if (max_buffer_size) { + /* Make sure the buffer we got is large enough. */ + if (desc->size_in_bytes > max_buffer_size) { + fprintf(stderr, "r300: texture_from_handle: The buffer is not " + "large enough. 
Got: %i, Need: %i, Info:\n", + max_buffer_size, desc->size_in_bytes); + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + return FALSE; + } + + desc->buffer_size_in_bytes = max_buffer_size; + } else { + desc->buffer_size_in_bytes = desc->size_in_bytes; + } + + if (SCREEN_DBG_ON(rscreen, DBG_TEX)) + r300_tex_print_info(rscreen, desc, "texture_from_handle"); + + return TRUE; +} + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face) +{ + unsigned offset = desc->offset_in_bytes[level]; + + switch (desc->b.b.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * desc->layer_size_in_bytes[level]; + + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * desc->layer_size_in_bytes[level]; + + default: + assert(zslice == 0 && face == 0); + return offset; + } +} diff --git a/src/gallium/drivers/r300/r300_texture_desc.h b/src/gallium/drivers/r300/r300_texture_desc.h new file mode 100644 index 00000000000..95de66f6549 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture_desc.h @@ -0,0 +1,57 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_TEXTURE_DESC_H +#define R300_TEXTURE_DESC_H + +#include "r300_defines.h" + +struct pipe_resource; +struct r300_screen; +struct r300_texture_desc; +struct r300_texture; + +enum r300_dim { + DIM_WIDTH = 0, + DIM_HEIGHT = 1 +}; + +unsigned r300_get_pixel_alignment(enum pipe_format format, + unsigned num_samples, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + enum r300_dim dim); + +boolean r300_texture_desc_init(struct r300_screen *rscreen, + struct r300_texture_desc *desc, + const struct pipe_resource *base, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride_in_bytes_override, + unsigned max_buffer_size); + +unsigned r300_texture_get_offset(struct r300_texture_desc *desc, + unsigned level, unsigned zslice, + unsigned face); + +#endif diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 5394e04f727..51b2c555502 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -71,7 +71,7 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_COS: return RC_OPCODE_COS; case TGSI_OPCODE_DDX: return RC_OPCODE_DDX; case TGSI_OPCODE_DDY: return RC_OPCODE_DDY; - /* case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; */ + case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; /* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */ /* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */ /* case TGSI_OPCODE_PK4B: 
return RC_OPCODE_PK4B; */ diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index d41f2588369..e9333b35ef5 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -22,7 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_transfer.h" -#include "r300_texture.h" +#include "r300_texture_desc.h" #include "r300_screen_buffer.h" #include "util/u_memory.h" @@ -35,8 +35,8 @@ struct r300_transfer { /* Offset from start of buffer. */ unsigned offset; - /* Detiled texture. */ - struct r300_texture *detiled_texture; + /* Linear texture. */ + struct r300_texture *linear_texture; }; /* Convenience cast wrapper. */ @@ -57,7 +57,7 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, subdst.face = 0; subdst.level = 0; - ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst, + ctx->resource_copy_region(ctx, &r300transfer->linear_texture->desc.b.b, subdst, 0, 0, 0, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, @@ -77,9 +77,11 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, ctx->resource_copy_region(ctx, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, - &r300transfer->detiled_texture->b.b, subsrc, + &r300transfer->linear_texture->desc.b.b, subsrc, 0, 0, 0, transfer->box.width, transfer->box.height); + + ctx->flush(ctx, 0, NULL); } struct pipe_transfer* @@ -89,19 +91,21 @@ r300_texture_get_transfer(struct pipe_context *ctx, unsigned usage, const struct pipe_box *box) { + struct r300_context *r300 = r300_context(ctx); struct r300_texture *tex = r300_texture(texture); - struct r300_screen *r300screen = r300_screen(ctx->screen); struct r300_transfer *trans; struct pipe_resource base; boolean referenced_cs, referenced_hw, blittable; - referenced_cs = r300screen->rws->is_buffer_referenced( - r300screen->rws, tex->buffer, R300_REF_CS); + referenced_cs = + 
r300->rws->cs_is_buffer_referenced(r300->cs, + tex->buffer, R300_REF_CS); if (referenced_cs) { referenced_hw = TRUE; } else { - referenced_hw = r300screen->rws->is_buffer_referenced( - r300screen->rws, tex->buffer, R300_REF_HW); + referenced_hw = + r300->rws->cs_is_buffer_referenced(r300->cs, + tex->buffer, R300_REF_HW); } blittable = ctx->screen->is_format_supported( @@ -119,7 +123,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* If the texture is tiled, we must create a temporary detiled texture * for this transfer. * Also make write transfers pipelined. */ - if (tex->microtile || tex->macrotile || + if (tex->desc.microtile || tex->desc.macrotile[sr.level] || ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; @@ -144,23 +148,23 @@ r300_texture_get_transfer(struct pipe_context *ctx, } /* Create the temporary texture. */ - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* Oh crap, the thing can't create the texture. * Let's flush and try again. */ ctx->flush(ctx, 0, NULL); - trans->detiled_texture = r300_texture( + trans->linear_texture = r300_texture( ctx->screen->resource_create(ctx->screen, &base)); - if (!trans->detiled_texture) { + if (!trans->linear_texture) { /* For linear textures, it's safe to fallback to * an unpipelined transfer. */ - if (!tex->microtile && !tex->macrotile) { + if (!tex->desc.microtile && !tex->desc.macrotile[sr.level]) { goto unpipelined; } @@ -172,8 +176,8 @@ r300_texture_get_transfer(struct pipe_context *ctx, } } - assert(!trans->detiled_texture->microtile && - !trans->detiled_texture->macrotile); + assert(!trans->linear_texture->desc.microtile && + !trans->linear_texture->desc.macrotile[0]); /* Set the stride. 
* @@ -183,7 +187,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * right thing internally. */ trans->transfer.stride = - r300_texture_get_stride(r300screen, trans->detiled_texture, 0); + trans->linear_texture->desc.stride_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is @@ -198,11 +202,11 @@ r300_texture_get_transfer(struct pipe_context *ctx, unpipelined: /* Unpipelined transfer. */ - trans->transfer.stride = - r300_texture_get_stride(r300screen, tex, sr.level); - trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face); + trans->transfer.stride = tex->desc.stride_in_bytes[sr.level]; + trans->offset = r300_texture_get_offset(&tex->desc, + sr.level, box->z, sr.face); - if (referenced_cs && (usage & PIPE_TRANSFER_READ)) + if (referenced_cs) ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); return &trans->transfer; } @@ -214,13 +218,13 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx, { struct r300_transfer *r300transfer = r300_transfer(trans); - if (r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { if (trans->usage & PIPE_TRANSFER_WRITE) { r300_copy_into_tiled_texture(ctx, r300transfer); } pipe_resource_reference( - (struct pipe_resource**)&r300transfer->detiled_texture, NULL); + (struct pipe_resource**)&r300transfer->linear_texture, NULL); } pipe_resource_reference(&trans->resource, NULL); FREE(trans); @@ -229,21 +233,23 @@ void r300_texture_transfer_destroy(struct pipe_context *ctx, void* r300_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) { + struct r300_context *r300 = r300_context(ctx); struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); char *map; - enum pipe_format format = tex->b.b.format; + enum pipe_format format = tex->desc.b.b.format; - if 
(r300transfer->detiled_texture) { + if (r300transfer->linear_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ return rws->buffer_map(rws, - r300transfer->detiled_texture->buffer, + r300transfer->linear_texture->buffer, + r300->cs, transfer->usage); } else { /* Tiling is disabled. */ - map = rws->buffer_map(rws, tex->buffer, + map = rws->buffer_map(rws, tex->buffer, r300->cs, transfer->usage); if (!map) { @@ -263,8 +269,8 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx, struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = r300_texture(transfer->resource); - if (r300transfer->detiled_texture) { - rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer); + if (r300transfer->linear_texture) { + rws->buffer_unmap(rws, r300transfer->linear_texture->buffer); } else { rws->buffer_unmap(rws, tex->buffer); } diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c index d64040b8911..2939963c355 100644 --- a/src/gallium/drivers/r300/r300_vs_draw.c +++ b/src/gallium/drivers/r300/r300_vs_draw.c @@ -185,7 +185,7 @@ static void transform_decl(struct tgsi_transform_context *ctx, if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) { insert_output(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0, TGSI_INTERPOLATE_LINEAR); - vsctx->color_used[2] = TRUE; + vsctx->bcolor_used[0] = TRUE; } /* One more case is handled in insert_trailing_bcolor. 
*/ break; diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 77c1c13ef9a..ff11546a647 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,17 +24,25 @@ #ifndef R300_WINSYS_H #define R300_WINSYS_H -/* The public interface header for the r300 pipe driver. - * Any winsys hosting this pipe needs to implement r300_winsys and then - * call r300_create_screen to start things. */ +/* The public winsys interface header for the r300 pipe driver. + * Any winsys hosting this pipe needs to implement r300_winsys_screen and then + * call r300_screen_create to start things. */ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "r300_defines.h" +struct r300_winsys_screen; + struct r300_winsys_buffer; +struct r300_winsys_cs { + uint32_t *ptr; /* Pointer to the beginning of the CS. */ + unsigned cdw; /* Number of used dwords. */ + unsigned ndw; /* Size of the CS in dwords. */ +}; + enum r300_value_id { R300_VID_PCI_ID, R300_VID_GB_PIPES, @@ -48,121 +57,251 @@ enum r300_reference_domain { /* bitfield */ }; struct r300_winsys_screen { + /** + * Destroy this winsys. + * + * \param ws The winsys this function is called from. + */ void (*destroy)(struct r300_winsys_screen *ws); - + /** + * Query a system value from a winsys. + * + * \param ws The winsys this function is called from. + * \param vid One of the R300_VID_* enums. + */ + uint32_t (*get_value)(struct r300_winsys_screen *ws, + enum r300_value_id vid); + + /************************************************************************** * Buffer management. Buffer attributes are mostly fixed over its lifetime. 
* * Remember that gallium gets to choose the interface it needs, and the * window systems must then implement that interface (rather than the * other way around...). + *************************************************************************/ + + /** + * Create a buffer object. * - * usage is a bitmask of R300_WINSYS_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This - * usage argument is only an optimization hint, not a guarantee, therefore - * proper behavior must be observed in all circumstances. - * - * alignment indicates the client's alignment requirements, eg for - * SSE instructions. + * \param ws The winsys this function is called from. + * \param size The size to allocate. + * \param alignment An alignment of the buffer in memory. + * \param bind A bitmask of the PIPE_BIND_* flags. + * \param usage A bitmask of the PIPE_USAGE_* flags. + * \param domain A bitmask of the R300_DOMAIN_* flags. + * \return The created buffer object. */ struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, - unsigned alignment, - unsigned usage, - enum r300_buffer_domain domain, - unsigned size); + unsigned size, + unsigned alignment, + unsigned bind, + unsigned usage, + enum r300_buffer_domain domain); /** - * Map the entire data store of a buffer object into the client's address. - * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags. + * Reference a buffer object (assign with reference counting). + * + * \param ws The winsys this function is called from. + * \param pdst A destination pointer to set the source buffer to. + * \param src A source buffer object. 
*/ - void *(*buffer_map)( struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf, - unsigned usage); + void (*buffer_reference)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer **pdst, + struct r300_winsys_buffer *src); - void (*buffer_unmap)( struct r300_winsys_screen *ws, - struct r300_winsys_buffer *buf ); + /** + * Map the entire data store of a buffer object into the client's address + * space. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to map. + * \param cs A command stream to flush if the buffer is referenced by it. + * \param usage A bitmask of the PIPE_TRANSFER_* flags. + * \return The pointer at the beginning of the buffer. + */ + void *(*buffer_map)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + struct r300_winsys_cs *cs, + enum pipe_transfer_usage usage); - void (*buffer_destroy)( struct r300_winsys_buffer *buf ); + /** + * Unmap a buffer object from the client's address space. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to unmap. + */ + void (*buffer_unmap)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf); + /** + * Wait for a buffer object until it is not used by a GPU. This is + * equivalent to a fence placed after the last command using the buffer, + * and synchronizing to the fence. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to wait for. + */ + void (*buffer_wait)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf); - void (*buffer_reference)(struct r300_winsys_screen *rws, - struct r300_winsys_buffer **pdst, - struct r300_winsys_buffer *src); + /** + * Return tiling flags describing a memory layout of a buffer object. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to get the flags from. + * \param microtile A pointer to the return value of the microtile flag. 
+ * \param macrotile A pointer to the return value of the macrotile flag. + * + * \note microtile and macrotile are not bitmasks! + */ + void (*buffer_get_tiling)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + enum r300_buffer_tiling *microtile, + enum r300_buffer_tiling *macrotile); - void (*buffer_wait)(struct r300_winsys_screen *rws, - struct r300_winsys_buffer *buf); + /** + * Set tiling flags describing a memory layout of a buffer object. + * + * \param ws The winsys this function is called from. + * \param buf A winsys buffer object to set the flags for. + * \param macrotile A macrotile flag. + * \param microtile A microtile flag. + * \param stride A stride of the buffer in bytes, for texturing. + * + * \note microtile and macrotile are not bitmasks! + */ + void (*buffer_set_tiling)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + enum r300_buffer_tiling microtile, + enum r300_buffer_tiling macrotile, + unsigned stride); - /* Add a pipe_resource to the list of buffer objects to validate. */ - boolean (*add_buffer)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + /** + * Get a winsys buffer from a winsys handle. The internal structure + * of the handle is platform-specific and only a winsys should access it. + * + * \param ws The winsys this function is called from. + * \param whandle A winsys handle pointer as was received from a state + * tracker. + * \param stride The returned buffer stride in bytes. + * \param size The returned buffer size. + */ + struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *ws, + struct winsys_handle *whandle, + unsigned *stride, + unsigned *size); + /** + * Get a winsys handle from a winsys buffer. The internal structure + * of the handle is platform-specific and only a winsys should access it. + * + * \param ws The winsys this function is called from. 
+ * \param buf A winsys buffer object to get the handle from. + * \param whandle A winsys handle pointer. + * \param stride A stride of the buffer in bytes, for texturing. + * \return TRUE on success. + */ + boolean (*buffer_get_handle)(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + unsigned stride, + struct winsys_handle *whandle); - /* Revalidate all currently setup pipe_buffers. - * Returns TRUE if a flush is required. */ - boolean (*validate)(struct r300_winsys_screen* winsys); + /************************************************************************** + * Command submission. + * + * Each pipe context should create its own command stream and submit + * commands independently of other contexts. + *************************************************************************/ - /* Return the number of free dwords in CS. */ - unsigned (*get_cs_free_dwords)(struct r300_winsys_screen *winsys); + /** + * Create a command stream. + * + * \param ws The winsys this function is called from. + */ + struct r300_winsys_cs *(*cs_create)(struct r300_winsys_screen *ws); - /* Return the pointer to the first free dword in CS and assume a pipe - * driver wants to fill "count" dwords. */ - uint32_t *(*get_cs_pointer)(struct r300_winsys_screen *winsys, - unsigned count); + /** + * Destroy a command stream. + * + * \param cs A command stream to destroy. + */ + void (*cs_destroy)(struct r300_winsys_cs *cs); - /* Write a dword to the command buffer. */ - void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); + /** + * Add a buffer object to the list of buffers to validate. + * + * \param cs A command stream to add buffer for validation against. + * \param buf A winsys buffer to validate. + * \param rd A read domain containing a bitmask + * of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask + * of the R300_DOMAIN_* flags. 
+ */ + void (*cs_add_buffer)(struct r300_winsys_cs *cs, + struct r300_winsys_buffer *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); - /* Write a table of dwords to the command buffer. */ - void (*write_cs_table)(struct r300_winsys_screen* winsys, - const void *dwords, unsigned count); + /** + * Revalidate all currently set up winsys buffers. + * Returns TRUE if a flush is required. + * + * \param cs A command stream to validate. + */ + boolean (*cs_validate)(struct r300_winsys_cs *cs); - /* Write a relocated dword to the command buffer. */ - void (*write_cs_reloc)(struct r300_winsys_screen *winsys, + /** + * Write a relocated dword to a command buffer. + * + * \param cs A command stream the relocation is written to. + * \param buf A winsys buffer to write the relocation for. + * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. + */ + void (*cs_write_reloc)(struct r300_winsys_cs *cs, struct r300_winsys_buffer *buf, enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags); - - /* Flush the CS. */ - void (*flush_cs)(struct r300_winsys_screen* winsys); - - /* winsys flush - callback from winsys when flush required */ - void (*set_flush_cb)(struct r300_winsys_screen *winsys, - void (*flush_cb)(void *), void *data); - - void (*reset_bos)(struct r300_winsys_screen *winsys); - - void (*buffer_get_tiling)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buffer, - enum r300_buffer_tiling *microtiled, - enum r300_buffer_tiling *macrotiled); + enum r300_buffer_domain wd); - void (*buffer_set_tiling)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buffer, - uint32_t pitch, - enum r300_buffer_tiling microtiled, - enum r300_buffer_tiling macrotiled); - - uint32_t (*get_value)(struct r300_winsys_screen *winsys, - enum r300_value_id vid); + /** + * Flush a command stream. 
+ * + * \param cs A command stream to flush. + */ + void (*cs_flush)(struct r300_winsys_cs *cs); - struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys, - unsigned handle); + /** + * Set a flush callback which is called from winsys when flush is + * required. + * + * \param cs A command stream to set the callback for. + * \param flush A flush callback function associated with the command stream. + * \param user A user pointer that will be passed to the flush callback. + */ + void (*cs_set_flush)(struct r300_winsys_cs *cs, + void (*flush)(void *), + void *user); - boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buffer, - struct winsys_handle *whandle); + /** + * Reset the list of buffer objects to validate, usually called + * prior to adding buffer objects for validation. + * + * \param cs A command stream to reset buffers for. + */ + void (*cs_reset_buffers)(struct r300_winsys_cs *cs); - boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys, - struct r300_winsys_buffer *buffer, - enum r300_reference_domain domain); + /** + * Return TRUE if a buffer is referenced by a command stream or by hardware + * (i.e. is busy), based on the domain parameter. + * + * \param cs A command stream. + * \param buf A winsys buffer. + * \param domain A bitmask of the R300_REF_* enums. + */ + boolean (*cs_is_buffer_referenced)(struct r300_winsys_cs *cs, + struct r300_winsys_buffer *buf, + enum r300_reference_domain domain); }; -struct r300_winsys_screen * -r300_winsys_screen(struct pipe_screen *screen); - -/* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws); - #endif /* R300_WINSYS_H */ |