diff options
Diffstat (limited to 'src/gallium/drivers/r300')
22 files changed, 796 insertions, 226 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index d125196b6dc..6f8d9abfc81 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -22,6 +22,7 @@ #include "r300_context.h" #include "r300_texture.h" +#include "r300_winsys.h" #include "util/u_format.h" #include "util/u_pack_color.h" @@ -81,7 +82,7 @@ static void r300_blitter_end(struct r300_context *r300) } static uint32_t r300_depth_clear_cb_value(enum pipe_format format, - const float* rgba) + const float* rgba) { union util_color uc; util_pack_color(rgba, format, &uc); @@ -98,6 +99,9 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + if (r300->z_fastfill) + clear_buffers &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); + /* Only color clear allowed, and only one colorbuffer. */ if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) return FALSE; @@ -105,6 +109,23 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300, return r300_surface(fb->cbufs[0])->cbzb_allowed; } +static uint32_t r300_depth_clear_value(enum pipe_format format, + double depth, unsigned stencil) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return util_pack_z(format, depth); + + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + return util_pack_z_stencil(format, depth, stencil); + + default: + assert(0); + return 0; + } +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -154,6 +175,22 @@ static void r300_clear(struct pipe_context* pipe, (struct r300_hyperz_state*)r300->hyperz_state.state; uint32_t width = fb->width; uint32_t height = fb->height; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + uint32_t hyperz_dcv = 0; + + /* Enable fast Z clear. + * The zbuffer must be in micro-tiled mode, otherwise it locks up. */ + if ((buffers & (PIPE_CLEAR_DEPTH|PIPE_CLEAR_STENCIL)) && has_hyperz) { + + hyperz_dcv = hyperz->zb_depthclearvalue = + r300_depth_clear_value(fb->zsbuf->format, depth, stencil); + + r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG); + if (r300->z_compression || r300->z_fastfill) + r300->zmask_clear.dirty = TRUE; + if (r300->hiz_enable) + r300->hiz_clear.dirty = TRUE; + } /* Enable CBZB clear. */ if (r300_cbzb_clear_allowed(r300, buffers)) { @@ -181,6 +218,7 @@ static void r300_clear(struct pipe_context* pipe, /* Disable CBZB clear. */ if (r300->cbzb_clear) { r300->cbzb_clear = FALSE; + hyperz->zb_depthclearvalue = hyperz_dcv; r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); } @@ -221,6 +259,29 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe, r300_blitter_end(r300); } +/* Clear a region of a depth stencil surface. */ +static void r300_flush_depth_stencil(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_subresource subdst) +{ + struct r300_context *r300 = r300_context(pipe); + struct pipe_surface *dstsurf; + struct r300_texture *tex = r300_texture(dst); + + /* only flush the zmask if we have one attached to this texture */ + if (!tex->zmask_mem[subdst.level]) + return; + + dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst, + subdst.face, subdst.level, 0, + PIPE_BIND_DEPTH_STENCIL); + r300->z_decomp_rd = TRUE; + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_flush_depth_stencil(r300->blitter, dstsurf); + r300_blitter_end(r300); + r300->z_decomp_rd = FALSE; +} + /* Copy a block of pixels from one surface to another using HW. */ static void r300_hw_copy_region(struct pipe_context* pipe, struct pipe_resource *dst, @@ -252,7 +313,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe, { enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - + boolean is_depth; if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -279,6 +340,10 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) { + r300_flush_depth_stencil(pipe, src, subsrc); + } if (old_format != new_format) { dst->format = new_format; src->format = new_format; diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 21f3b9d2610..48c24092114 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,7 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; - caps->has_hiz = TRUE; + caps->hiz_ram = 0; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -49,6 +49,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4145: @@ -61,6 +63,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R300; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4150: @@ -77,8 +81,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4148: @@ -91,12 +95,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R350; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4E4A: caps->family = CHIP_FAMILY_R360; caps->high_second_pipe = TRUE; caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5460: @@ -108,8 +116,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; - caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x3150: @@ -120,6 +128,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x3E54: caps->family = CHIP_FAMILY_RV380; caps->high_second_pipe = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x4A48: @@ -135,6 +145,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5548: @@ -149,6 +161,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x554C: @@ -161,6 +175,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5D4C: @@ -172,6 +188,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x4B48: @@ -182,6 +200,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5E4C: @@ -199,34 +219,36 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; - caps->has_hiz = FALSE; caps->has_tcl = FALSE; + caps->zmask_ram = RV3xx_ZMASK_SIZE; break; case 0x791E: @@ -234,6 +256,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x793F: @@ -242,6 +266,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x796C: @@ -251,6 +277,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; caps->is_r400 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7100: @@ -270,6 +298,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R520; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7140: @@ -313,6 +343,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV515; caps->num_vert_fpus = 2; caps->is_r500 = TRUE; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x71C0: @@ -334,6 +366,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV530; caps->num_vert_fpus = 5; caps->is_r500 = TRUE; + /*caps->hiz_ram = RV530_HIZ_LIMIT;*/ + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7240: @@ -354,12 +388,16 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_R580; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7280: caps->family = CHIP_FAMILY_RV570; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; case 0x7281: @@ -376,6 +414,8 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->family = CHIP_FAMILY_RV560; caps->num_vert_fpus = 8; caps->is_r500 = TRUE; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; break; default: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 65750f54e71..e7ca642b4f3 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -25,6 +25,14 @@ #include "pipe/p_compiler.h" +/* these are sizes in dwords */ +#define R300_HIZ_LIMIT 10240 +#define RV530_HIZ_LIMIT 15360 + +/* rv3xx have only one pipe */ +#define PIPE_ZMASK_SIZE 4096 +#define RV3xx_ZMASK_SIZE 5120 + /* Structure containing all the possible information about a specific Radeon * in the R3xx, R4xx, and R5xx families. */ struct r300_capabilities { @@ -42,8 +50,10 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; - /* Some chipsets do not have HiZ RAM. */ - boolean has_hiz; + /* Some chipsets do not have HiZ RAM - other have varying amounts . */ + int hiz_ram; + /* some chipsets have zmask ram per pipe some don't */ + int zmask_ram; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index df903590583..e8b6c4f7af8 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" +#include "r300_hyperz.h" #include "r300_screen.h" #include "r300_screen_buffer.h" #include "r300_winsys.h" @@ -114,6 +115,10 @@ static void r300_destroy_context(struct pipe_context* context) u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_destroy_mm(r300); + translate_cache_destroy(r300->tran.translate_cache); r300_release_referenced_objects(r300); @@ -166,6 +171,9 @@ static void r300_setup_atoms(struct r300_context* r300) boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); + boolean drm_2_6_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0); + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); + boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0; /* Create the actual atom list. * @@ -188,8 +196,8 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); + R300_INIT_ATOM(hyperz_state, is_r500 || (is_rv350 && drm_2_6_0) ? 10 : 8); /* ZB (unpipelined), SC. */ - R300_INIT_ATOM(hyperz_state, 6); R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); @@ -220,6 +228,13 @@ static void r300_setup_atoms(struct r300_context* r300) /* TX. */ R300_INIT_ATOM(texture_cache_inval, 2); R300_INIT_ATOM(textures_state, 0); + if (has_hyperz) { + /* HiZ Clear */ + if (has_hiz_ram) + R300_INIT_ATOM(hiz_clear, 0); + /* zmask clear */ + R300_INIT_ATOM(zmask_clear, 0); + } /* ZB (unpipelined), SU. */ R300_INIT_ATOM(query_start, 4); @@ -282,8 +297,7 @@ static void r300_init_states(struct pipe_context *pipe) (struct r300_vap_invariant_state*)r300->vap_invariant_state.state; struct r300_invariant_state *invariant = (struct r300_invariant_state*)r300->invariant_state.state; - struct r300_hyperz_state *hyperz = - (struct r300_hyperz_state*)r300->hyperz_state.state; + CB_LOCALS; pipe->set_blend_color(pipe, &bc); @@ -351,10 +365,20 @@ static void r300_init_states(struct pipe_context *pipe) /* Initialize the hyperz state. */ { - BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + BEGIN_CB(&hyperz->cb_flush_begin, r300->hyperz_state.size); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); OUT_CB_REG(R300_ZB_BW_CNTL, 0); OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); + + if (r300->screen->caps.is_r500 || + (r300->screen->caps.is_rv350 && + r300->rws->get_value(r300->rws, R300_VID_DRM_2_6_0))) { + OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); + } END_CB; } } @@ -415,6 +439,10 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, rws->cs_set_flush(r300->cs, r300_flush_cb, r300); + /* setup hyper-z mm */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_hyperz_init_mm(r300); + r300->upload_ib = u_upload_create(&r300->context, 32 * 1024, 16, PIPE_BIND_INDEX_BUFFER); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b4256c62786..d86a5c8fc98 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -106,13 +106,19 @@ struct r300_dsa_state { }; struct r300_hyperz_state { + int current_func; /* -1 after a clear before first op */ + int flush; /* This is actually a command buffer with named dwords. */ + uint32_t cb_flush_begin; + uint32_t zb_zcache_ctlstat; /* R300_ZB_CACHE_CNTL */ uint32_t cb_begin; uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ uint32_t cb_reg1; uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ uint32_t cb_reg2; uint32_t sc_hyperz; /* R300_SC_HYPERZ */ + uint32_t cb_reg3; + uint32_t gb_z_peq_config; /* R300_GB_Z_PEQ_CONFIG: 0x4028 */ }; struct r300_gpu_flush { @@ -321,6 +327,7 @@ struct r300_surface { /* Whether the CBZB clear is allowed on the surface. */ boolean cbzb_allowed; + }; struct r300_texture_desc { @@ -387,6 +394,10 @@ struct r300_texture { /* All bits should be filled in. */ struct r300_texture_fb_state fb_state; + /* hyper-z memory allocs */ + struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS]; + struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS]; + /* This is the level tiling flags were last time set for. * It's used to prevent redundant tiling-flags changes from happening.*/ unsigned surface_level; @@ -512,6 +523,10 @@ struct r300_context { struct r300_atom texture_cache_inval; /* GPU flush. */ struct r300_atom gpu_flush; + /* HiZ clear */ + struct r300_atom hiz_clear; + /* zmask clear */ + struct r300_atom zmask_clear; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -524,6 +539,8 @@ struct r300_context { struct r300_vertex_element_state *velems; bool any_user_vbs; + struct pipe_index_buffer index_buffer; + /* Vertex info for Draw. */ struct vertex_info vertex_info; @@ -547,8 +564,19 @@ struct r300_context { boolean two_sided_color; /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; - + /* Whether fast zclear is enabled. */ + boolean z_fastfill; +#define R300_Z_COMPRESS_44 1 +#define RV350_Z_COMPRESS_88 2 + int z_compression; + boolean hiz_enable; boolean cbzb_clear; + boolean z_decomp_rd; + + /* two mem block managers for hiz/zmask ram space */ + struct mem_block *hiz_mm; + struct mem_block *zmask_mm; + /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; @@ -619,7 +647,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ enum r300_fb_state_change { R300_CHANGED_FB_STATE = 0, - R300_CHANGED_CBZB_FLAG + R300_CHANGED_CBZB_FLAG, + R300_CHANGED_ZCLEAR_FLAG }; void r300_mark_fb_state_dirty(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 053a64ea6d7..c3e157e99af 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -44,6 +44,7 @@ static const struct debug_named_value debug_options[] = { { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, { "stats", DBG_STATS, "Gather statistics" }, + { "hyperz", DBG_HYPERZ, "HyperZ (for debugging)" }, /* must be last */ DEBUG_NAMED_VALUE_END diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 36a26a78717..17e180a79ac 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_simple_list.h" #include "r300_context.h" @@ -329,6 +330,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); CS_LOCALS(r300); BEGIN_CS(size); @@ -364,6 +366,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + + DBG(r300, DBG_CBZB, + "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, + surf->cbzb_pitch); } /* Set up a zbuffer. */ else if (fb->zsbuf) { @@ -377,15 +383,32 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); - /* HiZ RAM. */ - if (r300->screen->caps.has_hiz) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + if (has_hyperz) { + uint32_t surf_pitch; + struct r300_texture *tex; + int level = surf->base.level; + tex = r300_texture(surf->base.texture); + + surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ + if (r300->screen->caps.hiz_ram) { + if (tex->hiz_mem[level]) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + } + /* Z Mask RAM. (compressed zbuffer) */ + if (tex->zmask_mem[level]) { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } } - - /* Z Mask RAM. (compressed zbuffer) */ - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); } END_CS; @@ -394,8 +417,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_hyperz_state(struct r300_context *r300, unsigned size, void *state) { + struct r300_hyperz_state *z = state; CS_LOCALS(r300); - WRITE_CS_TABLE(state, size); + if (z->flush) + WRITE_CS_TABLE(&z->cb_flush_begin, size); + else + WRITE_CS_TABLE(&z->cb_begin, size - 2); } void r300_emit_hyperz_end(struct r300_context *r300) @@ -403,9 +430,11 @@ void r300_emit_hyperz_end(struct r300_context *r300) struct r300_hyperz_state z = *(struct r300_hyperz_state*)r300->hyperz_state.state; + z.flush = 1; z.zb_bw_cntl = 0; z.zb_depthclearvalue = 0; z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z.gb_z_peq_config = 0; r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); } @@ -943,6 +972,101 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0, height, offset_shift; + struct r300_texture* tex; + int i; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + /* convert from pixels to 4x4 blocks */ + stride = ALIGN_DIVUP(stride, 4); + + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* there are 4 blocks per dwords */ + stride = ALIGN_DIVUP(stride, 4); + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + + offset_shift = 2; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); + } + z->current_func = -1; +} + +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0; + struct r300_texture* tex; + uint32_t i, height; + int mult, offset_shift; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + offset_shift = 4; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + + /* okay have width in pixels - divide by block width */ + stride = ALIGN_DIVUP(stride, mult); + /* have width in blocks - divide by number of fragment pipes screen width */ + /* 16 blocks per dword */ + stride = ALIGN_DIVUP(stride, 16); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); + } +} + void r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 5d05039669f..2f2c2f2dcb4 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -112,6 +112,9 @@ void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, voi void r300_emit_invariant_state(struct r300_context *r300, unsigned size, void *state); +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state); +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state); + unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index ae7b5759e78..7fed9b5d074 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -43,16 +43,9 @@ static void r300_flush(struct pipe_context* pipe, u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); - /* We probably need to flush Draw, but we may have been called from - * within Draw. This feels kludgy, but it might be the best thing. - * - * Of course, the best thing is to kill Draw with fire. :3 */ - if (r300->draw && !r300->draw->flushing) { - draw_flush(r300->draw); - } - if (r300->dirty_hw) { - r300_emit_hyperz_end(r300); + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index db5269912e2..87ff49a90c7 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -248,13 +248,18 @@ static void r300_emit_fs_code_to_buffer( shader->cb_code_size = 19 + ((code->inst_end + 1) * 6) + - imm_count * 7; + imm_count * 7 + + code->int_constant_count * 2; NEW_CB(shader->cb_code, shader->cb_code_size); OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl); - OUT_CB_REG(R500_US_CODE_RANGE, + for(i = 0; i < code->int_constant_count; i++){ + OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4), + code->int_constants[i]); + } + OUT_CB_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); OUT_CB_REG(R500_US_CODE_OFFSET, 0); OUT_CB_REG(R500_US_CODE_ADDR, diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e9528956019..10e440ce306 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -21,25 +21,156 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "util/u_format.h" +#include "util/u_mm.h" #include "r300_context.h" #include "r300_hyperz.h" #include "r300_reg.h" #include "r300_fs.h" +/* + HiZ rules - taken from various docs + 1. HiZ only works on depth values + 2. Cannot HiZ if stencil fail or zfail is !KEEP + 3. on R300/400, HiZ is disabled if depth test is EQUAL + 4. comparison changes without clears usually mean disabling HiZ +*/ /*****************************************************************************/ /* The HyperZ setup */ /*****************************************************************************/ +static bool r300_get_sc_hz_max(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_SC_HYPERZ_MIN; + + if (func >= 4 && func <= 7) + ret = R300_SC_HYPERZ_MAX; + return ret; +} + +static bool r300_zfunc_same_direction(int func1, int func2) +{ + /* func1 is less/lessthan */ + if (func1 == 1 || func1 == 2) + if (func2 == 3 || func2 == 4 || func2 == 5) + return FALSE; + + if (func2 == 1 || func2 == 2) + if (func1 == 4 || func1 == 5) + return FALSE; + return TRUE; +} + +static int r300_get_hiz_min(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + int func = dsa_state->z_stencil_control & 0x7; + int ret = R300_HIZ_MIN; + + if (func == 1 || func == 2) + ret = R300_HIZ_MAX; + return ret; +} + +static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s) +{ + if (s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP)) + return TRUE; + return FALSE; +} + +static boolean r300_can_hiz(struct r300_context *r300) +{ + struct r300_dsa_state *dsa_state = r300->dsa_state.state; + struct pipe_depth_stencil_alpha_state *dsa = &dsa_state->dsa; + struct r300_screen* r300screen = r300->screen; + struct r300_hyperz_state *z = r300->hyperz_state.state; + + /* shader writes depth - no HiZ */ + if (r300_fragment_shader_writes_depth(r300_fs(r300))) /* (5) */ + return FALSE; + + if (r300->query_current) + return FALSE; + /* if stencil fail/zfail op is not KEEP */ + if (r300_dsa_stencil_op_not_keep(&dsa->stencil[0]) || + r300_dsa_stencil_op_not_keep(&dsa->stencil[1])) + return FALSE; + + if (dsa->depth.enabled) { + /* if depth func is EQUAL pre-r500 */ + if (dsa->depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500) + return FALSE; + /* if depth func is NOTEQUAL */ + if (dsa->depth.func == PIPE_FUNC_NOTEQUAL) + return FALSE; + } + /* depth comparison function - if just cleared save and return okay */ + if (z->current_func == -1) { + int func = dsa_state->z_stencil_control & 0x7; + if (func != 0 && func != 7) + z->current_func = dsa_state->z_stencil_control & 0x7; + } else { + /* simple don't change */ + if (!r300_zfunc_same_direction(z->current_func, (dsa_state->z_stencil_control & 0x7))) { + DBG(r300, DBG_HYPERZ, "z func changed direction - disabling hyper-z %d -> %d\n", z->current_func, dsa_state->z_stencil_control); + return FALSE; + } + } + return TRUE; +} + static void r300_update_hyperz(struct r300_context* r300) { struct r300_hyperz_state *z = (struct r300_hyperz_state*)r300->hyperz_state.state; + z->gb_z_peq_config = 0; z->zb_bw_cntl = 0; z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z->flush = 0; - if (r300->cbzb_clear) + if (r300->cbzb_clear) { z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; + return; + } + + /* Zbuffer compression. */ + if (r300->z_compression) { + z->zb_bw_cntl |= R300_RD_COMP_ENABLE; + if (r300->z_decomp_rd == false) + z->zb_bw_cntl |= R300_WR_COMP_ENABLE; + /* RV350 and up optimizations. */ + if (r300->z_compression == RV350_Z_COMPRESS_88) + z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8; + } + + /* Z fastfill. */ + if (r300->z_fastfill) { + z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/ + } + + if (r300->hiz_enable) { + bool can_hiz = r300_can_hiz(r300); + if (can_hiz) { + z->zb_bw_cntl |= R300_HIZ_ENABLE; + z->sc_hyperz |= R300_SC_HYPERZ_ENABLE; + z->sc_hyperz |= r300_get_sc_hz_max(r300); + z->zb_bw_cntl |= r300_get_hiz_min(r300); + } + } + + if (r300->screen->caps.is_r500) { + /* XXX Are these bits really available on RV350? */ + z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3; + z->zb_bw_cntl |= + R500_HIZ_EQUAL_REJECT_ENABLE | + R500_PEQ_PACKING_ENABLE | + R500_COVERED_PTR_MASKING_ENABLE; + } } /*****************************************************************************/ @@ -126,15 +257,115 @@ static void r300_update_ztop(struct r300_context* r300) } else { ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } - if (ztop_state->z_buffer_top != old_ztop) r300->ztop_state.dirty = TRUE; } +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +static void r300_update_hiz_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + r300->hiz_clear.size = height * 4; +} + +static void r300_update_zmask_clear(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height; + int mult; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + r300->zmask_clear.size = height * 4; +} + void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); if (r300->hyperz_state.dirty) { r300_update_hyperz(r300); } + + if (r300->hiz_clear.dirty) { + r300_update_hiz_clear(r300); + } + if (r300->zmask_clear.dirty) { + r300_update_zmask_clear(r300); + } +} + +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf) +{ + struct r300_texture *tex; + uint32_t zsize, ndw; + int level = surf->base.level; + + tex = r300_texture(surf->base.texture); + + if (tex->hiz_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + ndw = ALIGN_DIVUP(zsize, 64); + + tex->hiz_mem[level] = u_mmAllocMem(r300->hiz_mm, ndw, 0, 0); + return; +} + +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress) +{ + int bsize = 256; + uint32_t zsize, ndw; + int level = surf->base.level; + struct r300_texture *tex; + + tex = r300_texture(surf->base.texture); + + if (tex->zmask_mem[level]) + return; + + zsize = tex->desc.layer_size_in_bytes[level]; + zsize /= util_format_get_blocksize(tex->desc.b.b.format); + + /* each zmask dword represents 16 4x4 blocks - which is 256 pixels + or 16 8x8 depending on the gb peq flag = 1024 pixels */ + if (compress == RV350_Z_COMPRESS_88) + bsize = 1024; + + ndw = ALIGN_DIVUP(zsize, bsize); + tex->zmask_mem[level] = u_mmAllocMem(r300->zmask_mm, ndw, 0, 0); + return; +} + +void r300_hyperz_init_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + int frag_pipes = r300screen->caps.num_frag_pipes; + + if (r300screen->caps.hiz_ram) + r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes); + + r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes); +} + +void r300_hyperz_destroy_mm(struct r300_context *r300) +{ + struct r300_screen* r300screen = r300->screen; + + if (r300screen->caps.hiz_ram) + u_mmDestroy(r300->hiz_mm); + + u_mmDestroy(r300->zmask_mm); } diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h index 3df5053b896..09e1ff6625c 100644 --- a/src/gallium/drivers/r300/r300_hyperz.h +++ b/src/gallium/drivers/r300/r300_hyperz.h @@ -27,4 +27,9 @@ struct r300_context; void r300_update_hyperz_state(struct r300_context* r300); +void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf); +void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress); + +void r300_hyperz_init_mm(struct r300_context *r300); +void r300_hyperz_destroy_mm(struct r300_context *r300); #endif diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 2acc1a903e8..99a9d650551 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3436,6 +3436,7 @@ enum { # define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) # define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24) +#define R300_PACKET3_3D_CLEAR_ZMASK 0x00003200 #define R300_PACKET3_INDX_BUFFER 0x00003300 # define R300_INDX_BUFFER_DST_SHIFT 0 # define R300_INDX_BUFFER_SKIP_SHIFT 16 diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index bae02135da9..910f5f7113e 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -223,7 +223,8 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ - end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ cs_dwords += end_dwords; @@ -566,19 +567,6 @@ static void r300_draw_range_elements(struct pipe_context* pipe, } } -/* Simple helpers for context setup. Should probably be moved to util. */ -static void r300_draw_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, int indexBias, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - - pipe->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - 0, r300->vertex_buffer_max_index, - mode, start, count); -} - static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { @@ -638,111 +626,104 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } } -/**************************************************************************** - * The rest of this file is for SW TCL rendering only. Please be polite and * - * keep these functions separated so that they are easier to locate. ~C. * - ***************************************************************************/ - -/* SW TCL arrays, using Draw. */ -static void r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count) +static void r300_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) { struct r300_context* r300 = r300_context(pipe); - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - int i; - if (r300->skip_rendering) { - return; + if (info->indexed && r300->index_buffer.buffer) { + unsigned offset; + + assert(r300->index_buffer.offset % r300->index_buffer.index_size == 0); + offset = r300->index_buffer.offset / r300->index_buffer.index_size; + + r300_draw_range_elements(pipe, + r300->index_buffer.buffer, + r300->index_buffer.index_size, + info->index_bias, + info->min_index, + info->max_index, + info->mode, + info->start + offset, + info->count); } - - if (!u_trim_pipe_prim(mode, &count)) { - return; - } - - r300_update_derived_state(r300); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); - } - - draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL); - - draw_arrays(r300->draw, mode, start, count); - - /* XXX Not sure whether this is the best fix. - * It prevents CS from being rejected and weird assertion failures. */ - draw_flush(r300->draw); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + else { + r300_draw_arrays(pipe, + info->mode, + info->start, + info->count); } } +/**************************************************************************** + * The rest of this file is for SW TCL rendering only. Please be polite and * + * keep these functions separated so that they are easier to locate. ~C. * + ***************************************************************************/ + /* SW TCL elements, using Draw. */ -static void r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_resource* indexBuffer, - unsigned indexSize, - int indexBias, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +static void r300_swtcl_draw_vbo(struct pipe_context* pipe, + const struct pipe_draw_info *info) { struct r300_context* r300 = r300_context(pipe); struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; - struct pipe_transfer *ib_transfer; + struct pipe_transfer *ib_transfer = NULL; + unsigned count = info->count; int i; - void* indices; + void* indices = NULL; if (r300->skip_rendering) { return; } - if (!u_trim_pipe_prim(mode, &count)) { + if (!u_trim_pipe_prim(info->mode, &count)) { return; } r300_update_derived_state(r300); for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe, - r300->vertex_buffer[i].buffer, - PIPE_TRANSFER_READ, - &vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); + if (r300->vertex_buffer[i].buffer) { + void *buf = pipe_buffer_map(pipe, + r300->vertex_buffer[i].buffer, + PIPE_TRANSFER_READ, + &vb_transfer[i]); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } + } + + if (info->indexed && r300->index_buffer.buffer) { + indices = pipe_buffer_map(pipe, r300->index_buffer.buffer, + PIPE_TRANSFER_READ, &ib_transfer); + if (indices) + indices = (void *) ((char *) indices + r300->index_buffer.offset); } - indices = pipe_buffer_map(pipe, indexBuffer, - PIPE_TRANSFER_READ, &ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias, - minIndex, maxIndex, indices); + draw_set_mapped_element_buffer_range(r300->draw, (indices) ? + r300->index_buffer.index_size : 0, + info->index_bias, + info->min_index, + info->max_index, + indices); - draw_arrays(r300->draw, mode, start, count); + draw_arrays(r300->draw, info->mode, info->start, count); /* XXX Not sure whether this is the best fix. * It prevents CS from being rejected and weird assertion failures. */ draw_flush(r300->draw); for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, - vb_transfer[i]); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + if (r300->vertex_buffer[i].buffer) { + pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, + vb_transfer[i]); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } } - pipe_buffer_unmap(pipe, indexBuffer, - ib_transfer); - draw_set_mapped_element_buffer_range(r300->draw, 0, 0, - start, start + count - 1, - NULL); + if (ib_transfer) { + pipe_buffer_unmap(pipe, r300->index_buffer.buffer, ib_transfer); + draw_set_mapped_element_buffer_range(r300->draw, 0, 0, info->start, + info->start + count - 1, NULL); + } } /* Object for rendering using Draw. */ @@ -820,6 +801,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) PIPE_TRANSFER_WRITE, &r300render->vbo_transfer); + assert(r300render->vbo_ptr); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } @@ -1141,16 +1124,11 @@ static void r300_resource_resolve(struct pipe_context* pipe, void r300_init_render_functions(struct r300_context *r300) { - /* Set generic functions. */ - r300->context.draw_elements = r300_draw_elements; - /* Set draw functions based on presence of HW TCL. */ if (r300->screen->caps.has_tcl) { - r300->context.draw_arrays = r300_draw_arrays; - r300->context.draw_range_elements = r300_draw_range_elements; + r300->context.draw_vbo = r300_draw_vbo; } else { - r300->context.draw_arrays = r300_swtcl_draw_arrays; - r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + r300->context.draw_vbo = r300_swtcl_draw_vbo; } r300->context.resource_resolve = r300_resource_resolve; diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c index 9a6b4e12ff1..1f035d64a28 100644 --- a/src/gallium/drivers/r300/r300_render_stencilref.c +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -34,13 +34,8 @@ #include "r300_reg.h" struct r300_stencilref_context { - void (*draw_arrays)(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - void (*draw_range_elements)( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count); + void (*draw_vbo)(struct pipe_context *pipe, + const struct pipe_draw_info *info); uint32_t rs_cull_mode; uint32_t zb_stencilrefmask; @@ -105,41 +100,19 @@ static void r300_stencilref_end(struct r300_context *r300) r300->dsa_state.dirty = TRUE; } -static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_arrays(pipe, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_end(r300); - } -} - -static void r300_stencilref_draw_range_elements( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count) +static void r300_stencilref_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) { struct r300_context *r300 = r300_context(pipe); struct r300_stencilref_context *sr = r300->stencilref_fallback; if (!r300_stencilref_needed(r300)) { - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); + sr->draw_vbo(pipe, info); } else { r300_stencilref_begin(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); + sr->draw_vbo(pipe, info); r300_stencilref_switch_side(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); + sr->draw_vbo(pipe, info); r300_stencilref_end(r300); } } @@ -148,11 +121,9 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) { r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); - /* Save original draw functions. */ - r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; - r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + /* Save original draw function. */ + r300->stencilref_fallback->draw_vbo = r300->context.draw_vbo; - /* Override the draw functions. */ - r300->context.draw_arrays = r300_stencilref_draw_arrays; - r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + /* Override the draw function. */ + r300->context.draw_vbo = r300_stencilref_draw_vbo; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 676430f5fee..6268001054b 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -279,11 +279,14 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, case 3: case 4: case 6: + return FALSE; +#if 0 if (usage != PIPE_BIND_RENDER_TARGET || !util_format_is_rgba8_variant( util_format_description(format))) { return FALSE; } +#endif break; default: return FALSE; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 18745b83a09..13a3320b992 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -91,6 +91,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_FB (1 << 9) #define DBG_RS_BLOCK (1 << 10) #define DBG_CBZB (1 << 11) +#define DBG_HYPERZ (1 << 12) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3e221f2e02d..1e6b81d7989 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -25,6 +25,7 @@ #include "util/u_blitter.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_memory.h" #include "util/u_pack_color.h" @@ -43,6 +44,7 @@ #include "r300_texture.h" #include "r300_vs.h" #include "r300_winsys.h" +#include "r300_hyperz.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -446,7 +448,6 @@ static void r300_set_clip_state(struct pipe_context* pipe, r300->clip_state.dirty = TRUE; } else { - draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); } } @@ -473,14 +474,14 @@ static void* dsa->dsa = *state; - /* Depth test setup. */ + /* Depth test setup. - separate write mask depth for decomp flush */ + if (state->depth.writemask) { + dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; + } + if (state->depth.enabled) { dsa->z_buffer_control |= R300_Z_ENABLE; - if (state->depth.writemask) { - dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; - } - dsa->z_stencil_control |= (r300_translate_depth_stencil_function(state->depth.func) << R300_Z_FUNC_SHIFT); @@ -593,6 +594,7 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, UPDATE_STATE(state, r300->dsa_state); + r300->hyperz_state.dirty = TRUE; /* Will be updated before the emission. */ r300_dsa_inject_stencilref(r300); } @@ -682,11 +684,13 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, enum r300_fb_state_change change) { struct pipe_framebuffer_state *state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); /* What is marked as dirty depends on the enum r300_fb_state_change. */ r300->gpu_flush.dirty = TRUE; r300->fb_state.dirty = TRUE; - r300->hyperz_state.dirty = TRUE; + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300->hyperz_state.dirty = TRUE; if (change == R300_CHANGED_FB_STATE) { r300->aa_state.dirty = TRUE; @@ -698,8 +702,11 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, if (r300->cbzb_clear) r300->fb_state.size += 10; - else if (state->zsbuf) - r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; + else if (state->zsbuf) { + r300->fb_state.size += 10; + if (has_hyperz) + r300->fb_state.size += r300->screen->caps.hiz_ram ? 8 : 4; + } /* The size of the rest of atoms stays the same. */ } @@ -711,8 +718,10 @@ static void struct r300_context* r300 = r300_context(pipe); struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; + int blocksize; if (r300->screen->caps.is_r500) { max_width = max_height = 4096; @@ -728,10 +737,6 @@ static void return; } - if (r300->draw) { - draw_flush(r300->draw); - } - /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { r300->blend_state.dirty = TRUE; @@ -748,20 +753,57 @@ static void r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE); - /* Polygon offset depends on the zbuffer bit depth. */ - if (state->zsbuf && r300->polygon_offset_enabled) { - switch (util_format_get_blocksize(state->zsbuf->texture->format)) { - case 2: - zbuffer_bpp = 16; - break; - case 4: - zbuffer_bpp = 24; - break; + r300->hiz_enable = false; + r300->z_fastfill = false; + r300->z_compression = false; + + if (state->zsbuf) { + blocksize = util_format_get_blocksize(state->zsbuf->texture->format); + switch (blocksize) { + case 2: + zbuffer_bpp = 16; + break; + case 4: + zbuffer_bpp = 24; + break; } + if (has_hyperz) { + struct r300_surface *zs_surf = r300_surface(state->zsbuf); + struct r300_texture *tex; + int compress = r300->screen->caps.is_rv350 ? RV350_Z_COMPRESS_88 : R300_Z_COMPRESS_44; + int level = zs_surf->base.level; + + tex = r300_texture(zs_surf->base.texture); + + /* work out whether we can support hiz on this buffer */ + r300_hiz_alloc_block(r300, zs_surf); + + /* work out whether we can support zmask features on this buffer */ + r300_zmask_alloc_block(r300, zs_surf, compress); + + if (tex->hiz_mem[level]) { + r300->hiz_enable = 1; + } + if (tex->zmask_mem[level]) { + r300->z_fastfill = 1; + /* compression causes hangs on 16-bit */ + if (zbuffer_bpp == 24) + r300->z_compression = compress; + } + DBG(r300, DBG_HYPERZ, + "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable, + tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef, + r300->z_compression, r300->z_fastfill, + tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef); + } + + /* Polygon offset depends on the zbuffer bit depth. */ if (r300->zbuffer_bpp != zbuffer_bpp) { r300->zbuffer_bpp = zbuffer_bpp; - r300->rs_state.dirty = TRUE; + + if (r300->polygon_offset_enabled) + r300->rs_state.dirty = TRUE; } } @@ -1019,7 +1061,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) stuffing_enable |= - R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); + R300_GB_TEX_ST << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); } point_texcoord_left = 0.0f; @@ -1096,14 +1138,11 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) boolean last_two_sided_color = r300->two_sided_color; if (r300->draw && rs) { - draw_flush(r300->draw); draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state); } if (rs) { - r300->polygon_offset_enabled = (rs->rs.offset_point || - rs->rs.offset_line || - rs->rs.offset_tri); + r300->polygon_offset_enabled = rs->polygon_offset_enable; r300->sprite_coord_enable = rs->rs.sprite_coord_enable; r300->two_sided_color = rs->rs.light_twoside; } else { @@ -1385,7 +1424,6 @@ static void r300_set_viewport_state(struct pipe_context* pipe, r300->viewport = *state; if (r300->draw) { - draw_flush(r300->draw); draw_set_viewport_state(r300->draw, state); viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT; return; @@ -1486,7 +1524,6 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, } else { /* SW TCL. */ - draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } @@ -1505,6 +1542,23 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, r300->vertex_buffer_count = count; } +static void r300_set_index_buffer(struct pipe_context* pipe, + const struct pipe_index_buffer *ib) +{ + struct r300_context* r300 = r300_context(pipe); + + if (ib) { + pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer); + memcpy(&r300->index_buffer, ib, sizeof(r300->index_buffer)); + } + else { + pipe_resource_reference(&r300->index_buffer.buffer, NULL); + memset(&r300->index_buffer, 0, sizeof(r300->index_buffer)); + } + + /* TODO make this more like a state */ +} + /* Initialize the PSC tables. */ static void r300_vertex_psc(struct r300_vertex_element_state *velems) { @@ -1654,7 +1708,6 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, r300->velems = velems; if (r300->draw) { - draw_flush(r300->draw); draw_set_vertex_elements(r300->draw, velems->count, velems->velem); return; } @@ -1720,7 +1773,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300->pvs_flush.dirty = TRUE; } else { - draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)vs->draw_vs); } @@ -1852,6 +1904,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_viewport_state = r300_set_viewport_state; r300->context.set_vertex_buffers = r300_set_vertex_buffers; + r300->context.set_index_buffer = r300_set_index_buffer; r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index a85db27064c..f3dad4c2923 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -35,6 +35,7 @@ #include "r300_state_inlines.h" #include "r300_texture.h" #include "r300_vs.h" +#include "r300_winsys.h" /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ @@ -213,19 +214,19 @@ static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, enum r300_rs_swizzle swiz) { if (swiz == SWIZ_X001) { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); } else if (swiz == SWIZ_XY01) { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); } else { - rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + rs->ip[id] |= R300_RS_TEX_PTR(ptr) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_C2) | @@ -261,23 +262,21 @@ static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, enum r300_rs_swizzle swiz) { - int rs_tex_comp = ptr*4; - if (swiz == SWIZ_X001) { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + rs->ip[id] |= R500_RS_SEL_S(ptr) | R500_RS_SEL_T(R500_RS_IP_PTR_K0) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else if (swiz == SWIZ_XY01) { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | + rs->ip[id] |= R500_RS_SEL_S(ptr) | + R500_RS_SEL_T(ptr + 1) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else { - rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | - R500_RS_SEL_R(rs_tex_comp + 2) | - R500_RS_SEL_Q(rs_tex_comp + 3); + rs->ip[id] |= R500_RS_SEL_S(ptr) | + R500_RS_SEL_T(ptr + 1) | + R500_RS_SEL_R(ptr + 2) | + R500_RS_SEL_Q(ptr + 3); } rs->inst[id] |= R500_RS_INST_TEX_ID(id); } @@ -305,7 +304,7 @@ static void r300_update_rs_block(struct r300_context *r300) struct r300_shader_semantics *vs_outputs = &vs->outputs; struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; struct r300_rs_block rs = {0}; - int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0; + int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); @@ -393,8 +392,9 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); tex_count++; + tex_ptr += 4; } } } @@ -412,7 +412,7 @@ static void r300_update_rs_block(struct r300_context *r300) } /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, + rX00_rs_tex(&rs, tex_count, tex_ptr, sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); /* Write it to the FS input register if it's needed by the FS. */ @@ -429,6 +429,7 @@ static void r300_update_rs_block(struct r300_context *r300) i, sprite_coord ? " (sprite coord)" : ""); } tex_count++; + tex_ptr += sprite_coord ? 2 : 4; } else { /* Skip the FS input register, leave it uninitialized. */ /* If we try to set it to (0,0,0,1), it will lock up. */ @@ -449,7 +450,7 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_X001); /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { @@ -461,6 +462,7 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n"); } tex_count++; + tex_ptr += 4; } else { /* Skip the FS input register, leave it uninitialized. */ /* If we try to set it to (0,0,0,1), it will lock up. */ @@ -480,7 +482,7 @@ static void r300_update_rs_block(struct r300_context *r300) stream_loc_notcl[loc++] = 6 + tex_count; /* Rasterize it. */ - rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + rX00_rs_tex(&rs, tex_count, tex_ptr, SWIZ_XYZW); /* Write it to the FS input register. */ rX00_rs_tex_write(&rs, tex_count, fp_offset); @@ -489,6 +491,7 @@ static void r300_update_rs_block(struct r300_context *r300) fp_offset++; tex_count++; + tex_ptr += 4; } /* Invalidate the rest of the no-TCL (GA) stream locations. */ @@ -507,7 +510,7 @@ static void r300_update_rs_block(struct r300_context *r300) DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, " "generics: %i.\n", col_count, tex_count); - rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | + rs.count = MIN2(tex_ptr, 32) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; count = MAX3(col_count, tex_count, 1); @@ -691,5 +694,6 @@ void r300_update_derived_state(struct r300_context* r300) } } - r300_update_hyperz_state(r300); + if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ)) + r300_update_hyperz_state(r300); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index fcdca5605e9..da8eadd3b53 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -35,6 +35,7 @@ #include "util/u_format_s3tc.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_mm.h" #include "pipe/p_screen.h" @@ -645,8 +646,16 @@ static void r300_texture_destroy(struct pipe_screen *screen, { struct r300_texture* tex = (struct r300_texture*)texture; struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; + int i; rws->buffer_reference(rws, &tex->buffer, NULL); + for (i = 0; i < R300_MAX_TEXTURE_LEVELS; i++) { + if (tex->hiz_mem[i]) + u_mmFreeMem(tex->hiz_mem[i]); + if (tex->zmask_mem[i]) + u_mmFreeMem(tex->zmask_mem[i]); + } + FREE(tex); } diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c index 343089bf2c5..5d690e8c332 100644 --- a/src/gallium/drivers/r300/r300_texture_desc.c +++ b/src/gallium/drivers/r300/r300_texture_desc.c @@ -136,13 +136,27 @@ static unsigned r300_texture_get_stride(struct r300_screen *screen, stride = util_format_get_stride(desc->b.b.format, width); - /* Some IGPs need a minimum stride of 64 bytes, hmm... - * This doesn't seem to apply to tiled textures, according to r300c. */ - if (!desc->microtile && !desc->macrotile[level] && + /* Some IGPs need a minimum stride of 64 bytes, hmm... */ + if (!desc->macrotile[level] && (screen->caps.family == CHIP_FAMILY_RS600 || screen->caps.family == CHIP_FAMILY_RS690 || screen->caps.family == CHIP_FAMILY_RS740)) { - return stride < 64 ? 64 : stride; + unsigned min_stride; + + if (desc->microtile) { + unsigned tile_height = + r300_get_pixel_alignment(desc->b.b.format, + desc->b.b.nr_samples, + desc->microtile, + desc->macrotile[level], + DIM_HEIGHT); + + min_stride = 64 / tile_height; + } else { + min_stride = 64; + } + + return stride < min_stride ? min_stride : stride; } /* The alignment to 32 bytes is sort of implied by the layout... */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index ff11546a647..187780750fa 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -49,6 +49,8 @@ enum r300_value_id { R300_VID_Z_PIPES, R300_VID_SQUARE_TILING_SUPPORT, R300_VID_DRM_2_3_0, + R300_VID_DRM_2_6_0, + R300_CAN_HYPERZ, }; enum r300_reference_domain { /* bitfield */ |