From 8c836f7f740c6f74511c727c7bed0680ddba9974 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 12 Jul 2010 13:23:24 +0200 Subject: r300g: implement fast color clear An initial implementation made by Dave Airlie. For it to be used, a color-only clear must be invoked and exactly one point-sampled render target must be set. The render target must be macrotiled (for us to overcome alignment issues) and bpp must be either 16 or 32. I can't see a difference in performance. :( Conflicts: src/gallium/drivers/r300/r300_blit.c --- src/gallium/drivers/r300/r300_blit.c | 71 ++++++++++++++++++++++++++++++++- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/drivers/r300/r300_context.h | 11 ++++- src/gallium/drivers/r300/r300_emit.c | 44 +++++++++++++++++--- src/gallium/drivers/r300/r300_emit.h | 8 ++-- src/gallium/drivers/r300/r300_flush.c | 1 + src/gallium/drivers/r300/r300_hyperz.c | 19 +++++++++ src/gallium/drivers/r300/r300_render.c | 1 + src/gallium/drivers/r300/r300_state.c | 4 +- src/gallium/drivers/r300/r300_texture.c | 30 ++++++++++++++ 10 files changed, 178 insertions(+), 13 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 3cc054788bc..895efaa1c4a 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -24,6 +24,7 @@ #include "r300_texture.h" #include "util/u_format.h" +#include "util/u_pack_color.h" enum r300_blitter_op /* bitmask */ { @@ -79,6 +80,48 @@ static void r300_blitter_end(struct r300_context *r300) } } +static uint32_t r300_depth_clear_cb_value(enum pipe_format format, + const float* rgba) +{ + union util_color uc; + util_pack_color(rgba, format, &uc); + + if (util_format_get_blocksizebits(format) == 32) + return uc.ui; + else + return uc.us | (uc.us << 16); +} + +static boolean r300_cbzb_clear_allowed(struct r300_context *r300, + unsigned clear_buffers) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + unsigned bpp; + + /* Only color clear allowed, and only one colorbuffer. */ + if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1) + return FALSE; + + /* The colorbuffer must be point-sampled. */ + if (surf->base.texture->nr_samples > 1) + return FALSE; + + bpp = util_format_get_blocksizebits(surf->base.format); + + /* ZB can only work with the two pixel sizes. */ + if (bpp != 16 && bpp != 32) + return FALSE; + + /* If the midpoint ZB offset is not aligned to 2048, it returns garbage + * with certain texture sizes. Macrotiling ensures the alignment. */ + if (!r300_texture(surf->base.texture)->mip_macrotile[surf->base.level]) + return FALSE; + + return TRUE; +} + /* Clear currently bound buffers. */ static void r300_clear(struct pipe_context* pipe, unsigned buffers, @@ -124,16 +167,40 @@ static void r300_clear(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + uint32_t width = fb->width; + uint32_t height = fb->height; + + /* Enable CBZB clear. */ + if (r300_cbzb_clear_allowed(r300, buffers)) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + hyperz->zb_depthclearvalue = + r300_depth_clear_cb_value(surf->base.format, rgba); + + width = surf->cbzb_width; + height = surf->cbzb_height; + + r300->cbzb_clear = TRUE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + } /* Clear. */ r300_blitter_begin(r300, R300_CLEAR); util_blitter_clear(r300->blitter, - fb->width, - fb->height, + width, + height, fb->nr_cbufs, buffers, rgba, depth, stencil); r300_blitter_end(r300); + /* Disable CBZB clear. */ + if (r300->cbzb_clear) { + r300->cbzb_clear = FALSE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG); + } + /* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */ if (r300->flush_counter == 0) pipe->flush(pipe, 0, NULL); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index cce76cb1dfe..1beab7628a0 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -330,7 +330,7 @@ static void r300_init_states(struct pipe_context *pipe) BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size); OUT_CB_REG(R300_ZB_BW_CNTL, 0); OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); - OUT_CB_REG(R300_SC_HYPERZ, 0x1C); + OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); END_CB; } } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 2483af7fb5e..df4299b7ea3 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -311,6 +311,13 @@ struct r300_surface { uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */ + + /* Parameters dedicated to the CBZB clear. */ + uint32_t cbzb_width; /* Aligned width. */ + uint32_t cbzb_height; /* Half of the height. */ + uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */ + uint32_t cbzb_pitch; /* DEPTHPITCH. */ + uint32_t cbzb_format; /* ZB_FORMAT. */ }; struct r300_texture { @@ -525,6 +532,7 @@ struct r300_context { /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ boolean incompatible_vb_layout; + boolean cbzb_clear; /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; @@ -593,7 +601,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); /* r300_state.c */ enum r300_fb_state_change { - R300_CHANGED_FB_STATE = 0 + R300_CHANGED_FB_STATE = 0, + R300_CHANGED_CBZB_FLAG }; void r300_mark_fb_state_dirty(struct r300_context *r300, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 5ce3eb63c56..e1cb2bf5012 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -32,6 +32,7 @@ #include "r300_emit.h" #include "r300_fs.h" #include "r300_screen.h" +#include "r300_texture.h" #include "r300_screen_buffer.h" #include "r300_vs.h" @@ -272,8 +273,17 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height = fb->height; + uint32_t width = fb->width; CS_LOCALS(r300); + if (r300->cbzb_clear) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + height = surf->cbzb_height; + width = surf->cbzb_width; + } + BEGIN_CS(size); /* Set up scissors. @@ -281,13 +291,13 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); if (r300->screen->caps.is_r500) { OUT_CS(0); - OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | - ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) | + ((height - 1) << R300_SCISSORS_Y_SHIFT)); } else { OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | (1440 << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | - ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) | + ((height + 1440-1) << R300_SCISSORS_Y_SHIFT)); } /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ @@ -344,8 +354,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); } + /* Set up the ZB part of the CBZB clear. */ + if (r300->cbzb_clear) { + surf = r300_surface(fb->cbufs[0]); + + OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); + + OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain, 0); + + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); + OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain, 0); + } /* Set up a zbuffer. */ - if (fb->zsbuf) { + else if (fb->zsbuf) { surf = r300_surface(fb->zsbuf); OUT_CS_REG(R300_ZB_FORMAT, surf->format); @@ -377,6 +399,18 @@ void r300_emit_hyperz_state(struct r300_context *r300, WRITE_CS_TABLE(state, size); } +void r300_emit_hyperz_end(struct r300_context *r300) +{ + struct r300_hyperz_state z = + *(struct r300_hyperz_state*)r300->hyperz_state.state; + + z.zb_bw_cntl = 0; + z.zb_depthclearvalue = 0; + z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + + r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); +} + void r300_emit_fb_state_pipelined(struct r300_context *r300, unsigned size, void *state) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 586ccda620b..5d05039669f 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state); + +void r300_emit_hyperz_end(struct r300_context *r300); + void r300_emit_fs(struct r300_context* r300, unsigned size, void *state); void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); @@ -64,9 +69,6 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state); -void r300_emit_hyperz_state(struct r300_context *r300, - unsigned size, void *state); - void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state); void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 2ebf1c814b4..6f31ba159ac 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -48,6 +48,7 @@ static void r300_flush(struct pipe_context* pipe, } if (r300->dirty_hw) { + r300_emit_hyperz_end(r300); r300_emit_query_end(r300); r300->flush_counter++; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index 2c4e6c72110..e9528956019 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -26,6 +26,22 @@ #include "r300_reg.h" #include "r300_fs.h" +/*****************************************************************************/ +/* The HyperZ setup */ +/*****************************************************************************/ + +static void r300_update_hyperz(struct r300_context* r300) +{ + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + + z->zb_bw_cntl = 0; + z->sc_hyperz = R300_SC_HYPERZ_ADJ_2; + + if (r300->cbzb_clear) + z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY; +} + /*****************************************************************************/ /* The ZTOP state */ /*****************************************************************************/ @@ -118,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_hyperz_state(struct r300_context* r300) { r300_update_ztop(r300); + if (r300->hyperz_state.dirty) { + r300_update_hyperz(r300); + } } diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index adb02b4e639..1e0369b3774 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -224,6 +224,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300, /* Emitted in flush. */ end_dwords += 26; /* emit_query_end */ + end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */ cs_dwords += end_dwords; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index b0722cb95f6..f4c6a262d4a 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -688,7 +688,9 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* Now compute the fb_state atom size. */ r300->fb_state.size = 2 + (8 * state->nr_cbufs); - if (state->zsbuf) + if (r300->cbzb_clear) + r300->fb_state.size += 10; + else if (state->zsbuf) r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14; /* The size of the rest of atoms stays the same. */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index d378a7150d6..e8b1d670070 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -1034,6 +1034,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if (surface) { + uint32_t stride, offset, tile_height; + pipe_reference_init(&surface->base.reference, 1); pipe_resource_reference(&surface->base.texture, texture); surface->base.format = texture->format; @@ -1054,6 +1056,34 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, surface->offset = r300_texture_get_offset(tex, level, zslice, face); surface->pitch = tex->fb_state.pitch[level]; surface->format = tex->fb_state.format; + + /* Parameters for the CBZB clear. */ + surface->cbzb_width = align(surface->base.width, 64); + + /* Height must be aligned to the size of a tile. */ + tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + DIM_HEIGHT); + surface->cbzb_height = align((surface->base.height + 1) / 2, + tile_height); + + /* Offset must be aligned to 2K and must point at the beginning + * of a scanline. */ + stride = r300_texture_get_stride(r300_screen(screen), tex, level); + offset = surface->offset + stride * surface->cbzb_height; + surface->cbzb_midpoint_offset = offset & ~2047; + + surface->cbzb_pitch = surface->pitch & 0x1ffffc; + + if (util_format_get_blocksizebits(surface->base.format) == 32) + surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + else + surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z; + + SCREEN_DBG(r300_screen(screen), DBG_TEX, + "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n", + surface->cbzb_width, surface->cbzb_height, + offset & 2047, + tex->mip_macrotile[level] ? "YES" : " NO"); } return &surface->base; -- cgit v1.2.3