summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2010-07-12 13:23:24 +0200
committer Marek Olšák <[email protected]> 2010-07-12 13:26:00 +0200
commit 8c836f7f740c6f74511c727c7bed0680ddba9974 (patch)
tree 2e26e477cb1a9effb97eec38f7734fc87507312f
parent 78e8a8765f435bf0902d62afbcb3b8d68a0b716f (diff)
r300g: implement fast color clear
An initial implementation made by Dave Airlie. For it to be used, a color-only clear must be invoked and exactly one point-sampled render target must be set. The render target must be macrotiled (for us to overcome alignment issues) and bpp must be either 16 or 32. I can't see a difference in performance. :( Conflicts: src/gallium/drivers/r300/r300_blit.c
-rw-r--r-- src/gallium/drivers/r300/r300_blit.c 71
-rw-r--r-- src/gallium/drivers/r300/r300_context.c 2
-rw-r--r-- src/gallium/drivers/r300/r300_context.h 11
-rw-r--r-- src/gallium/drivers/r300/r300_emit.c 44
-rw-r--r-- src/gallium/drivers/r300/r300_emit.h 8
-rw-r--r-- src/gallium/drivers/r300/r300_flush.c 1
-rw-r--r-- src/gallium/drivers/r300/r300_hyperz.c 19
-rw-r--r-- src/gallium/drivers/r300/r300_render.c 1
-rw-r--r-- src/gallium/drivers/r300/r300_state.c 4
-rw-r--r-- src/gallium/drivers/r300/r300_texture.c 30
10 files changed, 178 insertions, 13 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 3cc054788bc..895efaa1c4a 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -24,6 +24,7 @@
#include "r300_texture.h"
#include "util/u_format.h"
+#include "util/u_pack_color.h"
enum r300_blitter_op /* bitmask */
{
@@ -79,6 +80,48 @@ static void r300_blitter_end(struct r300_context *r300)
}
}
+/* Pack a clear color into the 32-bit value that r300_clear stores in
+ * zb_depthclearvalue for a CBZB clear.
+ *
+ * format: format of the colorbuffer being cleared.
+ * rgba:   clear color, passed straight to util_pack_color.
+ *
+ * Returns the packed color as-is when the format's block size is 32 bits;
+ * otherwise the packed 16-bit value replicated into both halves of the
+ * dword. */
+static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
+                                          const float* rgba)
+{
+    union util_color uc;
+    util_pack_color(rgba, format, &uc);
+
+    if (util_format_get_blocksizebits(format) == 32)
+        return uc.ui;
+
+    /* uc.us is narrower than int (presumably a 16-bit unsigned short --
+     * confirm against util_color), so it is promoted to signed int before
+     * the shift. Shifting a value with bit 15 set left by 16 would then
+     * overflow int, which is undefined behaviour. Widen to uint32_t first
+     * so the replication is well-defined for every packed color. */
+    return (uint32_t)uc.us | ((uint32_t)uc.us << 16);
+}
+
+/* Tell whether the CBZB fast clear path may be used for the framebuffer
+ * currently stored in r300->fb_state, given the set of buffers the caller
+ * wants cleared. Returns TRUE only when every check below passes. */
+static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
+                                       unsigned clear_buffers)
+{
+    struct pipe_framebuffer_state *fb =
+        (struct pipe_framebuffer_state*)r300->fb_state.state;
+    struct r300_surface *cbuf = r300_surface(fb->cbufs[0]);
+
+    /* The request must be a pure color clear with exactly one
+     * colorbuffer bound. */
+    if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1)
+        return FALSE;
+
+    /* Colorbuffers with more than one sample are rejected. */
+    if (cbuf->base.texture->nr_samples > 1)
+        return FALSE;
+
+    /* ZB can only work with 16-bit and 32-bit pixels. */
+    switch (util_format_get_blocksizebits(cbuf->base.format)) {
+    case 16:
+    case 32:
+        break;
+    default:
+        return FALSE;
+    }
+
+    /* If the midpoint ZB offset is not aligned to 2048, the clear returns
+     * garbage with certain texture sizes. Macrotiling ensures the
+     * alignment, so the level being cleared must be macrotiled. */
+    return r300_texture(cbuf->base.texture)->mip_macrotile[cbuf->base.level]
+           ? TRUE : FALSE;
+}
+
/* Clear currently bound buffers. */
static void r300_clear(struct pipe_context* pipe,
unsigned buffers,
@@ -124,16 +167,40 @@ static void r300_clear(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_hyperz_state *hyperz =
+ (struct r300_hyperz_state*)r300->hyperz_state.state;
+ uint32_t width = fb->width;
+ uint32_t height = fb->height;
+
+ /* Enable CBZB clear. */
+ if (r300_cbzb_clear_allowed(r300, buffers)) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ hyperz->zb_depthclearvalue =
+ r300_depth_clear_cb_value(surf->base.format, rgba);
+
+ width = surf->cbzb_width;
+ height = surf->cbzb_height;
+
+ r300->cbzb_clear = TRUE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
/* Clear. */
r300_blitter_begin(r300, R300_CLEAR);
util_blitter_clear(r300->blitter,
- fb->width,
- fb->height,
+ width,
+ height,
fb->nr_cbufs,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
+ /* Disable CBZB clear. */
+ if (r300->cbzb_clear) {
+ r300->cbzb_clear = FALSE;
+ r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
+ }
+
/* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
if (r300->flush_counter == 0)
pipe->flush(pipe, 0, NULL);
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index cce76cb1dfe..1beab7628a0 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -330,7 +330,7 @@ static void r300_init_states(struct pipe_context *pipe)
BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size);
OUT_CB_REG(R300_ZB_BW_CNTL, 0);
OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
- OUT_CB_REG(R300_SC_HYPERZ, 0x1C);
+ OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
END_CB;
}
}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 2483af7fb5e..df4299b7ea3 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -311,6 +311,13 @@ struct r300_surface {
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
+
+ /* Parameters dedicated to the CBZB clear. */
+ uint32_t cbzb_width; /* Aligned width. */
+ uint32_t cbzb_height; /* Half of the height. */
+ uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */
+ uint32_t cbzb_pitch; /* DEPTHPITCH. */
+ uint32_t cbzb_format; /* ZB_FORMAT. */
};
struct r300_texture {
@@ -525,6 +532,7 @@ struct r300_context {
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
boolean incompatible_vb_layout;
+ boolean cbzb_clear;
/* upload managers */
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
@@ -593,7 +601,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
/* r300_state.c */
enum r300_fb_state_change {
- R300_CHANGED_FB_STATE = 0
+ R300_CHANGED_FB_STATE = 0,
+ R300_CHANGED_CBZB_FLAG
};
void r300_mark_fb_state_dirty(struct r300_context *r300,
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 5ce3eb63c56..e1cb2bf5012 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -32,6 +32,7 @@
#include "r300_emit.h"
#include "r300_fs.h"
#include "r300_screen.h"
+#include "r300_texture.h"
#include "r300_screen_buffer.h"
#include "r300_vs.h"
@@ -272,8 +273,17 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
+ uint32_t height = fb->height;
+ uint32_t width = fb->width;
CS_LOCALS(r300);
+ if (r300->cbzb_clear) {
+ struct r300_surface *surf = r300_surface(fb->cbufs[0]);
+
+ height = surf->cbzb_height;
+ width = surf->cbzb_width;
+ }
+
BEGIN_CS(size);
/* Set up scissors.
@@ -281,13 +291,13 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
if (r300->screen->caps.is_r500) {
OUT_CS(0);
- OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((height - 1) << R300_SCISSORS_Y_SHIFT));
} else {
OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
(1440 << R300_SCISSORS_Y_SHIFT));
- OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) |
- ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) |
+ ((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
}
/* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
@@ -344,8 +354,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
}
+ /* Set up the ZB part of the CBZB clear. */
+ if (r300->cbzb_clear) {
+ surf = r300_surface(fb->cbufs[0]);
+
+ OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain, 0);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
+ OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain, 0);
+ }
/* Set up a zbuffer. */
- if (fb->zsbuf) {
+ else if (fb->zsbuf) {
surf = r300_surface(fb->zsbuf);
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
@@ -377,6 +399,18 @@ void r300_emit_hyperz_state(struct r300_context *r300,
WRITE_CS_TABLE(state, size);
}
+/* Emit a reset version of the hyperz state: zb_bw_cntl and
+ * zb_depthclearvalue zeroed, sc_hyperz set to R300_SC_HYPERZ_ADJ_2.
+ * Only a local copy is modified; the state stored in the context is
+ * left as it is. */
+void r300_emit_hyperz_end(struct r300_context *r300)
+{
+    const struct r300_hyperz_state *current =
+        (struct r300_hyperz_state*)r300->hyperz_state.state;
+    /* Start from the current state so every other field is carried over. */
+    struct r300_hyperz_state reset = *current;
+
+    reset.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+    reset.zb_depthclearvalue = 0;
+    reset.zb_bw_cntl = 0;
+
+    r300_emit_hyperz_state(r300, r300->hyperz_state.size, &reset);
+}
+
void r300_emit_fb_state_pipelined(struct r300_context *r300,
unsigned size, void *state)
{
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 586ccda620b..5d05039669f 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300,
void r300_emit_dsa_state(struct r300_context* r300,
unsigned size, void* state);
+void r300_emit_hyperz_state(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_hyperz_end(struct r300_context *r300);
+
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
@@ -64,9 +69,6 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);
-void r300_emit_hyperz_state(struct r300_context *r300,
- unsigned size, void *state);
-
void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);
void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 2ebf1c814b4..6f31ba159ac 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -48,6 +48,7 @@ static void r300_flush(struct pipe_context* pipe,
}
if (r300->dirty_hw) {
+ r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
r300->flush_counter++;
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index 2c4e6c72110..e9528956019 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -27,6 +27,22 @@
#include "r300_fs.h"
/*****************************************************************************/
+/* The HyperZ setup */
+/*****************************************************************************/
+
+/* Recompute the hyperz state kept in r300->hyperz_state from the current
+ * context flags. */
+static void r300_update_hyperz(struct r300_context* r300)
+{
+    struct r300_hyperz_state *hyperz =
+        (struct r300_hyperz_state*)r300->hyperz_state.state;
+
+    hyperz->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
+
+    /* R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY is the only bit ever set here,
+     * and only while a CBZB clear is in progress; otherwise the register
+     * value is zero. */
+    hyperz->zb_bw_cntl = r300->cbzb_clear
+        ? R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY : 0;
+}
+
+/*****************************************************************************/
/* The ZTOP state */
/*****************************************************************************/
@@ -118,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300)
void r300_update_hyperz_state(struct r300_context* r300)
{
r300_update_ztop(r300);
+ if (r300->hyperz_state.dirty) {
+ r300_update_hyperz(r300);
+ }
}
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index adb02b4e639..1e0369b3774 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -224,6 +224,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
/* Emitted in flush. */
end_dwords += 26; /* emit_query_end */
+ end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */
cs_dwords += end_dwords;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index b0722cb95f6..f4c6a262d4a 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -688,7 +688,9 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
/* Now compute the fb_state atom size. */
r300->fb_state.size = 2 + (8 * state->nr_cbufs);
- if (state->zsbuf)
+ if (r300->cbzb_clear)
+ r300->fb_state.size += 10;
+ else if (state->zsbuf)
r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14;
/* The size of the rest of atoms stays the same. */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index d378a7150d6..e8b1d670070 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -1034,6 +1034,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
if (surface) {
+ uint32_t stride, offset, tile_height;
+
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.format = texture->format;
@@ -1054,6 +1056,34 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
surface->offset = r300_texture_get_offset(tex, level, zslice, face);
surface->pitch = tex->fb_state.pitch[level];
surface->format = tex->fb_state.format;
+
+ /* Parameters for the CBZB clear. */
+ surface->cbzb_width = align(surface->base.width, 64);
+
+ /* Height must be aligned to the size of a tile. */
+ tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
+ DIM_HEIGHT);
+ surface->cbzb_height = align((surface->base.height + 1) / 2,
+ tile_height);
+
+ /* Offset must be aligned to 2K and must point at the beginning
+ * of a scanline. */
+ stride = r300_texture_get_stride(r300_screen(screen), tex, level);
+ offset = surface->offset + stride * surface->cbzb_height;
+ surface->cbzb_midpoint_offset = offset & ~2047;
+
+ surface->cbzb_pitch = surface->pitch & 0x1ffffc;
+
+ if (util_format_get_blocksizebits(surface->base.format) == 32)
+ surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ else
+ surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
+
+ SCREEN_DBG(r300_screen(screen), DBG_TEX,
+ "CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n",
+ surface->cbzb_width, surface->cbzb_height,
+ offset & 2047,
+ tex->mip_macrotile[level] ? "YES" : " NO");
}
return &surface->base;