summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r300/r300_blit.c
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2013-01-12 03:29:40 +0100
committerMarek Olšák <[email protected]>2013-01-15 21:48:58 +0100
commitca2c28859eca83f8fbf1f43616f5ef861e95e8d6 (patch)
treece839793a720913a19cf69f1e6734fdab3e016ab /src/gallium/drivers/r300/r300_blit.c
parent1dfe8eead95613a7db62dd17d3da56884b5a887e (diff)
r300g: implement MSAA compression and fast MSAA color clear
These are optimizations which make MSAA a lot faster.

The MSAA work is complete with this commit (except for the enablement of AA optimizations for RGBA16F, for which a patch is ready and waiting until the kernel CS checker fix lands). MSAA can't be made any faster as far as hw programming is concerned.

The catch is that only one process and one colorbuffer can use the optimizations at a time. There is usually only one MSAA colorbuffer, so it shouldn't be an issue.

Also, there is a limit on the resolution of the MSAA colorbuffer, expressed in megapixels. If the limit is exceeded, the AA optimizations are disabled. The limit is:
- 1 Mpix on low-end and some mid-level chipsets (1024x768 and 1280x720)
- 2 Mpix on some mid-level chipsets (1600x1200 and 1920x1080)
- 3 or 4 Mpix on high-end chipsets (2048x1536 or 2560x1600, respectively)

The limit corresponds to the number of raster pipes (= GB pipes) available; each pipe can hold 1 Mpix of AA compression data.

If the optimizations are enabled, the driver prints to stdout:
    radeon: Acquired access to AA optimizations.
Diffstat (limited to 'src/gallium/drivers/r300/r300_blit.c')
-rw-r--r--src/gallium/drivers/r300/r300_blit.c68
1 files changed, 66 insertions, 2 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index f8d3b1fd1d1..2bb6063846b 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -26,6 +26,7 @@
#include "r300_reg.h"
#include "util/u_format.h"
+#include "util/u_half.h"
#include "util/u_pack_color.h"
#include "util/u_surface.h"
@@ -176,6 +177,25 @@ static uint32_t r300_hiz_clear_value(double depth)
return r | (r << 8) | (r << 16) | (r << 24);
}
+static void r300_set_clear_color(struct r300_context *r300,
+ const union pipe_color_union *color)
+{
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ union util_color uc;
+ /* Pack 'color' in the format of the first colorbuffer (cbufs[0]); uc is zeroed beforehand. */
+ memset(&uc, 0, sizeof(uc));
+ util_pack_color(color->f, fb->cbufs[0]->format, &uc);
+ /* R16G16B16A16_FLOAT is stored as two 32-bit values built from uc.h[0..3]; any other format stores uc.ui. */
+ if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) {
+ /* (0,1,2,3) maps to (B,G,R,A): h[0] | h[1] << 16 forms the GB value, h[2] | h[3] << 16 the AR value. */
+ r300->color_clear_value_gb = uc.h[0] | ((uint32_t)uc.h[1] << 16);
+ r300->color_clear_value_ar = uc.h[2] | ((uint32_t)uc.h[3] << 16);
+ } else {
+ r300->color_clear_value = uc.ui;
+ }
+}
+
DEBUG_GET_ONCE_BOOL_OPTION(hyperz, "RADEON_HYPERZ", FALSE)
/* Clear currently bound buffers. */
@@ -287,8 +307,44 @@ static void r300_clear(struct pipe_context* pipe,
}
}
+ /* Use fast color clear for an AA colorbuffer.
+ * The CMASK is shared between all colorbuffers, so we use it
+ * if there is only one colorbuffer bound. */
+ if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs == 1 &&
+ r300_resource(fb->cbufs[0]->texture)->tex.cmask_dwords) {
+ /* Try to obtain the access to the CMASK if we don't have one. */
+ if (!r300->cmask_access) {
+ r300->cmask_access =
+ r300->rws->cs_request_feature(r300->cs,
+ RADEON_FID_R300_CMASK_ACCESS,
+ TRUE);
+ }
+
+ /* Setup the clear. */
+ if (r300->cmask_access) {
+ /* Pair the resource with the CMASK to avoid other resources
+ * accessing it. */
+ if (!r300->screen->cmask_resource) {
+ pipe_mutex_lock(r300->screen->cmask_mutex);
+ /* Double checking (first unlocked, then locked). */
+ if (!r300->screen->cmask_resource) {
+ /* Don't reference this, so that the texture can be
+ * destroyed while set in cmask_resource.
+ * Then in texture_destroy, we set cmask_resource to NULL. */
+ r300->screen->cmask_resource = fb->cbufs[0]->texture;
+ }
+ pipe_mutex_unlock(r300->screen->cmask_mutex);
+ }
+
+ if (r300->screen->cmask_resource == fb->cbufs[0]->texture) {
+ r300_set_clear_color(r300, color);
+ r300_mark_atom_dirty(r300, &r300->cmask_clear);
+ buffers &= ~PIPE_CLEAR_COLOR;
+ }
+ }
+ }
/* Enable CBZB clear. */
- if (r300_cbzb_clear_allowed(r300, buffers)) {
+ else if (r300_cbzb_clear_allowed(r300, buffers)) {
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
hyperz->zb_depthclearvalue =
@@ -312,13 +368,16 @@ static void r300_clear(struct pipe_context* pipe,
fb->nr_cbufs,
buffers, cformat, color, depth, stencil);
r300_blitter_end(r300);
- } else if (r300->zmask_clear.dirty || r300->hiz_clear.dirty) {
+ } else if (r300->zmask_clear.dirty ||
+ r300->hiz_clear.dirty ||
+ r300->cmask_clear.dirty) {
/* Just clear zmask and hiz now, this does not use the standard draw
* procedure. */
/* Calculate zmask_clear and hiz_clear atom sizes. */
unsigned dwords =
(r300->zmask_clear.dirty ? r300->zmask_clear.size : 0) +
(r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
+ (r300->cmask_clear.dirty ? r300->cmask_clear.size : 0) +
r300_get_num_cs_end_dwords(r300);
/* Reserve CS space. */
@@ -337,6 +396,11 @@ static void r300_clear(struct pipe_context* pipe,
r300->hiz_clear.state);
r300->hiz_clear.dirty = FALSE;
}
+ if (r300->cmask_clear.dirty) {
+ r300_emit_cmask_clear(r300, r300->cmask_clear.size,
+ r300->cmask_clear.state);
+ r300->cmask_clear.dirty = FALSE;
+ }
} else {
assert(0);
}