summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600/r600_texture.c
diff options
context:
space:
mode:
authorMarek Olšák <maraeo@gmail.com>2012-08-12 20:06:33 +0200
committerMarek Olšák <maraeo@gmail.com>2012-08-27 04:31:00 +0200
commita3d9d7ec79d6f7205fab2324e47d8ea185431de0 (patch)
tree3aa8b27b500d9e7535053e01c5345f025ab4f86a /src/gallium/drivers/r600/r600_texture.c
parent48edfe0505ee79d35f770f53b9c9b7ca3c69fd2b (diff)
r600g: implement compression for MSAA colorbuffers for evergreen
This adds the FMASK and CMASK buffers. They share the same resource with color data.

COMPRESSION and FAST_CLEAR are always enabled if both FMASK and CMASK are allocated. We initialize the CMASK to a "compressed" state (not "fast cleared"), so that we can keep FAST_CLEAR enabled all the time.

Both FMASK and CMASK must be present at the moment. If either one is missing, the other one is not used.

v2: add cayman regs in the list

Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Diffstat (limited to 'src/gallium/drivers/r600/r600_texture.c')
-rw-r--r--src/gallium/drivers/r600/r600_texture.c109
1 files changed, 109 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 90a834ec634..acb77d5c4dd 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -252,6 +252,97 @@ static const struct u_resource_vtbl r600_texture_vtbl =
NULL /* transfer_inline_write */
};
+/**
+ * Reserve room for the FMASK of a multisampled color texture.
+ *
+ * A copy of the texture's surface description is re-parameterized (bpe and
+ * sample count chosen from the texture's nr_samples) and passed to the
+ * winsys surface_init hook. On success the FMASK bank height, offset and
+ * size are stored in rtex and rtex->size grows to cover the FMASK.
+ *
+ * For an unsupported sample count, or when surface_init reports an error,
+ * a message is logged and rtex is left unmodified.
+ */
+static void r600_texture_allocate_fmask(struct r600_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	/* The FMASK layout is derived much like that of a regular texture,
+	 * except that bpe is expressed in bits here, not bytes. */
+	struct radeon_surface surf = rtex->surface;
+	unsigned samples = rtex->resource.b.b.nr_samples;
+
+	if (samples == 2 || samples == 4) {
+		/* For 2 samples this should be 8,1, but nsamples has to stay
+		 * above 1 for the allocator to treat the surface as
+		 * multisampled, so 2 samples share the 4,2 setup. */
+		surf.bpe = 4;
+		surf.nsamples = 2;
+	} else if (samples == 8) {
+		surf.bpe = 8;
+		surf.nsamples = 4;
+	} else if (samples == 16) {
+		surf.bpe = 16;
+		surf.nsamples = 4;
+	} else {
+		R600_ERR("Invalid sample count for FMASK allocation.\n");
+		return;
+	}
+
+	/* The bank height is only overridden on Evergreen and later chips. */
+	if (rscreen->chip_class >= EVERGREEN) {
+		if (samples <= 4)
+			surf.bankh = 4;
+		else
+			surf.bankh = 1;
+	}
+
+	if (rscreen->ws->surface_init(rscreen->ws, &surf) != 0) {
+		R600_ERR("Got error in surface_init while allocating FMASK.\n");
+		return;
+	}
+	assert(surf.level[0].mode == RADEON_SURF_MODE_2D);
+
+	/* Append the FMASK after the existing data; the allocator's size is
+	 * in bits (see above), so round it up to whole bytes. */
+	rtex->fmask_offset = align(rtex->size, MAX2(256, surf.bo_alignment));
+	rtex->fmask_size = (surf.bo_size + 7) / 8;
+	rtex->size = rtex->fmask_offset + rtex->fmask_size;
+	rtex->fmask_bank_height = surf.bankh;
+#if 0
+	printf("FMASK width=%u, height=%i, bits=%u, size=%u\n",
+	       surf.npix_x, surf.npix_y, surf.bpe * surf.nsamples, rtex->fmask_size);
+#endif
+}
+
+/**
+ * Reserve room for the CMASK of a multisampled color texture.
+ *
+ * The texture dimensions are padded to whole macro tiles, whose shape is
+ * derived from rscreen->tiling_info.num_channels. From the padded area the
+ * function computes the per-slice byte size, then stores the CMASK offset,
+ * total size and slice tile maximum in rtex and grows rtex->size to cover
+ * the CMASK.
+ */
+static void r600_texture_allocate_cmask(struct r600_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	/* Fixed CMASK parameters. */
+	const unsigned tile_w = 8;
+	const unsigned tile_h = 8;
+	const unsigned elems_per_tile = tile_w * tile_h;
+	const unsigned bits_per_elem = 4;
+	const unsigned cache_bits = 1024;
+
+	/* Values taken from the screen's tiling configuration. */
+	const unsigned pipes = rscreen->tiling_info.num_channels;
+	const unsigned interleave_bytes = rscreen->tiling_info.group_bytes;
+	const unsigned alignment = pipes * interleave_bytes;
+
+	/* Macro tile shape: the width comes from the (truncated) square root
+	 * of the pixel count passed through util_next_power_of_two, and the
+	 * height is whatever is left of the pixel count. */
+	const unsigned macro_pixels =
+		(cache_bits / bits_per_elem) * pipes * elems_per_tile;
+	const unsigned macro_w =
+		util_next_power_of_two((unsigned)sqrt(macro_pixels));
+	const unsigned macro_h = macro_pixels / macro_w;
+
+	/* Texture dimensions padded to whole macro tiles. */
+	const unsigned padded_w = align(rtex->surface.npix_x, macro_w);
+	const unsigned padded_h = align(rtex->surface.npix_y, macro_h);
+	const unsigned padded_area = padded_w * padded_h;
+
+	/* Bits rounded up to bytes, then divided by the elements per tile. */
+	const unsigned bytes_per_slice =
+		((padded_area * bits_per_elem + 7) / 8) / elems_per_tile;
+	const unsigned total_bytes =
+		rtex->surface.array_size * align(bytes_per_slice, alignment);
+
+	assert(macro_w % 128 == 0);
+	assert(macro_h % 128 == 0);
+
+	/* Append the CMASK after whatever rtex already holds. */
+	rtex->cmask_slice_tile_max = (padded_area / (128*128)) - 1;
+	rtex->cmask_offset = align(rtex->size, MAX2(256, alignment));
+	rtex->cmask_size = total_bytes;
+	rtex->size = rtex->cmask_offset + rtex->cmask_size;
+#if 0
+	printf("CMASK: macro tile width = %u, macro tile height = %u, "
+	       "pitch elements = %u, height = %u, slice tile max = %u\n",
+	       macro_w, macro_h, padded_w, padded_h,
+	       rtex->cmask_slice_tile_max);
+#endif
+}
+
static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
@@ -287,6 +378,17 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
+ if (base->nr_samples > 1 && !rtex->is_depth && alloc_bo) {
+ r600_texture_allocate_fmask(rscreen, rtex);
+ r600_texture_allocate_cmask(rscreen, rtex);
+ }
+
+ if (!rtex->is_depth && base->nr_samples > 1 &&
+ (!rtex->fmask_size || !rtex->cmask_size)) {
+ FREE(rtex);
+ return NULL;
+ }
+
/* Now create the backing buffer. */
if (!buf && alloc_bo) {
unsigned base_align = rtex->surface.bo_alignment;
@@ -301,6 +403,13 @@ r600_texture_create_object(struct pipe_screen *screen,
resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
}
+
+ if (rtex->cmask_size) {
+ /* Initialize the cmask to 0xCC (= compressed state). */
+ char *ptr = rscreen->ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+ memset(ptr + rtex->cmask_offset, 0xCC, rtex->cmask_size);
+ rscreen->ws->buffer_unmap(resource->cs_buf);
+ }
return rtex;
}