r600g: implement compression for MSAA colorbuffers for evergreen

This adds the FMASK and CMASK buffers. They share the same resource with color data. COMPRESSION and FAST_CLEAR are always enabled if both FMASK and CMASK are allocated. We initialize the CMASK to a "compressed" state (not "fast cleared"), so that we can keep FAST_CLEAR enabled all the time. Both FMASK and CMASK must be present at the moment. If either one is missing, the other one is not used. v2: add cayman regs in the list Reviewed-by: Jerome Glisse <jglisse@redhat.com>
author: Marek Olšák <maraeo@gmail.com> 2012-08-12 20:06:33 +0200
committer: Marek Olšák <maraeo@gmail.com> 2012-08-27 04:31:00 +0200
commit: a3d9d7ec79d6f7205fab2324e47d8ea185431de0 (patch)
tree: 3aa8b27b500d9e7535053e01c5345f025ab4f86a /src/gallium/drivers/r600/r600_texture.c
parent: 48edfe0505ee79d35f770f53b9c9b7ca3c69fd2b (diff)
1 files changed, 109 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 90a834ec634..acb77d5c4dd 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -252,6 +252,97 @@ static const struct u_resource_vtbl r600_texture_vtbl =
 	NULL				/* transfer_inline_write */
 };
 
+static void r600_texture_allocate_fmask(struct r600_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	/* Reserve room for an FMASK behind the current contents of rtex. FMASK is allocated pretty much like
+	 * an ordinary texture: a copy of rtex->surface is re-laid-out with bpe in the units of bits, not bytes. */
+	struct radeon_surface fmask = rtex->surface; /* by-value copy; rtex->surface itself is not modified */
+	unsigned nr_samples = rtex->resource.b.b.nr_samples;
+
+	switch (nr_samples) {
+	case 2:
+		/* This should be 8,1 (bpe,nsamples), but we should set nsamples > 1
+		 * for the allocator to treat it as a multisample surface.
+		 * Let's set 4,2 then, i.e. intentionally fall through to the 4-sample case. */
+	case 4:
+		fmask.bpe = 4;
+		fmask.nsamples = 2;
+		break;
+	case 8:
+		fmask.bpe = 8;
+		fmask.nsamples = 4;
+		break;
+	case 16:
+		fmask.bpe = 16;
+		fmask.nsamples = 4;
+		break;
+	default:
+		R600_ERR("Invalid sample count for FMASK allocation.\n");
+		return; /* no FMASK field of rtex has been written at this point */
+	}
+
+	if (rscreen->chip_class >= EVERGREEN) {
+		fmask.bankh = nr_samples <= 4 ? 4 : 1; /* replaces the bank height copied from the color surface */
+	}
+
+	if (rscreen->ws->surface_init(rscreen->ws, &fmask)) { /* non-zero return is treated as failure */
+		R600_ERR("Got error in surface_init while allocating FMASK.\n");
+		return; /* no FMASK field of rtex has been written at this point */
+	}
+	assert(fmask.level[0].mode == RADEON_SURF_MODE_2D); /* checked in assert-enabled builds only */
+
+	/* Reserve space for FMASK while converting bits back to bytes; rtex->size grows to cover it. */
+	rtex->fmask_bank_height = fmask.bankh;
+	rtex->fmask_offset = align(rtex->size, MAX2(256, fmask.bo_alignment)); /* aligned to at least 256 */
+	rtex->fmask_size = (fmask.bo_size + 7) / 8; /* round the bit count up to whole bytes */
+	rtex->size = rtex->fmask_offset + rtex->fmask_size;
+#if 0 /* disabled debug output */
+	printf("FMASK width=%u, height=%i, bits=%u, size=%u\n",
+	       fmask.npix_x, fmask.npix_y, fmask.bpe * fmask.nsamples, rtex->fmask_size);
+#endif
+}
+
+static void r600_texture_allocate_cmask(struct r600_screen *rscreen,
+					struct r600_texture *rtex)
+{ /* Computes the CMASK size for rtex and reserves it behind the current rtex->size. */
+	unsigned cmask_tile_width = 8; /* tile dimensions; multiplied into pixel counts below */
+	unsigned cmask_tile_height = 8;
+	unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
+	unsigned element_bits = 4; /* bits stored per 8x8 tile (see slice_bytes) */
+	unsigned cmask_cache_bits = 1024; /* NOTE(review): presumably the CMASK cache size in bits - confirm */
+	unsigned num_pipes = rscreen->tiling_info.num_channels; /* pipe count is taken from the channel count */
+	unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+
+	unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
+	unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
+	unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile); /* double result truncated to unsigned */
+	unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
+	unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
+
+	unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width); /* width padded to whole macro tiles */
+	unsigned height = align(rtex->surface.npix_y, macro_tile_height); /* height padded to whole macro tiles */
+
+	unsigned base_align = num_pipes * pipe_interleave_bytes;
+	unsigned slice_bytes = /* padded pixels * element_bits, rounded up to bytes, then divided by pixels per tile */
+		((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
+	unsigned size = rtex->surface.array_size * align(slice_bytes, base_align); /* each slice padded to base_align */
+
+	assert(macro_tile_width % 128 == 0); /* the division by 128*128 below relies on these two checks */
+	assert(macro_tile_height % 128 == 0);
+
+	rtex->cmask_slice_tile_max = ((pitch_elements * height) / (128*128)) - 1; /* 128x128-pixel blocks per slice, minus one */
+	rtex->cmask_offset = align(rtex->size, MAX2(256, base_align)); /* aligned to at least 256 */
+	rtex->cmask_size = size;
+	rtex->size = rtex->cmask_offset + rtex->cmask_size; /* the resource grows to cover the CMASK */
+#if 0 /* disabled debug output */
+	printf("CMASK: macro tile width = %u, macro tile height = %u, "
+	       "pitch elements = %u, height = %u, slice tile max = %u\n",
+	       macro_tile_width, macro_tile_height, pitch_elements, height,
+	       rtex->cmask_slice_tile_max);
+#endif
+}
+
 static struct r600_texture *
 r600_texture_create_object(struct pipe_screen *screen,
 			   const struct pipe_resource *base,
@@ -287,6 +378,17 @@ r600_texture_create_object(struct pipe_screen *screen,
 		return NULL;
 	}
 
+	if (base->nr_samples > 1 && !rtex->is_depth && alloc_bo) {
+		r600_texture_allocate_fmask(rscreen, rtex);
+		r600_texture_allocate_cmask(rscreen, rtex);
+	}
+
+	if (!rtex->is_depth && base->nr_samples > 1 &&
+	    (!rtex->fmask_size || !rtex->cmask_size)) {
+		FREE(rtex);
+		return NULL;
+	}
+
 	/* Now create the backing buffer. */
 	if (!buf && alloc_bo) {
 		unsigned base_align = rtex->surface.bo_alignment;
@@ -301,6 +403,13 @@ r600_texture_create_object(struct pipe_screen *screen,
 		resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
 		resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 	}
+
+	if (rtex->cmask_size) {
+		/* Initialize the cmask to 0xCC (= compressed state). */
+		char *ptr = rscreen->ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+		memset(ptr + rtex->cmask_offset, 0xCC, rtex->cmask_size);
+		rscreen->ws->buffer_unmap(resource->cs_buf);
+	}
 	return rtex;
 }
author	Marek Olšák <maraeo@gmail.com>	2012-08-12 20:06:33 +0200
committer	Marek Olšák <maraeo@gmail.com>	2012-08-27 04:31:00 +0200
commit	a3d9d7ec79d6f7205fab2324e47d8ea185431de0 (patch)
tree	3aa8b27b500d9e7535053e01c5345f025ab4f86a /src/gallium/drivers/r600/r600_texture.c
parent	48edfe0505ee79d35f770f53b9c9b7ca3c69fd2b (diff)