diff options
author | Marek Olšák <[email protected]> | 2012-08-12 20:06:33 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2012-08-27 04:31:00 +0200 |
commit | a3d9d7ec79d6f7205fab2324e47d8ea185431de0 (patch) | |
tree | 3aa8b27b500d9e7535053e01c5345f025ab4f86a /src/gallium/drivers/r600/r600_texture.c | |
parent | 48edfe0505ee79d35f770f53b9c9b7ca3c69fd2b (diff) |
r600g: implement compression for MSAA colorbuffers for evergreen
This adds the FMASK and CMASK buffers. They share the same resource
with color data.
COMPRESSION and FAST_CLEAR are always enabled if both FMASK and CMASK are
allocated. We initialize the CMASK to a "compressed" state (not "fast cleared"),
so that we can keep FAST_CLEAR enabled all the time.
Both FMASK and CMASK must be present at the moment. If either one is missing,
the other one is not used.
v2: add cayman regs in the list
Reviewed-by: Jerome Glisse <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600/r600_texture.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_texture.c | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 90a834ec634..acb77d5c4dd 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -252,6 +252,97 @@ static const struct u_resource_vtbl r600_texture_vtbl =
 	NULL /* transfer_inline_write */
 };
 
+static void r600_texture_allocate_fmask(struct r600_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	/* FMASK is allocated pretty much like an ordinary texture.
+	 * Here we use bpe in the units of bits, not bytes. */
+	struct radeon_surface fmask = rtex->surface;
+	unsigned nr_samples = rtex->resource.b.b.nr_samples;
+
+	switch (nr_samples) {
+	case 2:
+		/* This should be 8,1, but we should set nsamples > 1
+		 * for the allocator to treat it as a multisample surface.
+		 * Let's set 4,2 then. */
+	case 4:
+		fmask.bpe = 4;
+		fmask.nsamples = 2;
+		break;
+	case 8:
+		fmask.bpe = 8;
+		fmask.nsamples = 4;
+		break;
+	case 16:
+		fmask.bpe = 16;
+		fmask.nsamples = 4;
+		break;
+	default:
+		R600_ERR("Invalid sample count for FMASK allocation.\n");
+		return;
+	}
+
+	if (rscreen->chip_class >= EVERGREEN) {
+		fmask.bankh = nr_samples <= 4 ? 4 : 1;
+	}
+
+	if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
+		R600_ERR("Got error in surface_init while allocating FMASK.\n");
+		return;
+	}
+	assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
+
+	/* Reserve space for FMASK while converting bits back to bytes. */
+	rtex->fmask_bank_height = fmask.bankh;
+	rtex->fmask_offset = align(rtex->size, MAX2(256, fmask.bo_alignment));
+	rtex->fmask_size = (fmask.bo_size + 7) / 8;
+	rtex->size = rtex->fmask_offset + rtex->fmask_size;
+#if 0
+	printf("FMASK width=%u, height=%i, bits=%u, size=%u\n",
+	       fmask.npix_x, fmask.npix_y, fmask.bpe * fmask.nsamples, rtex->fmask_size);
+#endif
+}
+
+static void r600_texture_allocate_cmask(struct r600_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	unsigned cmask_tile_width = 8;
+	unsigned cmask_tile_height = 8;
+	unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
+	unsigned element_bits = 4;
+	unsigned cmask_cache_bits = 1024;
+	unsigned num_pipes = rscreen->tiling_info.num_channels;
+	unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+
+	unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
+	unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
+	unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
+	unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
+	unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
+
+	unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width);
+	unsigned height = align(rtex->surface.npix_y, macro_tile_height);
+
+	unsigned base_align = num_pipes * pipe_interleave_bytes;
+	unsigned slice_bytes =
+		((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
+	unsigned size = rtex->surface.array_size * align(slice_bytes, base_align);
+
+	assert(macro_tile_width % 128 == 0);
+	assert(macro_tile_height % 128 == 0);
+
+	rtex->cmask_slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
+	rtex->cmask_offset = align(rtex->size, MAX2(256, base_align));
+	rtex->cmask_size = size;
+	rtex->size = rtex->cmask_offset + rtex->cmask_size;
+#if 0
+	printf("CMASK: macro tile width = %u, macro tile height = %u, "
+	       "pitch elements = %u, height = %u, slice tile max = %u\n",
+	       macro_tile_width, macro_tile_height, pitch_elements, height,
+	       rtex->cmask_slice_tile_max);
+#endif
+}
+
 static struct r600_texture *
 r600_texture_create_object(struct pipe_screen *screen,
 			   const struct pipe_resource *base,
@@ -287,6 +378,17 @@ r600_texture_create_object(struct pipe_screen *screen,
 		return NULL;
 	}
 
+	if (base->nr_samples > 1 && !rtex->is_depth && alloc_bo) {
+		r600_texture_allocate_fmask(rscreen, rtex);
+		r600_texture_allocate_cmask(rscreen, rtex);
+	}
+
+	if (!rtex->is_depth && base->nr_samples > 1 &&
+	    (!rtex->fmask_size || !rtex->cmask_size)) {
+		FREE(rtex);
+		return NULL;
+	}
+
 	/* Now create the backing buffer. */
 	if (!buf && alloc_bo) {
 		unsigned base_align = rtex->surface.bo_alignment;
@@ -301,6 +403,13 @@ r600_texture_create_object(struct pipe_screen *screen,
 		resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
 		resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 	}
+
+	if (rtex->cmask_size) {
+		/* Initialize the cmask to 0xCC (= compressed state). */
+		char *ptr = rscreen->ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+		memset(ptr + rtex->cmask_offset, 0xCC, rtex->cmask_size);
+		rscreen->ws->buffer_unmap(resource->cs_buf);
+	}
 	return rtex;
 }
 