diff options
-rw-r--r-- | src/gallium/auxiliary/util/u_blitter.c | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_blitter.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 39 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 42 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 29 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 21 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 133 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_sq.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 4 |
13 files changed, 270 insertions, 37 deletions
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 4d6cdd7a244..f4ac4aa8685 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -359,6 +359,14 @@ void util_blitter_destroy(struct blitter_context *blitter) FREE(ctx); } +void util_blitter_set_texture_multisample(struct blitter_context *blitter, + boolean supported) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + + ctx->has_texture_multisample = supported; +} + static void blitter_set_running_flag(struct blitter_context_priv *ctx) { if (ctx->base.running) { diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index de063937793..c49faaad717 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -135,6 +135,12 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) return blitter->pipe; } +/** + * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver. + */ +void util_blitter_set_texture_multisample(struct blitter_context *blitter, + boolean supported); + /* The default function to draw a rectangle. This can only be used * inside of the draw_rectangle callback if the driver overrides it. */ void util_blitter_draw_rectangle(struct blitter_context *blitter, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 96e246a6e68..17b7e9d2c72 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -633,7 +633,7 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen, return FALSE; if (sample_count > 1) { - if (rscreen->info.drm_minor < 19) + if (!rscreen->has_msaa) return FALSE; switch (sample_count) { @@ -1074,11 +1074,24 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, S_030004_TEX_DEPTH(depth - 1) | S_030004_ARRAY_MODE(array_mode)); view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; - if (state->u.tex.last_level && texture->nr_samples <= 1) { + + /* TEX_RESOURCE_WORD3.MIP_ADDRESS */ + if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) { + /* XXX the 2x and 4x cases are broken. */ + if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) { + /* disable FMASK (0 = disabled) */ + view->tex_resource_words[3] = 0; + view->skip_mip_address_reloc = true; + } else { + /* FMASK should be in MIP_ADDRESS for multisample textures */ + view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8; + } + } else if (state->u.tex.last_level && texture->nr_samples <= 1) { view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8; } else { view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; } + view->tex_resource_words[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian)); @@ -1582,9 +1595,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, rctx->framebuffer.export_16bpc = false; } - /* Cayman can fetch from a compressed MSAA colorbuffer, - * so it's pointless to track them. */ - if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) { + if (rtex->fmask_size && rtex->cmask_size) { rctx->framebuffer.compressed_cb_mask |= 1 << i; } } @@ -2258,13 +2269,15 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, r600_write_value(cs, (resource_id_base + resource_index) * 8); r600_write_array(cs, 8, rview->tex_resource_words); - /* XXX The kernel needs two relocations. This is stupid. */ reloc = r600_context_bo_reloc(rctx, rview->tex_resource, RADEON_USAGE_READ); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); r600_write_value(cs, reloc); - r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, reloc); + + if (!rview->skip_mip_address_reloc) { + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); + r600_write_value(cs, reloc); + } } state->dirty_mask = 0; } @@ -3345,6 +3358,16 @@ void *evergreen_create_decompress_blend(struct r600_context *rctx) return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS); } +void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx) +{ + struct pipe_blend_state blend; + + memset(&blend, 0, sizeof(blend)); + blend.independent_blend_enable = true; + blend.rt[0].colormask = 0xf; + return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_FMASK_DECOMPRESS); +} + void *evergreen_create_db_flush_dsa(struct r600_context *rctx) { struct pipe_depth_stencil_alpha_state dsa = {{0}}; diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 98df83de918..edb1a55dc8a 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -486,7 +486,7 @@ #define V_028808_CB_ELIMINATE_FAST_CLEAR 0x00000002 #define V_028808_CB_RESOLVE 0x00000003 #define V_028808_CB_DECOMPRESS 0x00000004 -#define V_028808_CB_FASK_DECOMPRESS 0x00000005 +#define V_028808_CB_FMASK_DECOMPRESS 0x00000005 #define S_028808_ROP3(x) (((x) & 0xFF) << 16) #define G_028808_ROP3(x) (((x) >> 16) & 0xFF) #define C_028808_ROP3 0xFF00FFFF diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 51a2e4ee9e5..f04a92062f6 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -255,7 +255,10 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void) return tex; } -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family) +void r600_bytecode_init(struct r600_bytecode *bc, + enum chip_class chip_class, + enum radeon_family family, + enum r600_msaa_texture_mode msaa_texture_mode) { if ((chip_class == R600) && (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) { @@ -268,6 +271,7 @@ void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, en LIST_INITHEAD(&bc->cf); bc->chip_class = chip_class; + bc->msaa_texture_mode = msaa_texture_mode; } static int r600_bytecode_add_cf(struct r600_bytecode *bc) @@ -1736,6 +1740,7 @@ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecod static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) { bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | + EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) | S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); @@ -2766,7 +2771,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, assert(count < 32); memset(&bc, 0, sizeof(bc)); - r600_bytecode_init(&bc, rctx->chip_class, rctx->family); + r600_bytecode_init(&bc, rctx->chip_class, rctx->family, + rctx->screen->msaa_texture_support); for (i = 0; i < count; i++) { if (elements[i].instance_divisor > 1) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 8a9f3189be0..2c7db2cefd7 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -62,6 +62,7 @@ struct r600_bytecode_alu { struct r600_bytecode_tex { struct list_head list; unsigned inst; + unsigned inst_mod; unsigned resource_id; unsigned src_gpr; unsigned src_rel; @@ -195,6 +196,7 @@ struct r600_cf_callstack { struct r600_bytecode { enum chip_class chip_class; + enum r600_msaa_texture_mode msaa_texture_mode; int type; struct list_head cf; struct r600_bytecode_cf *cf_last; @@ -219,7 +221,10 @@ struct r600_bytecode { int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf); /* r600_asm.c */ -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family); +void r600_bytecode_init(struct r600_bytecode *bc, + enum chip_class chip_class, + enum radeon_family family, + enum r600_msaa_texture_mode msaa_texture_mode); void r600_bytecode_clear(struct r600_bytecode *bc); int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu); int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 8597b8dfcf7..a19248da3a2 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -252,12 +252,29 @@ static void r600_blit_decompress_color(struct pipe_context *ctx, { struct r600_context *rctx = (struct r600_context *)ctx; unsigned layer, level, checked_last_layer, max_layer; - - assert(rctx->chip_class != CAYMAN); + void *blend_decompress; if (!rtex->dirty_level_mask) return; + switch (rctx->screen->msaa_texture_support) { + case MSAA_TEXTURE_DECOMPRESSED: + blend_decompress = rctx->custom_blend_decompress; + break; + case MSAA_TEXTURE_COMPRESSED: + /* XXX the 2x and 4x cases are broken. */ + if (rtex->resource.b.b.nr_samples == 8) + blend_decompress = rctx->custom_blend_fmask_decompress; + else + blend_decompress = rctx->custom_blend_decompress; + break; + case MSAA_TEXTURE_SAMPLE_ZERO: + default: + /* Nothing to do. */ + rtex->dirty_level_mask = 0; + return; + } + for (level = first_level; level <= last_level; level++) { if (!(rtex->dirty_level_mask & (1 << level))) continue; @@ -278,8 +295,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx, cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); r600_blitter_begin(ctx, R600_DECOMPRESS); - util_blitter_custom_color(rctx->blitter, cbsurf, - rctx->custom_blend_decompress); + util_blitter_custom_color(rctx->blitter, cbsurf, blend_decompress); r600_blitter_end(ctx); pipe_surface_reference(&cbsurf, NULL); @@ -299,13 +315,6 @@ void r600_decompress_color_textures(struct r600_context *rctx, unsigned i; unsigned mask = textures->compressed_colortex_mask; - /* Cayman cannot decompress an MSAA colorbuffer, - * but it can read it compressed, so skip this. */ - assert(rctx->chip_class != CAYMAN); - if (rctx->chip_class == CAYMAN) { - return; - } - while (mask) { struct pipe_sampler_view *view; struct r600_texture *tex; @@ -333,7 +342,6 @@ static bool r600_decompress_subresource(struct pipe_context *ctx, unsigned level, unsigned first_layer, unsigned last_layer) { - struct r600_context *rctx = (struct r600_context *)ctx; struct r600_texture *rtex = (struct r600_texture*)tex; if (rtex->is_depth && !rtex->is_flushing_texture) { @@ -344,7 +352,7 @@ static bool r600_decompress_subresource(struct pipe_context *ctx, level, level, first_layer, last_layer, 0, u_max_sample(tex)); - } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) { + } else if (rtex->fmask_size && rtex->cmask_size) { r600_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); } @@ -459,6 +467,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_sampler_view src_templ, *src_view; unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL; struct pipe_box sbox; + bool copy_all_samples; /* Handle buffers first. */ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { @@ -558,16 +567,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx, src_widthFL, src_heightFL); } + copy_all_samples = rctx->screen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO; + /* Copy. */ - /* XXX Multisample texturing is unimplemented on Cayman. In the meantime, - * copy only the first sample (which is the only one that is uncompressed - * and therefore doesn't return garbage). */ r600_blitter_begin(ctx, R600_COPY_TEXTURE); util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty, abs(src_box->width), abs(src_box->height), src_view, src_box, src_width0, src_height0, PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, - rctx->chip_class != CAYMAN); + copy_all_samples); r600_blitter_end(ctx); pipe_surface_reference(&dst_view, NULL); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 916fa381a33..7a1e1353553 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -171,6 +171,9 @@ static void r600_destroy_context(struct pipe_context *context) if (rctx->custom_blend_decompress) { rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress); } + if (rctx->custom_blend_fmask_decompress) { + rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_fmask_decompress); + } util_unreference_framebuffer_state(&rctx->framebuffer.state); r600_context_fini(rctx); @@ -264,6 +267,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx); rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx); + rctx->custom_blend_fmask_decompress = evergreen_create_fmask_decompress_blend(rctx); rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR || rctx->family == CHIP_PALM || rctx->family == CHIP_SUMO || @@ -289,6 +293,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) goto fail; + util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa); rctx->blitter->draw_rectangle = r600_draw_rectangle; r600_begin_new_cs(rctx); @@ -393,7 +398,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_COMPUTE: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - case PIPE_CAP_TEXTURE_MULTISAMPLE: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: @@ -402,6 +406,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_GLSL_FEATURE_LEVEL: return 130; + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO; + /* Supported except the original R600. */ case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: @@ -947,6 +954,26 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) break; } + /* MSAA support. */ + switch (rscreen->chip_class) { + case R600: + case R700: + rscreen->has_msaa = rscreen->info.drm_minor >= 22; + rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED; + break; + case EVERGREEN: + rscreen->has_msaa = rscreen->info.drm_minor >= 19; + rscreen->msaa_texture_support = + rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED : + MSAA_TEXTURE_DECOMPRESSED; + break; + case CAYMAN: + rscreen->has_msaa = rscreen->info.drm_minor >= 19; + /* We should be able to read compressed MSAA textures, but it doesn't work. */ + rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO; + break; + } + if (r600_init_tiling(rscreen)) { FREE(rscreen); return NULL; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 17dab7f23d5..238ab1676f4 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -184,6 +184,22 @@ struct r600_pipe_fences { pipe_mutex mutex; }; +enum r600_msaa_texture_mode { + /* If the hw can fetch the first sample only (no decompression available). + * This means MSAA texturing is not fully implemented. */ + MSAA_TEXTURE_SAMPLE_ZERO, + + /* If the hw can fetch decompressed MSAA textures. + * Supported families: R600, R700, Evergreen. + * Cayman cannot use this, because it cannot do the decompression. */ + MSAA_TEXTURE_DECOMPRESSED, + + /* If the hw can fetch compressed MSAA textures, which means shaders can + * read resolved FMASK. This yields the best performance. + * Supported families: Evergreen, Cayman. */ + MSAA_TEXTURE_COMPRESSED +}; + struct r600_screen { struct pipe_screen screen; struct radeon_winsys *ws; @@ -191,6 +207,8 @@ struct r600_screen { enum chip_class chip_class; struct radeon_info info; bool has_streamout; + bool has_msaa; + enum r600_msaa_texture_mode msaa_texture_support; struct r600_tiling_info tiling_info; struct r600_pipe_fences fences; @@ -205,6 +223,7 @@ struct r600_pipe_sampler_view { struct pipe_sampler_view base; struct r600_resource *tex_resource; uint32_t tex_resource_words[8]; + bool skip_mip_address_reloc; }; struct r600_rasterizer_state { @@ -372,6 +391,7 @@ struct r600_context { void *custom_dsa_flush; void *custom_blend_resolve; void *custom_blend_decompress; + void *custom_blend_fmask_decompress; /* With rasterizer discard, there doesn't have to be a pixel shader. * In that case, we bind this one: */ void *dummy_pixel_shader; @@ -525,6 +545,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader void *evergreen_create_db_flush_dsa(struct r600_context *rctx); void *evergreen_create_resolve_blend(struct r600_context *rctx); void *evergreen_create_decompress_blend(struct r600_context *rctx); +void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx); boolean evergreen_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c56efda5347..0b586f3aedb 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1180,7 +1180,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, ctx.shader = shader; ctx.native_integers = true; - r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family); + r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family, + rscreen->msaa_texture_support); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -3796,10 +3797,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) unsigned src_gpr; int r, i, j; int opcode; + bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED && + inst->Instruction.Opcode == TGSI_OPCODE_TXF && + (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || + inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); /* Texture fetch instructions can only use gprs as source. * Also they cannot negate the source or take the absolute value */ - const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && - tgsi_tex_src_requires_loading(ctx, 0); + const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && + tgsi_tex_src_requires_loading(ctx, 0)) || + read_compressed_msaa; boolean src_loaded = FALSE; unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; uint8_t offset_x = 0, offset_y = 0, offset_z = 0; @@ -4070,6 +4076,127 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->temp_reg; } + /* Obtain the sample index for reading a compressed MSAA color texture. + * To read the FMASK, we use the ldfptr instruction, which tells us + * where the samples are stored. + * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, + * which is the identity mapping. Each nibble says which physical sample + * should be fetched to get that sample. + * + * Assume src.z contains the sample index. It should be modified like this: + * src.z = (ldfptr() >> (src.z * 4)) & 0xF; + * Then fetch the texel with src. + */ + if (read_compressed_msaa) { + unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 3 : 4; + unsigned temp = r600_get_temp(ctx); + assert(src_loaded); + + /* temp.w = ldfptr() */ + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.inst = SQ_TEX_INST_LD; + tex.inst_mod = 1; /* to indicate this is ldfptr */ + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; + tex.src_gpr = src_gpr; + tex.dst_gpr = temp; + tex.dst_sel_x = 7; /* mask out these components */ + tex.dst_sel_y = 7; + tex.dst_sel_z = 7; + tex.dst_sel_w = 0; /* store X */ + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + tex.offset_x = offset_x; + tex.offset_y = offset_y; + tex.offset_z = offset_z; + r = r600_bytecode_add_tex(ctx->bc, &tex); + if (r) + return r; + + /* temp.x = sample_index*4 */ + if (ctx->bc->chip_class == CAYMAN) { + for (i = 0 ; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = ctx->inst_info->r600_opcode; + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = i; + alu.dst.write = i == 0; + if (i == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* sample_index = temp.w >> temp.x */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT); + alu.src[0].sel = temp; + alu.src[0].chan = 3; + alu.src[1].sel = temp; + alu.src[1].chan = 0; + alu.dst.sel = src_gpr; + alu.dst.chan = sample_chan; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* sample_index & 0xF */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0xF; + alu.dst.sel = src_gpr; + alu.dst.chan = sample_chan; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; +#if 0 + /* visualize the FMASK */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; + alu.dst.chan = i; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +#endif + } + opcode = ctx->inst_info->r600_opcode; if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 4b2a19a07f7..587f88deb9e 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -375,6 +375,9 @@ #define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5) #define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1) #define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF +#define EG_S_SQ_TEX_WORD0_INST_MOD(x) (((x) & 0x3) << 5) +#define EG_G_SQ_TEX_WORD0_INST_MOD(x) (((x) >> 5) & 0x3) +#define EG_C_SQ_TEX_WORD0_INST_MOD 0xFFFFFF9F #define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) #define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) #define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7d07008f16d..1a8d55e8d36 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -585,7 +585,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen, return FALSE; if (sample_count > 1) { - if (rscreen->info.drm_minor < 22) + if (!rscreen->has_msaa) return FALSE; /* R11G11B10 is broken on R6xx. */ @@ -1988,7 +1988,6 @@ static void r600_emit_sampler_views(struct r600_context *rctx, r600_write_value(cs, (resource_id_base + resource_index) * 7); r600_write_array(cs, 7, rview->tex_resource_words); - /* XXX The kernel needs two relocations. This is stupid. */ reloc = r600_context_bo_reloc(rctx, rview->tex_resource, RADEON_USAGE_READ); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 65985c7653d..a4d3e461ef1 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -593,8 +593,8 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, dst->views.compressed_depthtex_mask &= ~(1 << i); } - /* Track compressed colorbuffers for Evergreen (Cayman doesn't need this). */ - if (rctx->chip_class != CAYMAN && rtex->cmask_size && rtex->fmask_size) { + /* Track compressed colorbuffers. */ + if (rtex->cmask_size && rtex->fmask_size) { dst->views.compressed_colortex_mask |= 1 << i; } else { dst->views.compressed_colortex_mask &= ~(1 << i); |