diff options
author | Rob Clark <[email protected]> | 2016-04-19 09:02:23 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2016-04-24 13:40:57 -0400 |
commit | adf795432f788b33822d3a94b704be4ca536c8f1 (patch) | |
tree | ea8b334ab469492fa9666eabba7a75a19172aca8 /src/gallium/drivers/freedreno/ir3 | |
parent | a148300b13fbda239146c163549868cc903c0a70 (diff) |
freedreno/a4xx: better workaround for astc+srgb
This *seems* like a hw bug, and maybe only applies to certain a4xx
variants/revisions. But setting the SRGB bit in sampler view state
(texconst0) causes invalid alpha for ASTC textures. Work around this
setting up a second texture state and using that to sample alpha
separately.
This way, srgb->linear conversion happens in hw *prior* to
interpolation.
This fixes 546 dEQP tests: dEQP-GLES3.functional.texture.*astc*srgb*
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/ir3')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 92 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 11 |
5 files changed, 107 insertions, 12 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f68275e568c..a40d3aa3b40 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -377,6 +377,12 @@ struct ir3 { unsigned keeps_count, keeps_sz; struct ir3_instruction **keeps; + /* Track texture sample instructions which need texture state + * patched in (for astc-srgb workaround): + */ + unsigned astc_srgb_count, astc_srgb_sz; + struct ir3_instruction **astc_srgb; + /* List of blocks: */ struct list_head block_list; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 027673afe1c..b8b9e4a0518 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -94,6 +94,7 @@ static void print_usage(void) printf(" --saturate-s MASK - bitmask of samplers to saturate S coord\n"); printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n"); printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n"); + printf(" --astc-srgb MASK - bitmask of samplers to enable astc-srgb workaround\n"); printf(" --stream-out - enable stream-out (aka transform feedback)\n"); printf(" --ucp MASK - bitmask of enabled user-clip-planes\n"); printf(" --gpu GPU_ID - specify gpu-id (default 320)\n"); @@ -174,6 +175,13 @@ int main(int argc, char **argv) continue; } + if (!strcmp(argv[n], "--astc-srgb")) { + debug_printf(" %s %s", argv[n], argv[n+1]); + key.vastc_srgb = key.fastc_srgb = strtol(argv[n+1], NULL, 0); + n += 2; + continue; + } + if (!strcmp(argv[n], "--stream-out")) { struct pipe_stream_output_info *so = &s.stream_output; debug_printf(" %s", argv[n]); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 940ca7744a2..abdb1c27c91 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -108,8 +108,10 @@ struct ir3_compile { */ bool array_index_add_half; - /* for looking up which system value is which */ - unsigned sysval_semantics[8]; + /* on a4xx, bitmask of samplers which need astc+srgb workaround: */ + unsigned astc_srgb; + + unsigned max_texture_index; /* set if we encounter something we can't handle yet, so we * can bail cleanly and fallback to TGSI compiler f/e @@ -134,6 +136,12 @@ compile_init(struct ir3_compiler *compiler, ctx->levels_add_one = false; ctx->unminify_coords = false; ctx->array_index_add_half = true; + + if (so->type == SHADER_VERTEX) + ctx->astc_srgb = so->key.vastc_srgb; + else if (so->type == SHADER_FRAGMENT) + ctx->astc_srgb = so->key.fastc_srgb; + } else { /* no special handling for "flat" */ ctx->flat_bypass = false; @@ -620,14 +628,14 @@ create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp) */ static void split_dest(struct ir3_block *block, struct ir3_instruction **dst, - struct ir3_instruction *src, unsigned n) + struct ir3_instruction *src, unsigned base, unsigned n) { struct ir3_instruction *prev = NULL; for (int i = 0, j = 0; i < n; i++) { struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); ir3_reg_create(split, 0, IR3_REG_SSA); ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; - split->fo.off = i; + split->fo.off = i + base; if (prev) { split->cp.left = prev; @@ -637,7 +645,7 @@ split_dest(struct ir3_block *block, struct ir3_instruction **dst, } prev = split; - if (src->regs[0]->wrmask & (1 << i)) + if (src->regs[0]->wrmask & (1 << (i + base))) dst[j++] = split; } } @@ -1543,12 +1551,35 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) if (opc == OPC_GETLOD) type = TYPE_U32; - sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, - flags, tex->texture_index, tex->texture_index, - create_collect(b, src0, nsrc0), - create_collect(b, src1, nsrc1)); + unsigned tex_idx = tex->texture_index; + + ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx); + + struct ir3_instruction *col0 = create_collect(b, src0, nsrc0); + struct ir3_instruction *col1 = create_collect(b, src1, nsrc1); + + sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, flags, + tex_idx, tex_idx, col0, col1); + + if ((ctx->astc_srgb & (1 << tex_idx)) && !nir_tex_instr_is_query(tex)) { + /* only need first 3 components: */ + sam->regs[0]->wrmask = 0x7; + split_dest(b, dst, sam, 0, 3); - split_dest(b, dst, sam, 4); + /* we need to sample the alpha separately with a non-ASTC + * texture state: + */ + sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_W, flags, + tex_idx, tex_idx, col0, col1); + + array_insert(ctx->ir->astc_srgb, sam); + + /* fixup .w component: */ + split_dest(b, &dst[3], sam, 3, 1); + } else { + /* normal (non-workaround) case: */ + split_dest(b, dst, sam, 0, 4); + } /* GETLOD returns results in 4.8 fixed point */ if (opc == OPC_GETLOD) { @@ -1576,7 +1607,7 @@ emit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex) /* even though there is only one component, since it ends * up in .z rather than .x, we need a split_dest() */ - split_dest(b, dst, sam, 3); + split_dest(b, dst, sam, 0, 3); /* The # of levels comes from getinfo.z. We need to add 1 to it, since * the value in TEX_CONST_0 is zero-based. @@ -1610,7 +1641,7 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex) sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, tex->texture_index, tex->texture_index, lod, NULL); - split_dest(b, dst, sam, 4); + split_dest(b, dst, sam, 0, 4); /* Array size actually ends up in .w rather than .z. This doesn't * matter for miplevel 0, but for higher mips the value in z is @@ -2268,6 +2299,40 @@ fixup_frag_inputs(struct ir3_compile *ctx) ir->inputs = inputs; } +/* Fixup tex sampler state for astc/srgb workaround instructions. We + * need to assign the tex state indexes for these after we know the + * max tex index. + */ +static void +fixup_astc_srgb(struct ir3_compile *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + /* indexed by original tex idx, value is newly assigned alpha sampler + * state tex idx. Zero is invalid since there is at least one sampler + * if we get here. + */ + unsigned alt_tex_state[16] = {0}; + unsigned tex_idx = ctx->max_texture_index + 1; + unsigned idx = 0; + + so->astc_srgb.base = tex_idx; + + for (unsigned i = 0; i < ctx->ir->astc_srgb_count; i++) { + struct ir3_instruction *sam = ctx->ir->astc_srgb[i]; + + compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state)); + + if (alt_tex_state[sam->cat5.tex] == 0) { + /* assign new alternate/alpha tex state slot: */ + alt_tex_state[sam->cat5.tex] = tex_idx++; + so->astc_srgb.orig_idx[idx++] = sam->cat5.tex; + so->astc_srgb.count++; + } + + sam->cat5.tex = alt_tex_state[sam->cat5.tex]; + } +} + int ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_shader_variant *so) @@ -2433,6 +2498,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, so->inputs[i].compmask = compmask; } + if (ctx->astc_srgb) + fixup_astc_srgb(ctx); + /* We need to do legalize after (for frag shader's) the "bary.f" * offsets (inloc) have been assigned. */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index c05b52e7a5e..435a565e61e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -223,6 +223,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) key.vsaturate_s = 0; key.vsaturate_t = 0; key.vsaturate_r = 0; + key.vastc_srgb = 0; } break; case SHADER_VERTEX: @@ -233,6 +234,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) key.fsaturate_s = 0; key.fsaturate_t = 0; key.fsaturate_r = 0; + key.fastc_srgb = 0; } break; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index c89dc29ff08..e81e80d328f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -104,6 +104,9 @@ struct ir3_shader_key { * shader: */ uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + + /* bitmask of samplers which need astc srgb workaround: */ + uint16_t vastc_srgb, fastc_srgb; }; static inline bool @@ -222,6 +225,14 @@ struct ir3_shader_variant { uint32_t val[4]; } immediates[64]; + /* for astc srgb workaround, the number/base of additional + * alpha tex states we need, and index of original tex states + */ + struct { + unsigned base, count; + unsigned orig_idx[16]; + } astc_srgb; + /* shader variants form a linked list: */ struct ir3_shader_variant *next; |