diff options
author | Nicolai Hähnle <[email protected]> | 2016-11-02 19:07:40 +0100 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2016-11-04 21:30:18 +0100 |
commit | 2c875158e2763d57e5dae8892af96a894bdb7dc9 (patch) | |
tree | dfcb3a05ef6630e6e09e29210a7f3c1032f9865b /src/gallium/drivers | |
parent | 322483f71b068b3bbf69e5434e888f3fd3f4589e (diff) |
radeonsi: fix vertex fetches for 2_10_10_10 formats
The hardware always treats the 2-bit alpha channel of 2_10_10_10 vertex
formats as unsigned, so add a shader workaround for the signed variants
(SNORM, SSCALED, SINT). This is rare enough that we'll just build a
monolithic vertex shader.
The SINT case cannot actually happen in OpenGL, but I've included it for
completeness since it's just a mix of the other cases.
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 54 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 4 |
5 files changed, 78 insertions, 6 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 28a8b1fe9e9..b170eb97d48 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -369,17 +369,16 @@ static LLVMValueRef get_instance_index_for_fetch( } static void declare_input_vs( - struct si_shader_context *radeon_bld, + struct si_shader_context *ctx, unsigned input_index, const struct tgsi_full_declaration *decl, LLVMValueRef out[4]) { - struct lp_build_context *base = &radeon_bld->soa.bld_base.base; + struct lp_build_context *base = &ctx->soa.bld_base.base; struct gallivm_state *gallivm = base->gallivm; - struct si_shader_context *ctx = - si_shader_context(&radeon_bld->soa.bld_base); unsigned chan; + unsigned fix_fetch; LLVMValueRef t_list_ptr; LLVMValueRef t_offset; @@ -399,7 +398,7 @@ static void declare_input_vs( /* Build the attribute offset */ attribute_offset = lp_build_const_int32(gallivm, 0); - buffer_index = LLVMGetParam(radeon_bld->main_fn, + buffer_index = LLVMGetParam(ctx->main_fn, ctx->param_vertex_index0 + input_index); @@ -416,6 +415,45 @@ static void declare_input_vs( out[chan] = LLVMBuildExtractElement(gallivm->builder, input, llvm_chan, ""); } + + fix_fetch = (ctx->shader->key.vs.fix_fetch >> (2 * input_index)) & 3; + if (fix_fetch) { + /* The hardware returns an unsigned value; convert it to a + * signed one. + */ + LLVMValueRef tmp = out[3]; + LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0); + + /* First, recover the sign-extended signed integer value. */ + if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) + tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, ""); + else + tmp = LLVMBuildBitCast(gallivm->builder, tmp, ctx->i32, ""); + + /* For the integer-like cases, do a natural sign extension. + * + * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 + * and happen to contain 0, 1, 2, 3 as the two LSBs of the + * exponent. 
+ */ + tmp = LLVMBuildShl(gallivm->builder, tmp, + fix_fetch == SI_FIX_FETCH_A2_SNORM ? + LLVMConstInt(ctx->i32, 7, 0) : c30, ""); + tmp = LLVMBuildAShr(gallivm->builder, tmp, c30, ""); + + /* Convert back to the right type. */ + if (fix_fetch == SI_FIX_FETCH_A2_SNORM) { + LLVMValueRef clamp; + LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0); + tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, ""); + clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, tmp, neg_one, ""); + tmp = LLVMBuildSelect(gallivm->builder, clamp, neg_one, tmp, ""); + } else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) { + tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, ""); + } + + out[3] = tmp; + } } static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base, @@ -8102,11 +8140,15 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, /* LS, ES, VS are compiled on demand if the main part hasn't been * compiled for that stage. + * + * Vertex shaders are compiled on demand when a vertex fetch + * workaround must be applied. */ if (!mainp || (sel->type == PIPE_SHADER_VERTEX && (shader->key.vs.as_es != mainp->key.vs.as_es || - shader->key.vs.as_ls != mainp->key.vs.as_ls)) || + shader->key.vs.as_ls != mainp->key.vs.as_ls || + shader->key.vs.fix_fetch)) || (sel->type == PIPE_SHADER_TESS_EVAL && shader->key.tes.as_es != mainp->key.tes.as_es) || (sel->type == PIPE_SHADER_TESS_CTRL && diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d8ab2a41c9c..59e7bfb0457 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -233,6 +233,14 @@ enum { TGSI_SEMANTIC_DEFAULT_TESSINNER_SI, }; +/* For VS shader key fix_fetch. 
*/ +enum { + SI_FIX_FETCH_NONE = 0, + SI_FIX_FETCH_A2_SNORM = 1, + SI_FIX_FETCH_A2_SSCALED = 2, + SI_FIX_FETCH_A2_SINT = 3, +}; + struct si_shader; /* A shader selector is a gallium CSO and contains shader variants and @@ -400,6 +408,9 @@ union si_shader_key { struct si_vs_epilog_bits epilog; unsigned as_es:1; /* export shader */ unsigned as_ls:1; /* local shader */ + + /* One pair of bits for every input: SI_FIX_FETCH_* enums. */ + uint32_t fix_fetch; } vs; struct { struct si_tcs_epilog_bits epilog; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 642ce79ab44..24c7b100fea 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3281,6 +3281,20 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); v->format_size[i] = desc->block.bits / 8; + + /* The hardware always treats the 2-bit alpha channel as + * unsigned, so a shader workaround is needed. + */ + if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { + if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { + v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i); + } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { + v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i); + } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) { + /* This isn't actually used in OpenGL. 
*/ + v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i); + } + } } memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 3ebf578e593..c444a699ed6 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -99,6 +99,7 @@ struct si_stencil_ref { struct si_vertex_element { unsigned count; + uint32_t fix_fetch; uint32_t rsrc_word3[SI_MAX_ATTRIBS]; uint32_t format_size[SI_MAX_ATTRIBS]; struct pipe_vertex_element elements[SI_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 2a41bf1c20a..9e95fea33af 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -872,6 +872,10 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, for (i = 0; i < count; ++i) key->vs.prolog.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; + + key->vs.fix_fetch = + sctx->vertex_elements->fix_fetch & + u_bit_consecutive(0, 2 * count); } if (sctx->tes_shader.cso) key->vs.as_ls = 1; |