aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2019-09-18 15:33:45 -0400
committerMarek Olšák <[email protected]>2019-09-23 15:34:54 -0400
commit500181b2ba0290473763115dfded4f4551b20eca (patch)
tree1e2eb2eb273a4760ca459ca022af4695b610df2d
parent09447ccc78f5b08e161bfed9fdfdbf7dd8999d88 (diff)
ac/nir: port Z compare value clamping from radeonsi
This fixes some dEQP tests. Reviewed-by: Connor Abbott <[email protected]>
-rw-r--r--src/amd/common/ac_nir_to_llvm.c34
1 files changed, 25 insertions, 9 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7294b293ca2..b4b4e423a8d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4019,8 +4019,10 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
case nir_tex_src_projector:
break;
case nir_tex_src_comparator:
- if (instr->is_shadow)
+ if (instr->is_shadow) {
args.compare = get_src(ctx, instr->src[i].src);
+ args.compare = ac_to_float(&ctx->ac, args.compare);
+ }
break;
case nir_tex_src_offset:
args.offset = get_src(ctx, instr->src[i].src);
@@ -4104,19 +4106,33 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
args.offset = pack;
}
- /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
- * so the depth comparison value isn't clamped for Z16 and
- * Z24 anymore. Do it manually here for GFX8-9; GFX10 has an explicitly
- * clamped 32-bit float format.
+ /* Section 8.23.1 (Depth Texture Comparison Mode) of the
+ * OpenGL 4.5 spec says:
+ *
+ * "If the texture’s internal format indicates a fixed-point
+ * depth texture, then D_t and D_ref are clamped to the
+ * range [0, 1]; otherwise no clamping is performed."
*
- * It's unnecessary if the original texture format was
- * Z32_FLOAT, but we don't know that here.
+ * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
+ * an explicitly clamped 32-bit float format.
*/
if (args.compare &&
ctx->ac.chip_class >= GFX8 &&
ctx->ac.chip_class <= GFX9 &&
- ctx->abi->clamp_shadow_reference)
- args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare));
+ ctx->abi->clamp_shadow_reference) {
+ LLVMValueRef upgraded, clamped;
+
+ upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
+ LLVMConstInt(ctx->ac.i32, 3, false), "");
+ upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
+ LLVMConstInt(ctx->ac.i32, 29, false), "");
+ upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
+ clamped = ac_build_clamp(&ctx->ac, args.compare);
+ args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped,
+ args.compare, "");
+ }
/* pack derivatives */
if (ddx || ddy) {