ac/nir: port Z compare value clamping from radeonsi

This fixes some dEQP tests.

Reviewed-by: Connor Abbott <[email protected]>
author: Marek Olšák <[email protected]> 2019-09-18 15:33:45 -0400
committer: Marek Olšák <[email protected]> 2019-09-23 15:34:54 -0400
commit: 500181b2ba0290473763115dfded4f4551b20eca (patch)
tree: 1e2eb2eb273a4760ca459ca022af4695b610df2d
parent: 09447ccc78f5b08e161bfed9fdfdbf7dd8999d88 (diff)
1 files changed, 25 insertions, 9 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7294b293ca2..b4b4e423a8d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4019,8 +4019,10 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 		case nir_tex_src_projector:
 			break;
 		case nir_tex_src_comparator:
-			if (instr->is_shadow)
+			if (instr->is_shadow) {
 				args.compare = get_src(ctx, instr->src[i].src);
+				args.compare = ac_to_float(&ctx->ac, args.compare);
+			}
 			break;
 		case nir_tex_src_offset:
 			args.offset = get_src(ctx, instr->src[i].src);
@@ -4104,19 +4106,33 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 		args.offset = pack;
 	}
 
-	/* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
-	 * so the depth comparison value isn't clamped for Z16 and
-	 * Z24 anymore. Do it manually here for GFX8-9; GFX10 has an explicitly
-	 * clamped 32-bit float format.
+	/* Section 8.23.1 (Depth Texture Comparison Mode) of the
+	 * OpenGL 4.5 spec says:
+	 *
+	 *    "If the texture’s internal format indicates a fixed-point
+	 *     depth texture, then D_t and D_ref are clamped to the
+	 *     range [0, 1]; otherwise no clamping is performed."
 	 *
-	 * It's unnecessary if the original texture format was
-	 * Z32_FLOAT, but we don't know that here.
+	 * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+	 * so the depth comparison value isn't clamped for Z16 and
+	 * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
+	 * an explicitly clamped 32-bit float format.
 	 */
 	if (args.compare &&
 	    ctx->ac.chip_class >= GFX8 &&
 	    ctx->ac.chip_class <= GFX9 &&
-	    ctx->abi->clamp_shadow_reference)
-		args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare));
+	    ctx->abi->clamp_shadow_reference) {
+		LLVMValueRef upgraded, clamped;
+
+		upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
+						   LLVMConstInt(ctx->ac.i32, 3, false), "");
+		upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
+					 LLVMConstInt(ctx->ac.i32, 29, false), "");
+		upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
+		clamped = ac_build_clamp(&ctx->ac, args.compare);
+		args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped,
+					       args.compare, "");
+	}
 
 	/* pack derivatives */
 	if (ddx || ddy) {
author	Marek Olšák <[email protected]>	2019-09-18 15:33:45 -0400
committer	Marek Olšák <[email protected]>	2019-09-23 15:34:54 -0400
commit	500181b2ba0290473763115dfded4f4551b20eca (patch)
tree	1e2eb2eb273a4760ca459ca022af4695b610df2d
parent	09447ccc78f5b08e161bfed9fdfdbf7dd8999d88 (diff)