summary refs log tree commit diff stats
path: root/src/gallium/drivers/radeonsi/si_shader.c
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2016-10-11 23:19:46 +0200
committer Marek Olšák <[email protected]> 2016-10-13 19:00:51 +0200
commit d4d9ec55c589156df4edc227a86b4a8c41048d58 (patch)
tree 646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/drivers/radeonsi/si_shader.c
parent a077185ea9d685967844b68aa09da6bd8aa430da (diff)
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles the memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance though. (I tested Talos Principle and DiRT: Showdown; the latter improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now.) Tested-by: Edmondo Tommasina <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 18
1 files changed, 16 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index fc50205633d..b2d76994996 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4607,12 +4607,26 @@ static void tex_fetch_args(
/* Pack depth comparison value */
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
+ LLVMValueRef z;
+
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
+ z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
} else {
assert(ref_pos >= 0);
- address[count++] = coords[ref_pos];
+ z = coords[ref_pos];
}
+
+ /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here.
+ *
+ * It's unnecessary if the original texture format was
+ * Z32_FLOAT, but we don't know that here.
+ */
+ if (ctx->screen->b.chip_class == VI)
+ z = radeon_llvm_saturate(bld_base, z);
+
+ address[count++] = z;
}
/* Pack user derivatives */