summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c23
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.c10
2 files changed, 33 insertions, 0 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 197196383b0..7a3c8a8579c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -2035,6 +2035,29 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
split_dest(b, tmp, sam, 0, 4);
+ /* get_size instruction returns size in bytes instead of texels
+ * for imageBuffer, so we need to divide it by the pixel size
+ * of the image format.
+ *
+ * TODO: This is at least true on a5xx. Check other gens.
+ */
+ enum glsl_sampler_dim dim =
+ glsl_get_sampler_dim(glsl_without_array(var->type));
+ if (dim == GLSL_SAMPLER_DIM_BUF) {
+ /* Since all the possible values the divisor can take are
+ * power-of-two (4, 8, or 16), the division is implemented
+ * as a shift-right.
+ * During shader setup, the log2 of the image format's
+ * bytes-per-pixel should have been emitted in 2nd slot of
+ * image_dims. See ir3_shader::emit_image_dims().
+ */
+ unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
+ ctx->so->const_layout.image_dims.off[var->data.driver_location];
+ struct ir3_instruction *aux = create_uniform(ctx, cb + 1);
+
+ tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0);
+ }
+
for (unsigned i = 0; i < ncoords; i++)
dst[i] = tmp[i];
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 9bf0a7f999c..b3127ff8c38 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -699,6 +699,16 @@ emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v,
} else {
dims[off + 2] = rsc->slices[lvl].size0;
}
+ } else {
+ /* For buffer-backed images, the log2 of the format's
+ * bytes-per-pixel is placed on the 2nd slot. This is useful
+ * when emitting image_size instructions, for which we need
+ * to divide by bpp for image buffers. Since the bpp
+ * can only be power-of-two, the division is implemented
+ * as a SHR, and for that it is handy to have the log2 of
+ * bpp as a constant. (log2 = first-set-bit - 1)
+ */
+ dims[off + 1] = ffs(dims[off + 0]) - 1;
}
}