summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_image.c 32
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 73
2 files changed, 83 insertions, 22 deletions
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c
index 6699375b9ae..a561643facc 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_image.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.c
@@ -81,17 +81,43 @@ static void translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
lvl = 0;
img->offset = pimg->u.buf.offset;
img->pitch = pimg->u.buf.size;
- img->array_pitch = 0;
} else {
lvl = pimg->u.tex.level;
img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
img->pitch = rsc->slices[lvl].pitch * rsc->cpp;
- img->array_pitch = rsc->layer_size;
}
img->width = u_minify(prsc->width0, lvl);
img->height = u_minify(prsc->height0, lvl);
- img->depth = u_minify(prsc->depth0, lvl);
+
+ unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
+
+ switch (prsc->target) {
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ img->array_pitch = rsc->layer_size;
+ img->depth = 1;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ img->array_pitch = rsc->layer_size;
+ img->depth = layers;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ img->array_pitch = rsc->layer_size;
+ img->depth = layers / 6;
+ break;
+ case PIPE_TEXTURE_3D:
+ img->array_pitch = rsc->slices[lvl].size0;
+ img->depth = u_minify(prsc->depth0, lvl);
+ break;
+ default:
+ img->array_pitch = 0;
+ img->depth = 0;
+ break;
+ }
}
static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index ed87eff9976..a14239bf719 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1849,26 +1849,46 @@ get_image_slot(struct ir3_context *ctx, const nir_variable *var)
return max_samplers - var->data.driver_location - 1;
}
+/* see tex_info() for equiv logic for texture instructions.. it would be
+ * nice if this could be better unified..
+ */
static unsigned
-get_image_coords(const nir_variable *var)
+get_image_coords(const nir_variable *var, unsigned *flagsp)
{
- switch (glsl_get_sampler_dim(glsl_without_array(var->type))) {
+ const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned coords, flags = 0;
+
+ switch (glsl_get_sampler_dim(type)) {
case GLSL_SAMPLER_DIM_1D:
case GLSL_SAMPLER_DIM_BUF:
- return 1;
+ coords = 1;
break;
case GLSL_SAMPLER_DIM_2D:
case GLSL_SAMPLER_DIM_RECT:
case GLSL_SAMPLER_DIM_EXTERNAL:
case GLSL_SAMPLER_DIM_MS:
- return 2;
+ coords = 2;
+ break;
case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_CUBE:
- return 3;
+ flags |= IR3_INSTR_3D;
+ coords = 3;
+ break;
default:
unreachable("bad sampler dim");
return 0;
}
+
+ if (glsl_sampler_type_is_array(type)) {
+ /* note: unlike tex_info(), adjust # of coords to include array idx: */
+ coords++;
+ flags |= IR3_INSTR_A;
+ }
+
+ if (flagsp)
+ *flagsp = flags;
+
+ return coords;
}
static type_t
@@ -1893,7 +1913,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
{
struct ir3_block *b = ctx->block;
struct ir3_instruction *offset;
- unsigned ncoords = get_image_coords(var);
+ unsigned ncoords = get_image_coords(var, NULL);
/* to calculate the byte offset (yes, uggg) we need (up to) three
* const values to know the bytes per pixel, and y and z stride:
@@ -1940,13 +1960,9 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
const nir_variable *var = intr->variables[0]->var;
struct ir3_instruction *sam;
struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
- unsigned ncoords = get_image_coords(var);
+ unsigned flags, ncoords = get_image_coords(var, &flags);
unsigned tex_idx = get_image_slot(ctx, var);
type_t type = get_image_type(var);
- unsigned flags = 0;
-
- if (ncoords == 3)
- flags |= IR3_INSTR_3D;
sam = ir3_SAM(b, OPC_ISAM, type, TGSI_WRITEMASK_XYZW, flags,
tex_idx, tex_idx, create_collect(ctx, coords, ncoords), NULL);
@@ -1966,7 +1982,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *stib, *offset;
struct ir3_instruction * const *value = get_src(ctx, &intr->src[2]);
struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
- unsigned ncoords = get_image_coords(var);
+ unsigned ncoords = get_image_coords(var, NULL);
unsigned tex_idx = get_image_slot(ctx, var);
/* src0 is value
@@ -2001,19 +2017,38 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
{
struct ir3_block *b = ctx->block;
const nir_variable *var = intr->variables[0]->var;
- unsigned ncoords = get_image_coords(var);
unsigned tex_idx = get_image_slot(ctx, var);
struct ir3_instruction *sam, *lod;
- unsigned flags = 0;
-
- if (ncoords == 3)
- flags = IR3_INSTR_3D;
+ unsigned flags, ncoords = get_image_coords(var, &flags);
lod = create_immed(b, 0);
sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags,
tex_idx, tex_idx, lod, NULL);
- split_dest(b, dst, sam, 0, ncoords);
+ /* Array size actually ends up in .w rather than .z. This doesn't
+ * matter for miplevel 0, but for higher mips the value in z is
+ * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
+ * returned, which means that we have to add 1 to it for arrays for
+ * a3xx.
+ *
+ * Note use a temporary dst and then copy, since the size of the dst
+ * array that is passed in is based on nir's understanding of the
+ * result size, not the hardware's
+ */
+ struct ir3_instruction *tmp[4];
+
+ split_dest(b, tmp, sam, 0, 4);
+
+ for (unsigned i = 0; i < ncoords; i++)
+ dst[i] = tmp[i];
+
+ if (flags & IR3_INSTR_A) {
+ if (ctx->levels_add_one) {
+ dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0);
+ } else {
+ dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32);
+ }
+ }
}
/* src[] = { coord, sample_index, value, compare }. const_index[] = {} */
@@ -2024,7 +2059,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
const nir_variable *var = intr->variables[0]->var;
struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
- unsigned ncoords = get_image_coords(var);
+ unsigned ncoords = get_image_coords(var, NULL);
image = create_immed(b, get_image_slot(ctx, var));