diff options
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_image.c | 32
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 73
2 files changed, 83 insertions, 22 deletions
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c
index 6699375b9ae..a561643facc 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_image.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.c
@@ -81,17 +81,43 @@ static void translate_image(struct fd5_image *img, struct pipe_image_view *pimg)
 		lvl = 0;
 		img->offset = pimg->u.buf.offset;
 		img->pitch = pimg->u.buf.size;
-		img->array_pitch = 0;
 	} else {
 		lvl = pimg->u.tex.level;
 		img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
 		img->pitch = rsc->slices[lvl].pitch * rsc->cpp;
-		img->array_pitch = rsc->layer_size;
 	}
 
 	img->width = u_minify(prsc->width0, lvl);
 	img->height = u_minify(prsc->height0, lvl);
-	img->depth = u_minify(prsc->depth0, lvl);
+
+	unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
+
+	switch (prsc->target) {
+	case PIPE_TEXTURE_RECT:
+	case PIPE_TEXTURE_1D:
+	case PIPE_TEXTURE_2D:
+		img->array_pitch = rsc->layer_size;
+		img->depth = 1;
+		break;
+	case PIPE_TEXTURE_1D_ARRAY:
+	case PIPE_TEXTURE_2D_ARRAY:
+		img->array_pitch = rsc->layer_size;
+		img->depth = layers;
+		break;
+	case PIPE_TEXTURE_CUBE:
+	case PIPE_TEXTURE_CUBE_ARRAY:
+		img->array_pitch = rsc->layer_size;
+		img->depth = layers / 6;
+		break;
+	case PIPE_TEXTURE_3D:
+		img->array_pitch = rsc->slices[lvl].size0;
+		img->depth = u_minify(prsc->depth0, lvl);
+		break;
+	default:
+		img->array_pitch = 0;
+		img->depth = 0;
+		break;
+	}
 }
 
 static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index ed87eff9976..a14239bf719 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1849,26 +1849,46 @@ get_image_slot(struct ir3_context *ctx, const nir_variable *var)
 	return max_samplers - var->data.driver_location - 1;
 }
 
+/* see tex_info() for equiv logic for texture instructions.. it would be
+ * nice if this could be better unified..
+ */
 static unsigned
-get_image_coords(const nir_variable *var)
+get_image_coords(const nir_variable *var, unsigned *flagsp)
 {
-	switch (glsl_get_sampler_dim(glsl_without_array(var->type))) {
+	const struct glsl_type *type = glsl_without_array(var->type);
+	unsigned coords, flags = 0;
+
+	switch (glsl_get_sampler_dim(type)) {
 	case GLSL_SAMPLER_DIM_1D:
 	case GLSL_SAMPLER_DIM_BUF:
-		return 1;
+		coords = 1;
 		break;
 	case GLSL_SAMPLER_DIM_2D:
 	case GLSL_SAMPLER_DIM_RECT:
 	case GLSL_SAMPLER_DIM_EXTERNAL:
 	case GLSL_SAMPLER_DIM_MS:
-		return 2;
+		coords = 2;
+		break;
 	case GLSL_SAMPLER_DIM_3D:
 	case GLSL_SAMPLER_DIM_CUBE:
-		return 3;
+		flags |= IR3_INSTR_3D;
+		coords = 3;
+		break;
 	default:
 		unreachable("bad sampler dim");
 		return 0;
 	}
+
+	if (glsl_sampler_type_is_array(type)) {
+		/* note: unlike tex_info(), adjust # of coords to include array idx: */
+		coords++;
+		flags |= IR3_INSTR_A;
+	}
+
+	if (flagsp)
+		*flagsp = flags;
+
+	return coords;
 }
 
 static type_t
@@ -1893,7 +1913,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
 {
 	struct ir3_block *b = ctx->block;
 	struct ir3_instruction *offset;
-	unsigned ncoords = get_image_coords(var);
+	unsigned ncoords = get_image_coords(var, NULL);
 
 	/* to calculate the byte offset (yes, uggg) we need (up to) three
 	 * const values to know the bytes per pixel, and y and z stride:
@@ -1940,13 +1960,9 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 	const nir_variable *var = intr->variables[0]->var;
 	struct ir3_instruction *sam;
 	struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-	unsigned ncoords = get_image_coords(var);
+	unsigned flags, ncoords = get_image_coords(var, &flags);
 	unsigned tex_idx = get_image_slot(ctx, var);
 	type_t type = get_image_type(var);
-	unsigned flags = 0;
-
-	if (ncoords == 3)
-		flags |= IR3_INSTR_3D;
 
 	sam = ir3_SAM(b, OPC_ISAM, type, TGSI_WRITEMASK_XYZW, flags,
 			tex_idx, tex_idx, create_collect(ctx, coords, ncoords), NULL);
@@ -1966,7 +1982,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	struct ir3_instruction *stib, *offset;
 	struct ir3_instruction * const *value = get_src(ctx, &intr->src[2]);
 	struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-	unsigned ncoords = get_image_coords(var);
+	unsigned ncoords = get_image_coords(var, NULL);
 	unsigned tex_idx = get_image_slot(ctx, var);
 
 	/* src0 is value
@@ -2001,19 +2017,38 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 {
 	struct ir3_block *b = ctx->block;
 	const nir_variable *var = intr->variables[0]->var;
-	unsigned ncoords = get_image_coords(var);
 	unsigned tex_idx = get_image_slot(ctx, var);
 	struct ir3_instruction *sam, *lod;
-	unsigned flags = 0;
-
-	if (ncoords == 3)
-		flags = IR3_INSTR_3D;
+	unsigned flags, ncoords = get_image_coords(var, &flags);
 
 	lod = create_immed(b, 0);
 	sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags,
 			tex_idx, tex_idx, lod, NULL);
 
-	split_dest(b, dst, sam, 0, ncoords);
+	/* Array size actually ends up in .w rather than .z. This doesn't
+	 * matter for miplevel 0, but for higher mips the value in z is
+	 * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
+	 * returned, which means that we have to add 1 to it for arrays for
+	 * a3xx.
+	 *
+	 * Note use a temporary dst and then copy, since the size of the dst
+	 * array that is passed in is based on nir's understanding of the
+	 * result size, not the hardware's
+	 */
+	struct ir3_instruction *tmp[4];
+
+	split_dest(b, tmp, sam, 0, 4);
+
+	for (unsigned i = 0; i < ncoords; i++)
+		dst[i] = tmp[i];
+
+	if (flags & IR3_INSTR_A) {
+		if (ctx->levels_add_one) {
+			dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0);
+		} else {
+			dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32);
+		}
+	}
 }
 
 /* src[] = { coord, sample_index, value, compare }. const_index[] = {} */
@@ -2024,7 +2059,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	const nir_variable *var = intr->variables[0]->var;
 	struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
 	struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]);
-	unsigned ncoords = get_image_coords(var);
+	unsigned ncoords = get_image_coords(var, NULL);
 
 	image = create_immed(b, get_image_slot(ctx, var));
 