diff options
Diffstat (limited to 'src/gallium/drivers/freedreno/ir3')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 73 |
1 files changed, 54 insertions, 19 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index ed87eff9976..a14239bf719 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1849,26 +1849,46 @@ get_image_slot(struct ir3_context *ctx, const nir_variable *var) return max_samplers - var->data.driver_location - 1; } +/* see tex_info() for equiv logic for texture instructions.. it would be + * nice if this could be better unified.. + */ static unsigned -get_image_coords(const nir_variable *var) +get_image_coords(const nir_variable *var, unsigned *flagsp) { - switch (glsl_get_sampler_dim(glsl_without_array(var->type))) { + const struct glsl_type *type = glsl_without_array(var->type); + unsigned coords, flags = 0; + + switch (glsl_get_sampler_dim(type)) { case GLSL_SAMPLER_DIM_1D: case GLSL_SAMPLER_DIM_BUF: - return 1; + coords = 1; break; case GLSL_SAMPLER_DIM_2D: case GLSL_SAMPLER_DIM_RECT: case GLSL_SAMPLER_DIM_EXTERNAL: case GLSL_SAMPLER_DIM_MS: - return 2; + coords = 2; + break; case GLSL_SAMPLER_DIM_3D: case GLSL_SAMPLER_DIM_CUBE: - return 3; + flags |= IR3_INSTR_3D; + coords = 3; + break; default: unreachable("bad sampler dim"); return 0; } + + if (glsl_sampler_type_is_array(type)) { + /* note: unlike tex_info(), adjust # of coords to include array idx: */ + coords++; + flags |= IR3_INSTR_A; + } + + if (flagsp) + *flagsp = flags; + + return coords; } static type_t @@ -1893,7 +1913,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var, { struct ir3_block *b = ctx->block; struct ir3_instruction *offset; - unsigned ncoords = get_image_coords(var); + unsigned ncoords = get_image_coords(var, NULL); /* to calculate the byte offset (yes, uggg) we need (up to) three * const values to know the bytes per pixel, and y and z stride: @@ -1940,13 +1960,9 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, const nir_variable *var = intr->variables[0]->var; struct ir3_instruction *sam; struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]); - unsigned ncoords = get_image_coords(var); + unsigned flags, ncoords = get_image_coords(var, &flags); unsigned tex_idx = get_image_slot(ctx, var); type_t type = get_image_type(var); - unsigned flags = 0; - - if (ncoords == 3) - flags |= IR3_INSTR_3D; sam = ir3_SAM(b, OPC_ISAM, type, TGSI_WRITEMASK_XYZW, flags, tex_idx, tex_idx, create_collect(ctx, coords, ncoords), NULL); @@ -1966,7 +1982,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *stib, *offset; struct ir3_instruction * const *value = get_src(ctx, &intr->src[2]); struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]); - unsigned ncoords = get_image_coords(var); + unsigned ncoords = get_image_coords(var, NULL); unsigned tex_idx = get_image_slot(ctx, var); /* src0 is value @@ -2001,19 +2017,38 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; const nir_variable *var = intr->variables[0]->var; - unsigned ncoords = get_image_coords(var); unsigned tex_idx = get_image_slot(ctx, var); struct ir3_instruction *sam, *lod; - unsigned flags = 0; - - if (ncoords == 3) - flags = IR3_INSTR_3D; + unsigned flags, ncoords = get_image_coords(var, &flags); lod = create_immed(b, 0); sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, tex_idx, tex_idx, lod, NULL); - split_dest(b, dst, sam, 0, ncoords); + /* Array size actually ends up in .w rather than .z. This doesn't + * matter for miplevel 0, but for higher mips the value in z is + * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is + * returned, which means that we have to add 1 to it for arrays for + * a3xx. + * + * Note use a temporary dst and then copy, since the size of the dst + * array that is passed in is based on nir's understanding of the + * result size, not the hardware's + */ + struct ir3_instruction *tmp[4]; + + split_dest(b, tmp, sam, 0, 4); + + for (unsigned i = 0; i < ncoords; i++) + dst[i] = tmp[i]; + + if (flags & IR3_INSTR_A) { + if (ctx->levels_add_one) { + dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0); + } else { + dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32); + } + } } /* src[] = { coord, sample_index, value, compare }. const_index[] = {} */ @@ -2024,7 +2059,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) const nir_variable *var = intr->variables[0]->var; struct ir3_instruction *atomic, *image, *src0, *src1, *src2; struct ir3_instruction * const *coords = get_src(ctx, &intr->src[0]); - unsigned ncoords = get_image_coords(var); + unsigned ncoords = get_image_coords(var, NULL); image = create_immed(b, get_image_slot(ctx, var)); |