author     Rob Clark <[email protected]>  2019-02-04 13:30:34 -0500
committer  Rob Clark <[email protected]>  2019-02-16 16:28:00 -0500
commit     5118dcf8c36043d346ba0b4b45e31dbea0012e40
tree       3d3eee5b41b88725f82d62bda981e15c7a2b012a
parent     2183d9cff7b068b9fcbc579480f787ebaee58a5f
freedreno/a6xx: image/ssbo state emit
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r--  src/gallium/drivers/freedreno/a6xx/fd6_emit.c    | 229
-rw-r--r--  src/gallium/drivers/freedreno/a6xx/fd6_emit.h    |   5
-rw-r--r--  src/gallium/drivers/freedreno/a6xx/fd6_image.c   | 227
-rw-r--r--  src/gallium/drivers/freedreno/a6xx/fd6_image.h   |   8
-rw-r--r--  src/gallium/drivers/freedreno/a6xx/fd6_program.c |   2
-rw-r--r--  src/gallium/drivers/freedreno/a6xx/fd6_texture.c |   3
6 files changed, 259 insertions(+), 215 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index 1d2077048a4..397c04390c7 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -329,7 +329,10 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
bool
fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
- unsigned bcolor_offset)
+ unsigned bcolor_offset,
+ /* can be NULL if no image/SSBO state to merge in: */
+ const struct ir3_shader_variant *v, struct fd_shaderbuf_stateobj *buf,
+ struct fd_shaderimg_stateobj *img)
{
bool needs_border = false;
unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
@@ -357,7 +360,6 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
unreachable("bad state block");
}
-
if (tex->num_samplers > 0) {
struct fd_ringbuffer *state =
fd_ringbuffer_new_object(pipe, tex->num_samplers * 4 * 4);
@@ -388,10 +390,24 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
fd_ringbuffer_del(state);
}
- if (tex->num_textures > 0) {
+ unsigned num_merged_textures = tex->num_textures;
+ unsigned num_textures = tex->num_textures;
+ if (v) {
+ num_merged_textures += v->image_mapping.num_tex;
+
+ /* There can be more textures bound than the shader actually uses,
+ * and that isn't known at shader compile time.  So when we are
+ * merging tex state, only emit the textures the shader uses (the
+ * image/SSBO related tex state comes immediately after).
+ */
+ num_textures = v->image_mapping.tex_base;
+ }
+
+ if (num_merged_textures > 0) {
struct fd_ringbuffer *state =
- fd_ringbuffer_new_object(pipe, tex->num_textures * 16 * 4);
- for (unsigned i = 0; i < tex->num_textures; i++) {
+ fd_ringbuffer_new_object(pipe, num_merged_textures * 16 * 4);
+ for (unsigned i = 0; i < num_textures; i++) {
static const struct fd6_pipe_sampler_view dummy_view = {};
const struct fd6_pipe_sampler_view *view = tex->textures[i] ?
fd6_pipe_sampler_view(tex->textures[i]) : &dummy_view;
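As an aside, the slot accounting this merge produces can be illustrated with a small standalone sketch (plain C, counts invented for the example; not part of the patch): the descriptors copied from tex->textures[] occupy slots [0, tex_base), and the image/SSBO-backed descriptors follow immediately after, so the hardware sees one contiguous texture state block.

/* Standalone illustration of the merged texture slot accounting above;
 * all counts are made up for the example.
 */
#include <stdio.h>

int main(void)
{
	unsigned bound_textures = 8;  /* textures bound by the state tracker */
	unsigned tex_base       = 5;  /* textures the shader variant actually uses */
	unsigned num_tex        = 3;  /* image/SSBO entries needing tex descriptors */

	unsigned num_textures        = tex_base;                 /* copied from tex->textures[] */
	unsigned num_merged_textures = bound_textures + num_tex; /* NUM_UNIT, as computed in the patch */

	printf("copy %u shader texture descriptors, append %u image/SSBO descriptors,\n"
	       "program NUM_UNIT=%u (%u textures bound)\n",
	       num_textures, num_tex, num_merged_textures, bound_textures);
	return 0;
}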
@@ -424,13 +440,26 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
OUT_RING(state, 0);
}
+ if (v) {
+ const struct ir3_ibo_mapping *mapping = &v->image_mapping;
+
+ for (unsigned i = 0; i < mapping->num_tex; i++) {
+ unsigned idx = mapping->tex_to_image[i];
+ if (idx & IBO_SSBO) {
+ fd6_emit_ssbo_tex(state, &buf->sb[idx & ~IBO_SSBO]);
+ } else {
+ fd6_emit_image_tex(state, &img->si[idx]);
+ }
+ }
+ }
+
/* emit texture state: */
OUT_PKT7(ring, opcode, 3);
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE6_0_NUM_UNIT(tex->num_textures));
+ CP_LOAD_STATE6_0_NUM_UNIT(num_merged_textures));
OUT_RB(ring, state); /* SRC_ADDR_LO/HI */
OUT_PKT4(ring, tex_const_reg, 2);
@@ -441,85 +470,81 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
if (tex_count_reg) {
OUT_PKT4(ring, tex_count_reg, 1);
- OUT_RING(ring, tex->num_textures);
+ OUT_RING(ring, num_merged_textures);
}
return needs_border;
}
-static void
-emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum a6xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+/* Emits combined texture state, which also includes any image/SSBO
+ * related texture state merged in (because we must have all texture
+ * state for a given stage in a single buffer).  In the fast path,
+ * when we don't need to merge in any image/SSBO related texture
+ * state, we just re-use the cached texture stateobj.  Otherwise we
+ * generate a single-use stateobj.
+ *
+ * TODO Is there some sane way we can still use the cached texture
+ * stateobj with image/SSBO in use?
+ *
+ * Returns whether border_color is required:
+ */
+static bool
+fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit,
+ enum pipe_shader_type type, const struct ir3_shader_variant *v)
{
- unsigned count = util_last_bit(so->enabled_mask);
- unsigned opcode;
-
- if (count == 0)
- return;
-
- switch (sb) {
- case SB6_IBO:
- case SB6_CS_IBO:
- opcode = CP_LOAD_STATE6_GEOM;
- break;
- default:
- unreachable("bad state block");
- }
+ struct fd_context *ctx = emit->ctx;
+ bool needs_border = false;
- OUT_PKT7(ring, opcode, 3 + (4 * count));
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(0) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE6_0_NUM_UNIT(count));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
- for (unsigned i = 0; i < count; i++) {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- }
+ static const struct {
+ enum a6xx_state_block sb;
+ enum fd6_state_id state_id;
+ } s[PIPE_SHADER_TYPES] = {
+ [PIPE_SHADER_VERTEX] = { SB6_VS_TEX, FD6_GROUP_VS_TEX },
+ [PIPE_SHADER_FRAGMENT] = { SB6_FS_TEX, FD6_GROUP_FS_TEX },
+ };
-#if 0
- OUT_PKT7(ring, opcode, 3 + (2 * count));
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(1) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE6_0_NUM_UNIT(count));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
- for (unsigned i = 0; i < count; i++) {
- struct pipe_shader_buffer *buf = &so->sb[i];
- unsigned sz = buf->buffer_size;
+ debug_assert(s[type].state_id);
- /* width is in dwords, overflows into height: */
- sz /= 4;
+ if (!v->image_mapping.num_tex) {
+ /* In the fast path, when we don't have to mix in any image/SSBO
+ * related texture state, we can just look up the cached stateobj
+ * and re-emit that:
+ */
+ if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
+ ctx->tex[type].num_textures > 0) {
+ struct fd6_texture_state *tex = fd6_texture_state(ctx,
+ s[type].sb, &ctx->tex[type]);
- OUT_RING(ring, A6XX_SSBO_1_0_WIDTH(sz));
- OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(sz >> 16));
- }
-#endif
+ needs_border |= tex->needs_border;
- OUT_PKT7(ring, opcode, 3 + (2 * count));
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
- CP_LOAD_STATE6_0_STATE_TYPE(2) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE6_0_NUM_UNIT(count));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
- for (unsigned i = 0; i < count; i++) {
- struct pipe_shader_buffer *buf = &so->sb[i];
- if (buf->buffer) {
- struct fd_resource *rsc = fd_resource(buf->buffer);
- OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0);
- } else {
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ fd6_emit_add_group(emit, tex->stateobj, s[type].state_id, 0x7);
+ }
+ } else {
+ /* In the slow path, create a one-shot texture state object
+ * if any of the TEX|PROG|SSBO|IMAGE state is dirty:
+ */
+ if (ctx->dirty_shader[type] &
+ (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
+ FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
+ struct fd_texture_stateobj *tex = &ctx->tex[type];
+ struct fd_shaderbuf_stateobj *buf = &ctx->shaderbuf[type];
+ struct fd_shaderimg_stateobj *img = &ctx->shaderimg[type];
+ struct fd_ringbuffer *stateobj =
+ fd_submit_new_ringbuffer(ctx->batch->submit,
+ 0x1000, FD_RINGBUFFER_STREAMING);
+ unsigned bcolor_offset =
+ fd6_border_color_offset(ctx, s[type].sb, tex);
+
+ needs_border |= fd6_emit_textures(ctx->pipe, stateobj, s[type].sb, tex,
+ bcolor_offset, v, buf, img);
+
+ fd6_emit_add_group(emit, stateobj, s[type].state_id, 0x7);
+
+ fd_ringbuffer_del(stateobj);
}
}
+
+ return needs_border;
}
static struct fd_ringbuffer *
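The fast-path/slow-path split that fd6_emit_combined_textures() implements boils down to one predicate, sketched standalone below (the dirty-bit defines are local stand-ins with assumed values, not the driver's FD_DIRTY_SHADER_* flags):

#include <stdbool.h>
#include <stdio.h>

/* Local stand-ins for the per-shader dirty bits (assumed values). */
#define DIRTY_TEX   (1u << 0)
#define DIRTY_PROG  (1u << 1)
#define DIRTY_SSBO  (1u << 2)
#define DIRTY_IMAGE (1u << 3)

/* Fast path: no image/SSBO-backed tex entries -> the cached stateobj is
 * enough.  Slow path: any of TEX/PROG/SSBO/IMAGE dirty -> build a one-shot
 * streaming stateobj.
 */
static bool needs_oneshot_tex_state(unsigned num_ibo_tex, unsigned dirty)
{
	if (!num_ibo_tex)
		return false;
	return dirty & (DIRTY_TEX | DIRTY_PROG | DIRTY_SSBO | DIRTY_IMAGE);
}

int main(void)
{
	printf("%d %d\n",
	       needs_oneshot_tex_state(0, DIRTY_TEX),   /* 0: cached path      */
	       needs_oneshot_tex_state(2, DIRTY_SSBO)); /* 1: one-shot rebuild */
	return 0;
}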
@@ -906,34 +931,38 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
}
- if ((ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) &&
- ctx->tex[PIPE_SHADER_VERTEX].num_textures > 0) {
- struct fd6_texture_state *tex = fd6_texture_state(ctx,
- SB6_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);
-
- needs_border |= tex->needs_border;
-
- fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_VS_TEX, 0x7);
- }
+ needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vp);
+ needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fp);
- if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) &&
- ctx->tex[PIPE_SHADER_FRAGMENT].num_textures > 0) {
- struct fd6_texture_state *tex = fd6_texture_state(ctx,
- SB6_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
+ if (needs_border)
+ emit_border_color(ctx, ring);
- needs_border |= tex->needs_border;
+ if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] &
+ (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
+ struct fd_ringbuffer *state =
+ fd6_build_ibo_state(ctx, fp, PIPE_SHADER_FRAGMENT);
+ struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(
+ ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);
+ const struct ir3_ibo_mapping *mapping = &fp->image_mapping;
- fd6_emit_add_group(emit, tex->stateobj, FD6_GROUP_FS_TEX, 0x7);
- }
+ OUT_PKT7(obj, CP_LOAD_STATE6, 3);
+ OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
+ CP_LOAD_STATE6_0_NUM_UNIT(mapping->num_ibo));
+ OUT_RB(obj, state);
- if (needs_border)
- emit_border_color(ctx, ring);
+ OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2);
+ OUT_RB(obj, state);
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
- emit_ssbos(ctx, ring, SB6_IBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
+ OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1);
+ OUT_RING(obj, mapping->num_ibo);
- if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
- fd6_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT);
+ fd6_emit_add_group(emit, obj, FD6_GROUP_IBO, 0x7);
+ fd_ringbuffer_del(obj);
+ fd_ringbuffer_del(state);
+ }
if (emit->num_groups > 0) {
OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
@@ -970,7 +999,7 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & FD_DIRTY_SHADER_TEX) {
bool needs_border = false;
needs_border |= fd6_emit_textures(ctx->pipe, ring, SB6_CS_TEX,
- &ctx->tex[PIPE_SHADER_COMPUTE], 0);
+ &ctx->tex[PIPE_SHADER_COMPUTE], 0, NULL, NULL, NULL);
if (needs_border)
emit_border_color(ctx, ring);
@@ -999,11 +1028,11 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
#endif
- if (dirty & FD_DIRTY_SHADER_SSBO)
- emit_ssbos(ctx, ring, SB6_CS_IBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);
-
- if (dirty & FD_DIRTY_SHADER_IMAGE)
- fd6_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
+// if (dirty & FD_DIRTY_SHADER_SSBO)
+// fd6_emit_ssbos(ctx, ring, PIPE_SHADER_COMPUTE);
+//
+// if (dirty & FD_DIRTY_SHADER_IMAGE)
+// fd6_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
index 9e578844741..ab7ace4f26f 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
@@ -53,6 +53,7 @@ enum fd6_state_id {
FD6_GROUP_FS_CONST,
FD6_GROUP_VS_TEX,
FD6_GROUP_FS_TEX,
+ FD6_GROUP_IBO,
FD6_GROUP_RASTERIZER,
FD6_GROUP_ZSA,
};
@@ -173,7 +174,9 @@ fd6_stage2shadersb(gl_shader_stage type)
bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
enum a6xx_state_block sb, struct fd_texture_stateobj *tex,
- unsigned bcolor_offset);
+ unsigned bcolor_offset,
+ const struct ir3_shader_variant *v, struct fd_shaderbuf_stateobj *buf,
+ struct fd_shaderimg_stateobj *img);
void fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c
index f4e3492154d..153c00435f6 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c
@@ -32,16 +32,6 @@
#include "fd6_format.h"
#include "fd6_texture.h"
-static enum a6xx_state_block texsb[] = {
- [PIPE_SHADER_COMPUTE] = SB6_CS_TEX,
- [PIPE_SHADER_FRAGMENT] = SB6_FS_TEX,
-};
-
-static enum a6xx_state_block imgsb[] = {
- [PIPE_SHADER_COMPUTE] = SB6_CS_IBO,
- [PIPE_SHADER_FRAGMENT] = SB6_IBO,
-};
-
struct fd6_image {
struct pipe_resource *prsc;
enum pipe_format pfmt;
@@ -57,16 +47,16 @@ struct fd6_image {
uint32_t array_pitch;
struct fd_bo *bo;
uint32_t offset;
+ bool buffer;
};
-static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg)
+static void translate_image(struct fd6_image *img, const struct pipe_image_view *pimg)
{
enum pipe_format format = pimg->format;
struct pipe_resource *prsc = pimg->resource;
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl;
- if (!pimg->resource) {
+ if (!prsc) {
memset(img, 0, sizeof(*img));
return;
}
@@ -81,45 +71,76 @@ static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg)
img->bo = rsc->bo;
if (prsc->target == PIPE_BUFFER) {
- lvl = 0;
+ img->buffer = true;
img->offset = pimg->u.buf.offset;
- img->pitch = pimg->u.buf.size;
+ img->pitch = 0;
img->array_pitch = 0;
+
+ /* size is encoded with low 15b in WIDTH and high bits in
+ * HEIGHT, in units of elements:
+ */
+ unsigned sz = prsc->width0;
+ img->width = sz & MASK(15);
+ img->height = sz >> 15;
+ img->depth = 0;
} else {
- lvl = pimg->u.tex.level;
+ img->buffer = false;
+ unsigned lvl = pimg->u.tex.level;
img->offset = rsc->slices[lvl].offset;
img->pitch = rsc->slices[lvl].pitch * rsc->cpp;
img->array_pitch = rsc->layer_size;
- }
- img->width = u_minify(prsc->width0, lvl);
- img->height = u_minify(prsc->height0, lvl);
- img->depth = u_minify(prsc->depth0, lvl);
+ img->width = u_minify(prsc->width0, lvl);
+ img->height = u_minify(prsc->height0, lvl);
+ img->depth = u_minify(prsc->depth0, lvl);
+ }
}
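For the non-buffer branch just above, the per-level dimensions come from gallium's u_minify(), which is simply a right shift clamped to 1; a standalone sketch of the same arithmetic (minify() is a local reimplementation for illustration):

#include <stdio.h>

/* Same behaviour as gallium's u_minify(): halve per mip level, clamp to 1. */
static unsigned minify(unsigned value, unsigned level)
{
	unsigned v = value >> level;
	return v ? v : 1;
}

int main(void)
{
	/* e.g. a 1024x768 image viewed at level 2 -> 256x192 */
	printf("%ux%u\n", minify(1024, 2), minify(768, 2));
	return 0;
}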
-static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
- struct fd6_image *img, enum pipe_shader_type shader)
+static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg)
{
- unsigned opcode = CP_LOAD_STATE6_FRAG;
+ enum pipe_format format = PIPE_FORMAT_R32_UINT;
+ struct pipe_resource *prsc = pimg->buffer;
+ struct fd_resource *rsc = fd_resource(prsc);
- assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
+ if (!prsc) {
+ memset(img, 0, sizeof(*img));
+ return;
+ }
+
+ img->prsc = prsc;
+ img->pfmt = format;
+ img->fmt = fd6_pipe2tex(format);
+ img->fetchsize = fd6_pipe2fetchsize(format);
+ img->type = fd6_tex_type(prsc->target);
+ img->srgb = util_format_is_srgb(format);
+ img->cpp = rsc->cpp;
+ img->bo = rsc->bo;
+ img->buffer = true;
- OUT_PKT7(ring, opcode, 3 + 12);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
- CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(texsb[shader]) |
- CP_LOAD_STATE6_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+ img->offset = pimg->buffer_offset;
+ img->pitch = 0;
+ img->array_pitch = 0;
+ /* size is encoded with low 15b in WIDTH and high bits in HEIGHT,
+ * in units of elements:
+ */
+ unsigned sz = pimg->buffer_size / 4;
+ img->width = sz & MASK(15);
+ img->height = sz >> 15;
+ img->depth = 0;
+}
+
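The WIDTH/HEIGHT split used for buffer sizes here (and in the PIPE_BUFFER branch of translate_image() above) can be sanity-checked with a standalone worked example; MASK() is redefined locally rather than pulled from the Mesa headers, and the buffer size is invented:

#include <assert.h>
#include <stdio.h>

#define MASK(n) ((1u << (n)) - 1)   /* local stand-in for the util macro */

int main(void)
{
	/* e.g. a 256 KiB SSBO viewed as R32_UINT: 65536 elements */
	unsigned buffer_size = 256 * 1024;
	unsigned sz = buffer_size / 4;          /* size in elements */

	unsigned width  = sz & MASK(15);        /* low 15 bits  -> 0 */
	unsigned height = sz >> 15;             /* high bits    -> 2 */

	assert(((height << 15) | width) == sz); /* decoding recovers the size */
	printf("sz=%u -> WIDTH=%u HEIGHT=%u\n", sz, width, height);
	return 0;
}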
+static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
+{
OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) |
+ A6XX_TEX_CONST_0_TILE_MODE(fd_resource(img->prsc)->tile_mode) |
fd6_tex_swiz(img->prsc, img->fmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
COND(img->srgb, A6XX_TEX_CONST_0_SRGB));
OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) |
A6XX_TEX_CONST_1_HEIGHT(img->height));
OUT_RING(ring, A6XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) |
+ COND(img->buffer, A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31) |
A6XX_TEX_CONST_2_TYPE(img->type) |
A6XX_TEX_CONST_2_PITCH(img->pitch));
OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
@@ -136,101 +157,85 @@ static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
}
-static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
- struct fd6_image *img, enum pipe_shader_type shader)
+void
+fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg)
{
- unsigned opcode = CP_LOAD_STATE6_FRAG;
+ struct fd6_image img;
+ translate_image(&img, pimg);
+ emit_image_tex(ring, &img);
+}
- assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
+void
+fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf)
+{
+ struct fd6_image img;
+ translate_buf(&img, pbuf);
+ emit_image_tex(ring, &img);
+}
-#if 0
- OUT_PKT7(ring, opcode, 3 + 4);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
- CP_LOAD_STATE6_0_STATE_TYPE(0) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
- CP_LOAD_STATE6_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
- OUT_RING(ring, A6XX_SSBO_0_0_BASE_LO(0));
- OUT_RING(ring, A6XX_SSBO_0_1_PITCH(img->pitch));
- OUT_RING(ring, A6XX_SSBO_0_2_ARRAY_PITCH(img->array_pitch));
- OUT_RING(ring, A6XX_SSBO_0_3_CPP(img->cpp));
-#endif
-
-#if 0
- OUT_PKT7(ring, opcode, 3 + 2);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
- CP_LOAD_STATE6_0_STATE_TYPE(1) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
- CP_LOAD_STATE6_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
- OUT_RING(ring, A6XX_SSBO_1_0_FMT(img->fmt) |
- A6XX_SSBO_1_0_WIDTH(img->width));
- OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(img->height) |
- A6XX_SSBO_1_1_DEPTH(img->depth));
-#endif
-
- OUT_PKT7(ring, opcode, 3 + 2);
- OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
- CP_LOAD_STATE6_0_STATE_TYPE(2) |
- CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
- CP_LOAD_STATE6_0_NUM_UNIT(1));
- OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
+{
+ debug_assert(fd_resource(img->prsc)->tile_mode == 0);
+
+ OUT_RING(ring, A6XX_IBO_0_FMT(img->fmt));
+ OUT_RING(ring, A6XX_IBO_1_WIDTH(img->width) |
+ A6XX_IBO_1_HEIGHT(img->height));
+ OUT_RING(ring, A6XX_IBO_2_PITCH(img->pitch) |
+ COND(img->buffer, A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31) |
+ A6XX_IBO_2_TYPE(img->type));
+ OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch));
if (img->bo) {
- OUT_RELOCW(ring, img->bo, img->offset, 0, 0);
+ OUT_RELOCW(ring, img->bo, img->offset,
+ (uint64_t)A6XX_IBO_5_DEPTH(img->depth) << 32, 0);
} else {
OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, A6XX_IBO_5_DEPTH(img->depth));
}
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
}
-/* Note that to avoid conflicts with textures and non-image "SSBO"s, images
- * are placedd, in reverse order, at the end of the state block, so for
- * example the sampler state:
- *
- * 0: first texture
- * 1: second texture
- * ....
- * N-1: second image
- * N: first image
- */
-static unsigned
-get_image_slot(unsigned index)
-{
- /* TODO figure out real limit per generation, and don't hardcode.
- * This needs to match get_image_slot() in ir3_compiler_nir.
- * Possibly should be factored out into shared helper?
- */
- const unsigned max_samplers = 16;
- return max_samplers - index - 1;
-}
-
-/* Emit required "SSBO" and sampler state. The sampler state is used by the
- * hw for imageLoad(), and "SSBO" state for imageStore(). Returns max sampler
- * used.
- */
-void
-fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
+/* Build combined image/SSBO "IBO" state, returns ownership of state reference */
+struct fd_ringbuffer *
+fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v,
enum pipe_shader_type shader)
{
- struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
- unsigned enabled_mask = so->enabled_mask;
+ struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
+ struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
+ const struct ir3_ibo_mapping *mapping = &v->image_mapping;
+
+ struct fd_ringbuffer *state =
+ fd_submit_new_ringbuffer(ctx->batch->submit,
+ mapping->num_ibo * 16 * 4, FD_RINGBUFFER_STREAMING);
- while (enabled_mask) {
- unsigned index = u_bit_scan(&enabled_mask);
- unsigned slot = get_image_slot(index);
+ assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);
+
+ for (unsigned i = 0; i < mapping->num_ibo; i++) {
struct fd6_image img;
+ unsigned idx = mapping->ibo_to_image[i];
- translate_image(&img, &so->si[index]);
+ if (idx & IBO_SSBO) {
+ translate_buf(&img, &bufso->sb[idx & ~IBO_SSBO]);
+ } else {
+ translate_image(&img, &imgso->si[idx]);
+ }
- emit_image_tex(ring, slot, &img, shader);
- emit_image_ssbo(ring, slot, &img, shader);
+ emit_image_ssbo(state, &img);
}
+
+ return state;
}
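The idx & IBO_SSBO tests here and in fd6_emit_textures() rely on a tag bit that the ir3 image mapping sets on SSBO entries, so one index array can point into either the SSBO or the image state object. A minimal standalone sketch of that encoding (the tag value and helper names are assumptions for illustration, not the ir3 definitions):

#include <stdbool.h>
#include <stdio.h>

#define TAG_SSBO 0x80u  /* assumed tag bit; the real flag is ir3's IBO_SSBO */

static unsigned encode_image(unsigned img_idx) { return img_idx; }
static unsigned encode_ssbo(unsigned buf_idx)  { return buf_idx | TAG_SSBO; }

int main(void)
{
	unsigned map[] = { encode_image(0), encode_ssbo(2), encode_image(1) };

	for (unsigned i = 0; i < 3; i++) {
		bool is_ssbo = map[i] & TAG_SSBO;
		unsigned idx = map[i] & ~TAG_SSBO;
		printf("ibo slot %u -> %s[%u]\n", i, is_ssbo ? "ssbo" : "image", idx);
	}
	return 0;
}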
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.h b/src/gallium/drivers/freedreno/a6xx/fd6_image.h
index 0ee53932737..a2dbfd3c1a8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_image.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.h
@@ -30,7 +30,11 @@
#include "freedreno_context.h"
-void fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
- enum pipe_shader_type shader);
+void fd6_emit_image_tex(struct fd_ringbuffer *ring, const struct pipe_image_view *pimg);
+void fd6_emit_ssbo_tex(struct fd_ringbuffer *ring, const struct pipe_shader_buffer *pbuf);
+
+struct ir3_shader_variant;
+struct fd_ringbuffer * fd6_build_ibo_state(struct fd_context *ctx,
+ const struct ir3_shader_variant *v, enum pipe_shader_type shader);
#endif /* FD6_IMAGE_H_ */
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 9180154ed68..a9d8384aff7 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -349,6 +349,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 2);
OUT_RING(ring, COND(s[VS].v, A6XX_SP_VS_CONFIG_ENABLED) |
+ A6XX_SP_VS_CONFIG_NIBO(s[VS].v->image_mapping.num_ibo) |
A6XX_SP_VS_CONFIG_NTEX(s[VS].v->num_samp) |
A6XX_SP_VS_CONFIG_NSAMP(s[VS].v->num_samp)); /* SP_VS_CONFIG */
OUT_RING(ring, s[VS].instrlen); /* SP_VS_INSTRLEN */
@@ -382,6 +383,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 2);
OUT_RING(ring, COND(s[FS].v, A6XX_SP_FS_CONFIG_ENABLED) |
+ A6XX_SP_FS_CONFIG_NIBO(s[FS].v->image_mapping.num_ibo) |
A6XX_SP_FS_CONFIG_NTEX(s[FS].v->num_samp) |
A6XX_SP_FS_CONFIG_NSAMP(s[FS].v->num_samp)); /* SP_FS_CONFIG */
OUT_RING(ring, s[FS].instrlen); /* SP_FS_INSTRLEN */
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
index f4bad031e6b..171a016d985 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
@@ -448,7 +448,8 @@ fd6_texture_state(struct fd_context *ctx, enum a6xx_state_block sb,
state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
state->needs_border = needs_border;
- fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset);
+ fd6_emit_textures(ctx->pipe, state->stateobj, sb, tex, key.bcolor_offset,
+ NULL, NULL, NULL);
/* NOTE: uses copy of key in state obj, because pointer passed by caller
* is probably on the stack