summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
author: Ilia Mirkin <[email protected]>, 2015-02-15 03:39:43 -0500
committer: Ilia Mirkin <[email protected]>, 2015-04-02 00:09:14 -0400
commit: 4a3c0e995063320693782b934962969e11dab29d (patch)
tree: 4c270d91eaa16e270bb381b0581d6d7caef8c85b /src/gallium
parent: 6f4c1976f4e5ecdebfe5b9ac16b6d13a5e60eed1 (diff)
freedreno/a3xx: add MRT support
The hardware only supports 4 MRTs. It should be possible to emulate support for 8, but that doesn't seem worth the trouble.

Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_draw.c 20
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c 104
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.h 3
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_format.h 8
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c 159
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 58
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.h 3
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_screen.c 2
8 files changed, 219 insertions, 138 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 6ff762e2ae4..044355c2b68 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -129,7 +129,6 @@ static void
fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd3_emit emit = {
.vtx = &ctx->vtx,
.prog = &ctx->prog,
@@ -152,7 +151,6 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.vinteger_s = fd3_ctx->vinteger_s,
.finteger_s = fd3_ctx->finteger_s,
},
- .format = pipe_surface_format(pfb->cbufs[0]),
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
};
@@ -239,17 +237,18 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
struct fd_ringbuffer *ring = ctx->ring;
unsigned dirty = ctx->dirty;
- unsigned ce, i;
+ unsigned i;
struct fd3_emit emit = {
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
- .half_precision = fd3_half_precision(format),
+ .half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
+ fd3_half_precision(pfb->cbufs[1]) &&
+ fd3_half_precision(pfb->cbufs[2]) &&
+ fd3_half_precision(pfb->cbufs[3])),
},
- .format = format,
};
dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
@@ -326,17 +325,12 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
}
- if (buffers & PIPE_CLEAR_COLOR) {
- ce = 0xf;
- } else {
- ce = 0x0;
- }
-
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
- A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+ COND(buffers & (PIPE_CLEAR_COLOR0 << i),
+ A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)));
OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index a5874e44ba8..1b656b77464 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -293,59 +293,92 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
* case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil.
*/
void
-fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
+ struct pipe_surface **psurf,
+ int bufs)
{
- struct fd_resource *rsc = fd_resource(psurf->texture);
- unsigned lvl = psurf->u.tex.level;
- struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
- uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
- enum pipe_format format = fd3_gmem_restore_format(psurf->format);
-
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+ int i, j;
/* output sampler state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 4);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
- CP_LOAD_STATE_0_NUM_UNIT(1));
+ CP_LOAD_STATE_0_NUM_UNIT(bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
- A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
- A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
- A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
- A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
- OUT_RING(ring, 0x00000000);
+ for (i = 0; i < bufs; i++) {
+ OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
+ A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
+ A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
+ A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
+ A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
+ OUT_RING(ring, 0x00000000);
+ }
/* emit texture state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 6);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
- CP_LOAD_STATE_0_NUM_UNIT(1));
+ CP_LOAD_STATE_0_NUM_UNIT(bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
- A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
- fd3_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
- PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
- OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
- A3XX_TEX_CONST_1_WIDTH(psurf->width) |
- A3XX_TEX_CONST_1_HEIGHT(psurf->height));
- OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
- A3XX_TEX_CONST_2_INDX(0));
- OUT_RING(ring, 0x00000000);
+ for (i = 0; i < bufs; i++) {
+ if (!psurf[i]) {
+ OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
+ A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) |
+ A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) |
+ A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) |
+ A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
+ OUT_RING(ring, 0x00000000);
+ continue;
+ }
+
+ struct fd_resource *rsc = fd_resource(psurf[i]->texture);
+ unsigned lvl = psurf[i]->u.tex.level;
+ struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
+ enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format);
+
+ debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer);
+
+ OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
+ A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
+ fd3_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+ PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
+ OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
+ A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) |
+ A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height));
+ OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
+ A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
+ OUT_RING(ring, 0x00000000);
+ }
/* emit mipaddrs: */
- OUT_PKT3(ring, CP_LOAD_STATE, 3);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
- CP_LOAD_STATE_0_NUM_UNIT(1));
+ CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+ for (i = 0; i < bufs; i++) {
+ if (psurf[i]) {
+ struct fd_resource *rsc = fd_resource(psurf[i]->texture);
+ unsigned lvl = psurf[i]->u.tex.level;
+ uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer);
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
+
+ /* pad the remaining entries w/ null: */
+ for (j = 1; j < BASETABLE_SZ; j++) {
+ OUT_RING(ring, 0x00000000);
+ }
+ }
}
void
@@ -570,8 +603,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
}
- if (dirty & FD_DIRTY_PROG)
- fd3_program_emit(ring, emit);
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
+ }
/* TODO we should not need this or fd_wfi() before emit_constants():
*/
@@ -624,6 +659,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
}
+ if (format == PIPE_FORMAT_NONE)
+ control &= ~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+
if (has_alpha) {
blend_control |= blend->rb_mrt[i].blend_control_rgb;
} else {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index ce51c0c4968..a438ddaee85 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -45,7 +45,7 @@ void fd3_emit_constant(struct fd_ringbuffer *ring,
const uint32_t *dwords, struct pipe_resource *prsc);
void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
- struct pipe_surface *psurf);
+ struct pipe_surface **psurf, int bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd3_emit {
@@ -53,7 +53,6 @@ struct fd3_emit {
const struct fd_program_stateobj *prog;
const struct pipe_draw_info *info;
struct ir3_shader_key key;
- enum pipe_format format;
uint32_t dirty;
uint32_t sprite_coord_enable;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index 6a47fda1029..6afc3015901 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
@@ -42,8 +42,14 @@ uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
static INLINE bool
-fd3_half_precision(enum pipe_format format)
+fd3_half_precision(const struct pipe_surface *surface)
{
+ enum pipe_format format;
+ if (!surface)
+ return true;
+
+ format = surface->format;
+
/* colors are provided in consts, which go through cov.f32f16, which will
* break these values
*/
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 304fc846af8..8589dd6faa1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -89,6 +89,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
} else {
stride = slice->pitch * rsc->cpp;
}
+ } else if (i < nr_bufs && bases) {
+ base = bases[i];
}
OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
@@ -97,7 +99,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
- if (bin_w || (i >= nr_bufs)) {
+ if (bin_w || (i >= nr_bufs) || !bufs[i]) {
OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
} else {
OUT_RELOCW(ring, rsc->bo, offset, 0, -1);
@@ -110,20 +112,6 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
}
}
-static uint32_t
-depth_base(struct fd_context *ctx)
-{
- struct fd_gmem_stateobj *gmem = &ctx->gmem;
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- uint32_t cpp = 4;
- if (pfb->cbufs[0]) {
- struct fd_resource *rsc =
- fd_resource(pfb->cbufs[0]->texture);
- cpp = rsc->cpp;
- }
- return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
-}
-
static bool
use_hw_binning(struct fd_context *ctx)
{
@@ -167,7 +155,8 @@ emit_binning_workaround(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(0));
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
@@ -189,7 +178,7 @@ emit_binning_workaround(struct fd_context *ctx)
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
- fd3_program_emit(ring, &emit);
+ fd3_program_emit(ring, &emit, 0, NULL);
fd3_emit_vertex_bufs(ring, &emit);
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
@@ -338,15 +327,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
struct fd3_emit emit = {
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
- .half_precision = fd3_half_precision(format),
+ .half_precision = true,
},
- .format = format,
};
+ int i;
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
@@ -388,7 +376,8 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(0));
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
@@ -419,21 +408,28 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
- fd3_program_emit(ring, &emit);
+ fd3_program_emit(ring, &emit, 0, NULL);
fd3_emit_vertex_bufs(ring, &emit);
- if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- uint32_t base = depth_base(ctx);
- emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf);
- }
+ if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+ emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL,
+ ctx->gmem.zsbuf_base, pfb->zsbuf);
if (ctx->resolve & FD_BUFFER_COLOR) {
- emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, 0, pfb->cbufs[0]);
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE,
+ ctx->gmem.cbuf_base[i], pfb->cbufs[i]);
+ }
}
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
@@ -444,14 +440,24 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
/* transfer from system memory to gmem */
static void
-emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
- struct pipe_surface *psurf, uint32_t bin_w)
+emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[],
+ struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
{
struct fd_ringbuffer *ring = ctx->ring;
- emit_mrt(ring, 1, &psurf, &base, bin_w);
+ assert(bufs > 0);
+
+ emit_mrt(ring, bufs, psurf, bases, bin_w);
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
+ OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
- fd3_emit_gmem_restore_tex(ring, psurf);
+ fd3_emit_gmem_restore_tex(ring, psurf, bufs);
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -464,15 +470,17 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
struct fd3_emit emit = {
.vtx = &fd3_ctx->blit_vbuf_state,
- .prog = &ctx->blit_prog[0],
.sprite_coord_enable = 1,
+ /* NOTE: They all use the same VP, this is for vtx bufs. */
+ .prog = &ctx->blit_prog[0],
.key = {
- .half_precision = fd3_half_precision(format),
+ .half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
+ fd3_half_precision(pfb->cbufs[1]) &&
+ fd3_half_precision(pfb->cbufs[2]) &&
+ fd3_half_precision(pfb->cbufs[3])),
},
- .format = format,
};
float x0, y0, x1, y1;
unsigned bin_w = tile->bin_w;
@@ -515,6 +523,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */
@@ -567,7 +579,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
- fd3_program_emit(ring, &emit);
fd3_emit_vertex_bufs(ring, &emit);
/* for gmem pitch/base calculations, we need to use the non-
@@ -576,16 +587,27 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
bin_w = gmem->bin_w;
bin_h = gmem->bin_h;
- if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
- emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);
+ if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
+ emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
+ fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
+ emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
+ }
- if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
- emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);
+ if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ emit.prog = &ctx->blit_prog[0];
+ fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
+ emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
+ }
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
}
static void
@@ -617,12 +639,13 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
- uint32_t pitch = 0;
+ uint32_t i, pitch = 0;
- if (pfb->cbufs[0]) {
- struct pipe_surface *psurf = pfb->cbufs[0];
- unsigned lvl = psurf->u.tex.level;
- pitch = fd_resource(psurf->texture)->slices[lvl].pitch;
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ struct pipe_surface *psurf = pfb->cbufs[i];
+ if (!psurf)
+ continue;
+ pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
}
fd3_emit_restore(ctx);
@@ -647,7 +670,8 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
patch_draws(ctx, IGNORE_VISIBILITY);
patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
@@ -734,7 +758,8 @@ emit_binning_pass(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(0));
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
@@ -774,7 +799,8 @@ emit_binning_pass(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
@@ -848,21 +874,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- struct fd_gmem_stateobj *gmem = &ctx->gmem;
- uint32_t reg;
-
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
- reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
- if (pfb->zsbuf) {
- reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
- }
- OUT_RING(ring, reg);
- if (pfb->zsbuf) {
- uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
- OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
- } else {
- OUT_RING(ring, 0x00000000);
- }
if (ctx->needs_rb_fbd) {
fd_wfi(ctx, ring);
@@ -874,7 +885,8 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
- A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
+ A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
}
/* before IB to rendering cmds: */
@@ -891,6 +903,21 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
uint32_t x2 = tile->xoff + tile->bin_w - 1;
uint32_t y2 = tile->yoff + tile->bin_h - 1;
+ uint32_t reg;
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+ reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base);
+ if (pfb->zsbuf) {
+ reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
+ }
+ OUT_RING(ring, reg);
+ if (pfb->zsbuf) {
+ uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
+ OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
+
if (use_hw_binning(ctx)) {
struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
@@ -918,7 +945,7 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w);
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 442b47dea9f..4581a6b7af9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -31,8 +31,6 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
#include "freedreno_program.h"
@@ -127,13 +125,14 @@ emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
}
void
-fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
+fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
+ int nr, struct pipe_surface **bufs)
{
const struct ir3_shader_variant *vp, *fp;
const struct ir3_info *vsi, *fsi;
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff;
- uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+ uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
int constmode;
int i, j, k;
@@ -199,11 +198,26 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
psize_regid = ir3_find_output_regid(vp,
ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
- color_regid = ir3_find_output_regid(fp,
- ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+ if (fp->color0_mrt) {
+ color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+ ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+ } else {
+ for (int i = 0; i < fp->outputs_count; i++) {
+ ir3_semantic sem = fp->outputs[i].semantic;
+ unsigned idx = sem2idx(sem);
+ if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
+ continue;
+ assert(idx < 4);
+ color_regid[idx] = fp->outputs[i].regid;
+ }
+ }
- if (util_format_is_alpha(emit->format))
- color_regid += 3;
+ /* adjust regids for alpha output formats. there is no alpha render
+ * format, so it's just treated like red
+ */
+ for (i = 0; i < nr; i++)
+ if (util_format_is_alpha(pipe_surface_format(bufs[i])))
+ color_regid[i] += 3;
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
@@ -345,21 +359,23 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
}
OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
- if (fp->writes_pos) {
- OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
- A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
- } else {
- OUT_RING(ring, 0x00000000);
- }
+ OUT_RING(ring,
+ COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
+ A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
+ A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
- OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) |
- COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION) |
- COND(util_format_is_pure_uint(emit->format), A3XX_SP_FS_MRT_REG_UINT) |
- COND(util_format_is_pure_sint(emit->format), A3XX_SP_FS_MRT_REG_SINT));
- OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
+ for (i = 0; i < 4; i++) {
+ uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
+ COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
+
+ if (i < nr) {
+ enum pipe_format fmt = pipe_surface_format(bufs[i]);
+ mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
+ COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
+ }
+ OUT_RING(ring, mrt_reg);
+ }
if (emit->key.binning_pass) {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 0313b774a08..52c808071a4 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -39,7 +39,8 @@ struct fd3_shader_stateobj {
struct fd3_emit;
-void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit);
+void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
+ int nr, struct pipe_surface **bufs);
void fd3_prog_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
index 182db849902..3497921257c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
@@ -103,7 +103,7 @@ fd3_screen_is_format_supported(struct pipe_screen *pscreen,
void
fd3_screen_init(struct pipe_screen *pscreen)
{
- fd_screen(pscreen)->max_rts = 1;
+ fd_screen(pscreen)->max_rts = 4;
pscreen->context_create = fd3_context_create;
pscreen->is_format_supported = fd3_screen_is_format_supported;
}