diff options
author | Kristian H. Kristensen <[email protected]> | 2018-08-15 09:18:41 -0700 |
---|---|---|
committer | Rob Clark <[email protected]> | 2018-08-16 19:13:36 -0400 |
commit | de3b34df97326b793fac2152eedbd25a0c2d0812 (patch) | |
tree | e186a1d711a2c2f0695feca5de3ff357cebf5b27 | |
parent | 6ee58e8257528abeea3f62310b7f30aeedac9e57 (diff) |
freedreno: Add a6xx backend
This adds a freedreno backend for the a6xx generation GPUs, which at
the time of this commit is about 98% GLES2 conformant. Much remains to
be done - both performance work and feature work towards more recent
GLES versions, but this is a good start.
Signed-off-by: Kristian H. Kristensen <[email protected]>
Signed-off-by: Rob Clark <[email protected]>
40 files changed, 6370 insertions, 19 deletions
diff --git a/configure.ac b/configure.ac index c2155a541b0..2f1d13ce6ef 100644 --- a/configure.ac +++ b/configure.ac @@ -78,7 +78,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.93 LIBDRM_INTEL_REQUIRED=2.4.75 LIBDRM_NVVIEUX_REQUIRED=2.4.66 LIBDRM_NOUVEAU_REQUIRED=2.4.66 -LIBDRM_FREEDRENO_REQUIRED=2.4.92 +LIBDRM_FREEDRENO_REQUIRED=2.4.93 LIBDRM_ETNAVIV_REQUIRED=2.4.89 LIBDRM_VC4_REQUIRED=2.4.89 diff --git a/meson.build b/meson.build index 351c0785aff..fa3367d1fa7 100644 --- a/meson.build +++ b/meson.build @@ -1090,7 +1090,7 @@ _drm_amdgpu_ver = '2.4.93' _drm_radeon_ver = '2.4.71' _drm_nouveau_ver = '2.4.66' _drm_etnaviv_ver = '2.4.89' -_drm_freedreno_ver = '2.4.92' +_drm_freedreno_ver = '2.4.93' _drm_intel_ver = '2.4.75' _drm_ver = '2.4.75' diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index 3bcac39484a..c19b776892c 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -22,6 +22,7 @@ libfreedreno_la_SOURCES = \ $(a3xx_SOURCES) \ $(a4xx_SOURCES) \ $(a5xx_SOURCES) \ + $(a6xx_SOURCES) \ $(ir3_SOURCES) \ $(ir3_GENERATED_FILES) diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 328cbdfbf94..8e744c33751 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -161,6 +161,37 @@ a5xx_SOURCES := \ a5xx/fd5_zsa.c \ a5xx/fd5_zsa.h +a6xx_SOURCES := \ + a6xx/a6xx.xml.h \ + a6xx/fd6_blend.c \ + a6xx/fd6_blend.h \ + a6xx/fd6_context.c \ + a6xx/fd6_context.h \ + a6xx/fd6_draw.c \ + a6xx/fd6_draw.h \ + a6xx/fd6_emit.c \ + a6xx/fd6_emit.h \ + a6xx/fd6_format.c \ + a6xx/fd6_format.h \ + a6xx/fd6_gmem.c \ + a6xx/fd6_gmem.h \ + a6xx/fd6_image.c \ + a6xx/fd6_image.h \ + a6xx/fd6_program.c \ + a6xx/fd6_program.h \ + a6xx/fd6_query.c \ + a6xx/fd6_query.h \ + a6xx/fd6_rasterizer.c \ + a6xx/fd6_rasterizer.h \ + a6xx/fd6_resource.c \ + a6xx/fd6_resource.h \ + a6xx/fd6_screen.c \ + 
a6xx/fd6_screen.h \ + a6xx/fd6_texture.c \ + a6xx/fd6_texture.h \ + a6xx/fd6_zsa.c \ + a6xx/fd6_zsa.h + ir3_SOURCES := \ ir3/disasm-a3xx.c \ ir3/instr-a3xx.h \ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c new file mode 100644 index 00000000000..e62163e797c --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_blend.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd6_blend.h" +#include "fd6_context.h" +#include "fd6_format.h" + +// XXX move somewhere common.. same across a3xx/a4xx/a5xx.. 
+static enum a3xx_rb_blend_opcode +blend_func(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return BLEND_DST_PLUS_SRC; + case PIPE_BLEND_MIN: + return BLEND_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return BLEND_MAX_DST_SRC; + case PIPE_BLEND_SUBTRACT: + return BLEND_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLEND_DST_MINUS_SRC; + default: + DBG("invalid blend func: %x", func); + return 0; + } +} + +void * +fd6_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + struct fd6_blend_stateobj *so; + enum a3xx_rop_code rop = ROP_COPY; + bool reads_dest = false; + unsigned i, mrt_blend = 0; + + if (cso->logicop_enable) { + rop = cso->logicop_func; /* maps 1:1 */ + + switch (cso->logicop_func) { + case PIPE_LOGICOP_NOR: + case PIPE_LOGICOP_AND_INVERTED: + case PIPE_LOGICOP_AND_REVERSE: + case PIPE_LOGICOP_INVERT: + case PIPE_LOGICOP_XOR: + case PIPE_LOGICOP_NAND: + case PIPE_LOGICOP_AND: + case PIPE_LOGICOP_EQUIV: + case PIPE_LOGICOP_NOOP: + case PIPE_LOGICOP_OR_INVERTED: + case PIPE_LOGICOP_OR_REVERSE: + case PIPE_LOGICOP_OR: + reads_dest = true; + break; + } + } + + so = CALLOC_STRUCT(fd6_blend_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + so->lrz_write = true; /* unless blend enabled for any MRT */ + + for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { + const struct pipe_rt_blend_state *rt; + + if (cso->independent_blend_enable) + rt = &cso->rt[i]; + else + rt = &cso->rt[0]; + + so->rb_mrt[i].blend_control_rgb = + A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | + A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)); + + so->rb_mrt[i].blend_control_alpha = + A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | + A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | + 
A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); + + so->rb_mrt[i].blend_control_no_alpha_rgb = + A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) | + A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor))); + + + so->rb_mrt[i].control = + A6XX_RB_MRT_CONTROL_ROP_CODE(rop) | + COND(cso->logicop_enable, A6XX_RB_MRT_CONTROL_ROP_ENABLE) | + A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); + + if (rt->blend_enable) { + so->rb_mrt[i].control |= +// A6XX_RB_MRT_CONTROL_READ_DEST_ENABLE | + A6XX_RB_MRT_CONTROL_BLEND | + A6XX_RB_MRT_CONTROL_BLEND2; + mrt_blend |= (1 << i); + so->lrz_write = false; + } + + if (reads_dest) { +// so->rb_mrt[i].control |= A6XX_RB_MRT_CONTROL_READ_DEST_ENABLE; + mrt_blend |= (1 << i); + } + +// if (cso->dither) +// so->rb_mrt[i].buf_info |= A6XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); + } + + so->rb_blend_cntl = A6XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) | + COND(cso->independent_blend_enable, A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND); + so->sp_blend_cntl = A6XX_SP_BLEND_CNTL_UNK8 | + COND(mrt_blend, A6XX_SP_BLEND_CNTL_ENABLED); + + return so; +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h new file mode 100644 index 00000000000..45ad4ea071f --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_BLEND_H_ +#define FD6_BLEND_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + +struct fd6_blend_stateobj { + struct pipe_blend_state base; + + struct { + uint32_t control; + uint32_t buf_info; + /* Blend control bits for color if there is an alpha channel */ + uint32_t blend_control_rgb; + /* Blend control bits for color if there is no alpha channel */ + uint32_t blend_control_no_alpha_rgb; + /* Blend control bits for alpha channel */ + uint32_t blend_control_alpha; + } rb_mrt[A6XX_MAX_RENDER_TARGETS]; + uint32_t rb_blend_cntl; + uint32_t sp_blend_cntl; + bool lrz_write; +}; + +static inline struct fd6_blend_stateobj * +fd6_blend_stateobj(struct pipe_blend_state *blend) +{ + return (struct fd6_blend_stateobj *)blend; +} + +void * fd6_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso); + +#endif /* FD6_BLEND_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c new file mode 100644 index 00000000000..4c415480875 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "freedreno_query_acc.h" + +#include "fd6_context.h" +#include "fd6_blend.h" +#include "fd6_draw.h" +#include "fd6_emit.h" +#include "fd6_gmem.h" +#include "fd6_program.h" +#include "fd6_query.h" +#include "fd6_rasterizer.h" +#include "fd6_texture.h" +#include "fd6_zsa.h" + +static void +fd6_context_destroy(struct pipe_context *pctx) +{ + struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx)); + + fd_bo_del(fd6_ctx->vs_pvt_mem); + fd_bo_del(fd6_ctx->fs_pvt_mem); + fd_bo_del(fd6_ctx->vsc_size_mem); + fd_bo_del(fd6_ctx->blit_mem); + + fd_context_cleanup_common_vbos(&fd6_ctx->base); + + u_upload_destroy(fd6_ctx->border_color_uploader); + + fd_context_destroy(pctx); + + free(fd6_ctx); +} + +static const uint8_t primtypes[] = { + [PIPE_PRIM_POINTS] = DI_PT_POINTLIST, + [PIPE_PRIM_LINES] = DI_PT_LINELIST, + [PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP, + [PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP, + [PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN, + [PIPE_PRIM_MAX] = DI_PT_RECTLIST, /* internal clear blits */ +}; + +struct pipe_context * +fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct fd_screen *screen = fd_screen(pscreen); + struct fd6_context *fd6_ctx = CALLOC_STRUCT(fd6_context); + struct pipe_context *pctx; + + if (!fd6_ctx) + return NULL; + + pctx = &fd6_ctx->base.base; + + fd6_ctx->base.dev = 
fd_device_ref(screen->dev); + fd6_ctx->base.screen = fd_screen(pscreen); + + pctx->destroy = fd6_context_destroy; + pctx->create_blend_state = fd6_blend_state_create; + pctx->create_rasterizer_state = fd6_rasterizer_state_create; + pctx->create_depth_stencil_alpha_state = fd6_zsa_state_create; + + fd6_draw_init(pctx); + fd6_gmem_init(pctx); + fd6_texture_init(pctx); + fd6_prog_init(pctx); + fd6_emit_init(pctx); + + pctx = fd_context_init(&fd6_ctx->base, pscreen, primtypes, priv, flags); + if (!pctx) + return NULL; + + fd6_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd6_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd6_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd6_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd_context_setup_common_vbos(&fd6_ctx->base); + + fd6_query_context_init(pctx); + + fd6_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0, + PIPE_USAGE_STREAM, 0); + + return pctx; +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h new file mode 100644 index 00000000000..7fd406650ee --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_CONTEXT_H_ +#define FD6_CONTEXT_H_ + +#include "util/u_upload_mgr.h" + +#include "freedreno_drmif.h" + +#include "freedreno_context.h" + +#include "ir3_shader.h" + +#include "a6xx.xml.h" + +struct fd6_context { + struct fd_context base; + + struct fd_bo *vs_pvt_mem, *fs_pvt_mem; + + /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We + * could combine it with another allocation. + */ + struct fd_bo *vsc_size_mem; + + /* TODO not sure what this is for.. probably similar to + * CACHE_FLUSH_TS on kernel side, where value gets written + * to this address synchronized w/ 3d (ie. 
a way to + * synchronize when the CP is running far ahead) + */ + struct fd_bo *blit_mem; + + struct u_upload_mgr *border_color_uploader; + struct pipe_resource *border_color_buf; + + /* if *any* of bits are set in {v,f}saturate_{s,t,r} */ + bool vsaturate, fsaturate; + + /* bitmask of sampler which needs coords clamped for vertex + * shader: + */ + uint16_t vsaturate_s, vsaturate_t, vsaturate_r; + + /* bitmask of sampler which needs coords clamped for frag + * shader: + */ + uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + + /* bitmask of samplers which need astc srgb workaround: */ + uint16_t vastc_srgb, fastc_srgb; + + /* some state changes require a different shader variant. Keep + * track of this so we know when we need to re-emit shader state + * due to variant change. See fixup_shader_state() + */ + struct ir3_shader_key last_key; + + /* number of active samples-passed queries: */ + int samples_passed_queries; + + /* cached state about current emitted shader program (3d): */ + unsigned max_loc; +}; + +static inline struct fd6_context * +fd6_context(struct fd_context *ctx) +{ + return (struct fd6_context *)ctx; +} + +struct pipe_context * +fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); + +/* helper for places where we need to stall CP to wait for previous draws: */ +static inline void +fd6_emit_flush(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CACHE_FLUSH_TS); + OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ + OUT_RING(ring, 0x00000000); + + OUT_WFI5(ring); +} + +static inline void +emit_marker6(struct fd_ringbuffer *ring, int scratch_idx) +{ + extern unsigned marker_cnt; + unsigned reg = REG_A6XX_CP_SCRATCH_REG(scratch_idx); + OUT_PKT4(ring, reg, 1); + OUT_RING(ring, ++marker_cnt); +} + +#endif /* FD6_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c new file mode 
100644 index 00000000000..e4d44683ca7 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -0,0 +1,492 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd6_draw.h" +#include "fd6_context.h" +#include "fd6_emit.h" +#include "fd6_program.h" +#include "fd6_format.h" +#include "fd6_zsa.h" + + +static void +draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd6_emit *emit, unsigned index_offset) +{ + const struct pipe_draw_info *info = emit->info; + enum pc_di_primtype primtype = ctx->primtypes[info->mode]; + + fd6_emit_state(ctx, ring, emit); + + if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) + fd6_emit_vertex_bufs(ring, emit); + + OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 2); + OUT_RING(ring, info->index_size ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */ + OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */ + + OUT_PKT4(ring, REG_A6XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ + info->restart_index : 0xffffffff); + + fd6_emit_render_cntl(ctx, false, emit->key.binning_pass); + fd6_draw_emit(ctx->batch, ring, primtype, + emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, + info, index_offset); +} + +/* fixup dirty shader state in case some "unrelated" (from the state- + * tracker's perspective) state change causes us to switch to a + * different variant. 
+ */ +static void +fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) +{ + struct fd6_context *fd6_ctx = fd6_context(ctx); + struct ir3_shader_key *last_key = &fd6_ctx->last_key; + + if (!ir3_shader_key_equal(last_key, key)) { + if (ir3_shader_key_changes_fs(last_key, key)) { + ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG; + ctx->dirty |= FD_DIRTY_PROG; + } + + if (ir3_shader_key_changes_vs(last_key, key)) { + ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG; + ctx->dirty |= FD_DIRTY_PROG; + } + + fd6_ctx->last_key = *key; + } +} + +static bool +fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, + unsigned index_offset) +{ + struct fd6_context *fd6_ctx = fd6_context(ctx); + struct fd6_emit emit = { + .debug = &ctx->debug, + .vtx = &ctx->vtx, + .prog = &ctx->prog, + .info = info, + .key = { + .color_two_side = ctx->rasterizer->light_twoside, + .vclamp_color = ctx->rasterizer->clamp_vertex_color, + .fclamp_color = ctx->rasterizer->clamp_fragment_color, + .rasterflat = ctx->rasterizer->flatshade, + .half_precision = ctx->in_blit && + fd_half_precision(&ctx->batch->framebuffer), + .ucp_enables = ctx->rasterizer->clip_plane_enable, + .has_per_samp = (fd6_ctx->fsaturate || fd6_ctx->vsaturate || + fd6_ctx->fastc_srgb || fd6_ctx->vastc_srgb), + .vsaturate_s = fd6_ctx->vsaturate_s, + .vsaturate_t = fd6_ctx->vsaturate_t, + .vsaturate_r = fd6_ctx->vsaturate_r, + .fsaturate_s = fd6_ctx->fsaturate_s, + .fsaturate_t = fd6_ctx->fsaturate_t, + .fsaturate_r = fd6_ctx->fsaturate_r, + .vastc_srgb = fd6_ctx->vastc_srgb, + .fastc_srgb = fd6_ctx->fastc_srgb, + .vsamples = ctx->tex[PIPE_SHADER_VERTEX].samples, + .fsamples = ctx->tex[PIPE_SHADER_FRAGMENT].samples, + }, + .rasterflat = ctx->rasterizer->flatshade, + .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, + .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, + }; + + fixup_shader_state(ctx, &emit.key); + + unsigned dirty = ctx->dirty; + const 
struct ir3_shader_variant *vp = fd6_emit_get_vp(&emit); + const struct ir3_shader_variant *fp = fd6_emit_get_fp(&emit); + + /* do regular pass first, since that is more likely to fail compiling: */ + + if (!vp || !fp) + return false; + + ctx->stats.vs_regs += ir3_shader_halfregs(vp); + ctx->stats.fs_regs += ir3_shader_halfregs(fp); + + /* figure out whether we need to disable LRZ write for binning + * pass using draw pass's fp: + */ + emit.no_lrz_write = fp->writes_pos || fp->has_kill; + + emit.key.binning_pass = false; + emit.dirty = dirty; + + draw_impl(ctx, ctx->batch->draw, &emit, index_offset); + + /* and now binning pass: */ + emit.key.binning_pass = true; + emit.dirty = dirty & ~(FD_DIRTY_BLEND); + emit.vp = NULL; /* we changed key so need to refetch vp */ + emit.fp = NULL; + draw_impl(ctx, ctx->batch->binning, &emit, index_offset); + + if (emit.streamout_mask) { + struct fd_ringbuffer *ring = ctx->batch->draw; + + for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { + if (emit.streamout_mask & (1 << i)) { + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, FLUSH_SO_0 + i); + } + } + } + + fd_context_all_clean(ctx); + + return true; +} + +static bool is_z32(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + return true; + default: + return false; + } +} + +#if 0 +static void +fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) +{ + struct fd_ringbuffer *ring; + uint32_t clear = util_pack_z(PIPE_FORMAT_Z16_UNORM, depth); + + // TODO mid-frame clears (ie. app doing crazy stuff)?? Maybe worth + // splitting both clear and lrz clear out into their own rb's. And + // just throw away any draws prior to clear. 
(Anything not fullscreen + // clear, just fallback to generic path that treats it as a normal + // draw + + if (!batch->lrz_clear) { + batch->lrz_clear = fd_ringbuffer_new(batch->ctx->pipe, 0x1000); + fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem); + } + + ring = batch->lrz_clear; + + OUT_WFI5(ring); + + OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x10000000); + + OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0x20fffff); + + OUT_PKT4(ring, REG_A6XX_GRAS_SU_CNTL, 1); + OUT_RING(ring, A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(0.0)); + + OUT_PKT4(ring, REG_A6XX_GRAS_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A6XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, 0x00000181); + + OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A6XX_RB_MRT_BUF_INFO(0), 5); + OUT_RING(ring, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(RB5_R16_UNORM) | + A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX)); + OUT_RING(ring, A6XX_RB_MRT_PITCH(zsbuf->lrz_pitch * 2)); + OUT_RING(ring, A6XX_RB_MRT_ARRAY_PITCH(fd_bo_size(zsbuf->lrz))); + OUT_RELOCW(ring, zsbuf->lrz, 0x1000, 0, 0); + + OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A6XX_RB_DEST_MSAA_CNTL, 1); + OUT_RING(ring, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A6XX_RB_BLIT_CNTL_BUF(BLIT_MRT0)); + + OUT_PKT4(ring, REG_A6XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, A6XX_RB_CLEAR_CNTL_FAST_CLEAR | + A6XX_RB_CLEAR_CNTL_MASK(0xf)); + + OUT_PKT4(ring, REG_A6XX_RB_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */ + + OUT_PKT4(ring, REG_A6XX_VSC_RESOLVE_CNTL, 2); + OUT_RING(ring, A6XX_VSC_RESOLVE_CNTL_X(zsbuf->lrz_width) | + A6XX_VSC_RESOLVE_CNTL_Y(zsbuf->lrz_height)); + OUT_RING(ring, 0x00000000); // XXX UNKNOWN_0CDE + + OUT_PKT4(ring, REG_A6XX_RB_CNTL, 1); + OUT_RING(ring, A6XX_RB_CNTL_BYPASS); + + 
OUT_PKT4(ring, REG_A6XX_RB_RESOLVE_CNTL_1, 2); + OUT_RING(ring, A6XX_RB_RESOLVE_CNTL_1_X(0) | + A6XX_RB_RESOLVE_CNTL_1_Y(0)); + OUT_RING(ring, A6XX_RB_RESOLVE_CNTL_2_X(zsbuf->lrz_width - 1) | + A6XX_RB_RESOLVE_CNTL_2_Y(zsbuf->lrz_height - 1)); + + fd6_emit_blit(batch->ctx, ring); +} +#endif + +#if 0 +clear_with_cp_blit() +{ + /* Clear with CP_BLIT */ + WRITE(REG_A6XX_GRAS_2D_BLIT_CNTL, 0x10f43180); + + OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 7); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + WRITE(0xacc0, 0xf181); + WRITE(0xacc0, 0xf181); + + WRITE(REG_A6XX_GRAS_2D_BLIT_CNTL, 0x10f43180); + WRITE(REG_A6XX_RB_2D_BLIT_CNTL, 0x10f43180); + + OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0xff); + OUT_RING(ring, 0); + + DBG("%x %x %x %x\n", color->ui[0], color->ui[1], color->ui[2], color->ui[3]); + + struct pipe_surface *psurf = pfb->cbufs[0]; + struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level); + + uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); + uint32_t stride = slice->pitch * rsc->cpp; + + enum a6xx_color_fmt format = fd6_pipe2color(pfmt); + OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9); + OUT_RING(ring, + A6XX_RB_2D_DST_INFO_COLOR_FORMAT(format) | + A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_2D_DST_INFO_COLOR_SWAP(WXYZ)); + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_2D_DST_LO/HI */ + OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(stride)); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2); + OUT_RING(ring, + 
A6XX_GRAS_2D_DST_TL_X(ctx->batch->max_scissor.minx) | + A6XX_GRAS_2D_DST_TL_Y(ctx->batch->max_scissor.miny)); + OUT_RING(ring, + A6XX_GRAS_2D_DST_BR_X(ctx->batch->max_scissor.maxx) | + A6XX_GRAS_2D_DST_BR_Y(ctx->batch->max_scissor.maxy)); + + OUT_PKT7(ring, CP_BLIT, 1); + OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE)); +} +#endif + +static bool +fd6_clear(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + struct fd_ringbuffer *ring = ctx->batch->draw; + + if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && + is_z32(pfb->zsbuf->format)) + return false; + + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); + + fd6_emit_render_cntl(ctx, true, false); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); + OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(scissor->minx) | + A6XX_RB_BLIT_SCISSOR_TL_Y(scissor->miny)); + OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(scissor->maxx) | + A6XX_RB_BLIT_SCISSOR_BR_Y(scissor->maxy)); + + if (buffers & PIPE_CLEAR_COLOR) { + for (int i = 0; i < pfb->nr_cbufs; i++) { + union util_color uc = {0}; + + if (!pfb->cbufs[i]) + continue; + + if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; + + enum pipe_format pfmt = pfb->cbufs[i]->format; + + // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? 
+ union pipe_color_union swapped; + switch (fd6_pipe2swap(pfmt)) { + case WZYX: + swapped.ui[0] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[2] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case WXYZ: + swapped.ui[2] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[0] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case ZYXW: + swapped.ui[3] = color->ui[0]; + swapped.ui[0] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[2] = color->ui[3]; + break; + case XYZW: + swapped.ui[3] = color->ui[0]; + swapped.ui[2] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[0] = color->ui[3]; + break; + } + + if (util_format_is_pure_uint(pfmt)) { + util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); + } else if (util_format_is_pure_sint(pfmt)) { + util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); + } else { + util_pack_color(swapped.f, pfmt, &uc); + } + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RINGP(ring, i, &ctx->batch->gmem_patches); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); + OUT_RING(ring, uc.ui[0]); + OUT_RING(ring, uc.ui[1]); + OUT_RING(ring, uc.ui[2]); + OUT_RING(ring, uc.ui[3]); + + fd6_emit_blit(ctx, ring); + } + } + + if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + enum pipe_format pfmt = pfb->zsbuf->format; + uint32_t clear = util_pack_z_stencil(pfmt, depth, stencil); + uint32_t mask = 0; + + if (buffers & PIPE_CLEAR_DEPTH) + mask |= 0x1; + + if (buffers & PIPE_CLEAR_STENCIL) + mask |= 0x2; + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + 
OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + // XXX UNK0 for separate stencil ?? + A6XX_RB_BLIT_INFO_DEPTH | + A6XX_RB_BLIT_INFO_CLEAR_MASK(mask)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RINGP(ring, MAX_RENDER_TARGETS, &ctx->batch->gmem_patches); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear); + + fd6_emit_blit(ctx, ring); + +#if 0 + if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) { + struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); + if (zsbuf->lrz) { + zsbuf->lrz_valid = true; + fd6_clear_lrz(ctx->batch, zsbuf, depth); + } + } +#endif + } + + return true; +} + +void +fd6_draw_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->draw_vbo = fd6_draw_vbo; + ctx->clear = fd6_clear; +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.h b/src/gallium/drivers/freedreno/a6xx/fd6_draw.h new file mode 100644 index 00000000000..8f3c058cf5c --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_DRAW_H_ +#define FD6_DRAW_H_ + +#include "pipe/p_context.h" + +#include "freedreno_draw.h" + +#include "fd6_context.h" + +/* some bits in common w/ a4xx: */ +#include "a4xx/fd4_draw.h" + +void fd6_draw_init(struct pipe_context *pctx); + +static inline void +fd6_draw(struct fd_batch *batch, struct fd_ringbuffer *ring, + enum pc_di_primtype primtype, + enum pc_di_vis_cull_mode vismode, + enum pc_di_src_sel src_sel, uint32_t count, + uint32_t instances, enum a4xx_index_size idx_type, + uint32_t idx_size, uint32_t idx_offset, + struct pipe_resource *idx_buffer) +{ + /* for debug after a lock up, write a unique counter value + * to scratch7 for each draw, to make it easier to match up + * register dumps to cmdstream. The combination of IB + * (scratch6) and DRAW is enough to "triangulate" the + * particular draw that caused lockup. + */ + emit_marker6(ring, 7); + + OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 
7 : 3); + if (vismode == USE_VISIBILITY) { + /* leave vis mode blank for now, it will be patched up when + * we know if we are binning or not + */ + OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0) | 0x2000, + &batch->draw_patches); + } else { + OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode) | 0x2000); + } + OUT_RING(ring, instances); /* NumInstances */ + OUT_RING(ring, count); /* NumIndices */ + if (idx_buffer) { + OUT_RING(ring, 0x0); /* XXX */ + OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0); + OUT_RING (ring, idx_size); + } + + emit_marker6(ring, 7); + + fd_reset_wfi(batch); +} + +/* Emit one draw described by a pipe_draw_info: indirect draws are emitted here directly (indexed or not), direct draws are translated into fd6_draw() arguments. index_offset is the byte offset at which the index buffer is bound. */ +static inline void +fd6_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, + enum pc_di_primtype primtype, + enum pc_di_vis_cull_mode vismode, + const struct pipe_draw_info *info, + unsigned index_offset) +{ + struct pipe_resource *idx_buffer = NULL; + enum a4xx_index_size idx_type; + enum pc_di_src_sel src_sel; + uint32_t idx_size, idx_offset; + + if (info->indirect) { + struct fd_resource *ind = fd_resource(info->indirect->buffer); + + emit_marker6(ring, 7); + + if (info->index_size) { + struct pipe_resource *idx = info->index.resource; + /* The index buffer is bound at index_offset, so only the bytes after it can hold indices; the offset into the indirect-args buffer has no bearing on this bound. */ + unsigned max_indicies = (idx->width0 - index_offset) / + info->index_size; + + OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6); + OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA, + fd4_size2indextype(info->index_size), 0), + &batch->draw_patches); + OUT_RELOC(ring, fd_resource(idx)->bo, + index_offset, 0, 0); + // XXX: Check A5xx vs A6xx + OUT_RING(ring, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(max_indicies)); + OUT_RELOC(ring, ind->bo, info->indirect->offset, 0, 0); + } else { + OUT_PKT7(ring, CP_DRAW_INDIRECT, 3); + OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0), + &batch->draw_patches); + OUT_RELOC(ring, ind->bo, info->indirect->offset, 0, 0); + } + + emit_marker6(ring, 7); + fd_reset_wfi(batch); + + return; + } + + if (info->index_size) { + assert(!info->has_user_indices); + + 
idx_buffer = info->index.resource; + idx_type = fd4_size2indextype(info->index_size); + idx_size = info->index_size * info->count; + idx_offset = index_offset + info->start * info->index_size; + src_sel = DI_SRC_SEL_DMA; + } else { + idx_buffer = NULL; + idx_type = INDEX4_SIZE_32_BIT; + idx_size = 0; + idx_offset = 0; + src_sel = DI_SRC_SEL_AUTO_INDEX; + } + + fd6_draw(batch, ring, primtype, vismode, src_sel, + info->count, info->instance_count, + idx_type, idx_size, idx_offset, idx_buffer); +} + +#endif /* FD6_DRAW_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c new file mode 100644 index 00000000000..f1dd081ebc3 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -0,0 +1,1137 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_helpers.h" +#include "util/u_format.h" +#include "util/u_viewport.h" + +#include "freedreno_resource.h" +#include "freedreno_query_hw.h" + +#include "fd6_emit.h" +#include "fd6_blend.h" +#include "fd6_context.h" +#include "fd6_image.h" +#include "fd6_program.h" +#include "fd6_rasterizer.h" +#include "fd6_texture.h" +#include "fd6_format.h" +#include "fd6_zsa.h" + +static uint32_t +shader_t_to_opcode(enum shader_t type) +{ + switch (type) { + case SHADER_VERTEX: + case SHADER_TCS: + case SHADER_TES: + case SHADER_GEOM: + return CP_LOAD_STATE6_GEOM; + case SHADER_FRAGMENT: + case SHADER_COMPUTE: + return CP_LOAD_STATE6_FRAG; + default: + unreachable("bad shader type"); + } +} + +/* regid: base const register + * prsc or dwords: buffer containing constant values + * sizedwords: size of const value buffer + */ +static void +fd6_emit_const(struct fd_ringbuffer *ring, enum shader_t type, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc) +{ + uint32_t i, sz; + enum a6xx_state_src src; + + debug_assert((regid % 4) == 0); + debug_assert((sizedwords % 4) == 0); + + if (prsc) { + sz = 0; + src = SS6_INDIRECT; + } else { + sz = sizedwords; + src = SS6_DIRECT; + } + + OUT_PKT7(ring, shader_t_to_opcode(type), 3 + sz); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(src) | + CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(sizedwords/4)); + if (prsc) { + struct fd_bo *bo = fd_resource(prsc)->bo; + OUT_RELOC(ring, bo, offset, 0, 0); + } else { + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; + } + for (i = 0; i < sz; i++) { + OUT_RING(ring, 
dwords[i]); + } +} + +static void +fd6_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) +{ + uint32_t anum = align(num, 2); + uint32_t i; + + debug_assert((regid % 4) == 0); + + OUT_PKT7(ring, shader_t_to_opcode(type), 3 + (2 * anum)); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)| + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(anum/2)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + + for (i = 0; i < num; i++) { + if (prscs[i]) { + if (write) { + OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); + } else { + OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); + } + } else { + OUT_RING(ring, 0xbad00000 | (i << 16)); + OUT_RING(ring, 0xbad00000 | (i << 16)); + } + } + + for (; i < anum; i++) { + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0xffffffff); + } +} + +/* Border color layout is diff from a4xx/a5xx.. if it turns out to be + * the same as a6xx then move this somewhere common ;-) + * + * Entry layout looks like (total size, 0x60 bytes): + */ + +struct PACKED bcolor_entry { + uint32_t fp32[4]; + uint16_t ui16[4]; + int16_t si16[4]; + uint16_t fp16[4]; + uint16_t rgb565; + uint16_t rgb5a1; + uint16_t rgba4; + uint8_t __pad0[2]; + uint8_t ui8[4]; + int8_t si8[4]; + uint32_t rgb10a2; + uint32_t z24; /* also s8? 
*/ + uint8_t __pad1[32]; +}; + +#define FD6_BORDER_COLOR_SIZE 0x60 +#define FD6_BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * FD6_BORDER_COLOR_SIZE) + +static void +setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entries) +{ + unsigned i, j; + STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE); + + for (i = 0; i < tex->num_samplers; i++) { + struct bcolor_entry *e = &entries[i]; + struct pipe_sampler_state *sampler = tex->samplers[i]; + union pipe_color_union *bc; + + if (!sampler) + continue; + + bc = &sampler->border_color; + + /* + * XXX HACK ALERT XXX + * + * The border colors need to be swizzled in a particular + * format-dependent order. Even though samplers don't know about + * formats, we can assume that with a GL state tracker, there's a + * 1:1 correspondence between sampler and texture. Take advantage + * of that knowledge. + */ + if ((i >= tex->num_textures) || !tex->textures[i]) + continue; + + const struct util_format_description *desc = + util_format_description(tex->textures[i]->format); + + e->rgb565 = 0; + e->rgb5a1 = 0; + e->rgba4 = 0; + e->rgb10a2 = 0; + e->z24 = 0; + + for (j = 0; j < 4; j++) { + int c = desc->swizzle[j]; + + if (c >= 4) + continue; + + if (desc->channel[c].pure_integer) { + uint16_t clamped; + switch (desc->channel[c].size) { + case 2: + assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED); + clamped = CLAMP(bc->ui[j], 0, 0x3); + break; + case 8: + if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED) + clamped = CLAMP(bc->i[j], -128, 127); + else + clamped = CLAMP(bc->ui[j], 0, 255); + break; + case 10: + assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED); + clamped = CLAMP(bc->ui[j], 0, 0x3ff); + break; + case 16: + if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED) + clamped = CLAMP(bc->i[j], -32768, 32767); + else + clamped = CLAMP(bc->ui[j], 0, 65535); + break; + default: + assert(!"Unexpected bit size"); + case 32: + clamped = 0; + break; + } + e->fp32[c] = 
bc->ui[j]; + e->fp16[c] = clamped; + } else { + float f = bc->f[j]; + float f_u = CLAMP(f, 0, 1); + float f_s = CLAMP(f, -1, 1); + + e->fp32[c] = fui(f); + e->fp16[c] = util_float_to_half(f); + e->ui16[c] = f_u * 0xffff; + e->si16[c] = f_s * 0x7fff; + e->ui8[c] = f_u * 0xff; + e->si8[c] = f_s * 0x7f; + if (c == 1) + e->rgb565 |= (int)(f_u * 0x3f) << 5; + else if (c < 3) + e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0); + if (c == 3) + e->rgb5a1 |= (f_u > 0.5) ? 0x8000 : 0; + else + e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5); + if (c == 3) + e->rgb10a2 |= (int)(f_u * 0x3) << 30; + else + e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10); + e->rgba4 |= (int)(f_u * 0xf) << (c * 4); + if (c == 0) + e->z24 = f_u * 0xffffff; + } + } + +#ifdef DEBUG + memset(&e->__pad0, 0, sizeof(e->__pad0)); + memset(&e->__pad1, 0, sizeof(e->__pad1)); +#endif + } +} + +static void +emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + struct fd6_context *fd6_ctx = fd6_context(ctx); + struct bcolor_entry *entries; + unsigned off; + void *ptr; + + STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE); + + u_upload_alloc(fd6_ctx->border_color_uploader, + 0, FD6_BORDER_COLOR_UPLOAD_SIZE, + FD6_BORDER_COLOR_UPLOAD_SIZE, &off, + &fd6_ctx->border_color_buf, + &ptr); + + entries = ptr; + + setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]); + setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT], + &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]); + + OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2); + OUT_RELOC(ring, fd_resource(fd6_ctx->border_color_buf)->bo, off, 0, 0); + + u_upload_unmap(fd6_ctx->border_color_uploader); +} + +static bool +emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum a6xx_state_block sb, struct fd_texture_stateobj *tex) +{ + bool needs_border = false; + unsigned bcolor_offset; + unsigned opcode, tex_samp_reg, tex_const_reg; + + switch (sb) { + case SB6_VS_TEX: + opcode = CP_LOAD_STATE6_GEOM; + 
bcolor_offset = 0; + tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO; + break; + case SB6_FS_TEX: + opcode = CP_LOAD_STATE6_FRAG; + bcolor_offset = ctx->tex[PIPE_SHADER_VERTEX].num_samplers; + tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO; + break; + case SB6_CS_TEX: + opcode = CP_LOAD_STATE6_FRAG; + bcolor_offset = 0; + tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO; + break; + default: + unreachable("bad state block"); + } + + + if (tex->num_samplers > 0) { + struct fd_ringbuffer *state = + fd_ringbuffer_new_object(ctx->pipe, tex->num_samplers * 4); + for (unsigned i = 0; i < tex->num_samplers; i++) { + static const struct fd6_sampler_stateobj dummy_sampler = {}; + const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ? + fd6_sampler_stateobj(tex->samplers[i]) : &dummy_sampler; + OUT_RING(state, sampler->texsamp0); + OUT_RING(state, sampler->texsamp1); + OUT_RING(state, sampler->texsamp2 | + A6XX_TEX_SAMP_2_BCOLOR_OFFSET(bcolor_offset)); + OUT_RING(state, sampler->texsamp3); + needs_border |= sampler->needs_border; + } + + /* output sampler state: */ + OUT_PKT7(ring, opcode, 3); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(tex->num_samplers)); + OUT_RB(ring, state); /* SRC_ADDR_LO/HI */ + + OUT_PKT4(ring, tex_samp_reg, 2); + OUT_RB(ring, state); /* SRC_ADDR_LO/HI */ + + fd_ringbuffer_del(state); + } + + if (tex->num_textures > 0) { + struct fd_ringbuffer *state = + fd_ringbuffer_new_object(ctx->pipe, tex->num_textures * 16); + for (unsigned i = 0; i < tex->num_textures; i++) { + static const struct fd6_pipe_sampler_view dummy_view = {}; + const struct fd6_pipe_sampler_view *view = tex->textures[i] ? 
+ fd6_pipe_sampler_view(tex->textures[i]) : &dummy_view; + enum a6xx_tile_mode tile_mode = TILE6_LINEAR; + + if (view->base.texture) + tile_mode = fd_resource(view->base.texture)->tile_mode; + + OUT_RING(state, view->texconst0 | + A6XX_TEX_CONST_0_TILE_MODE(tile_mode)); + OUT_RING(state, view->texconst1); + OUT_RING(state, view->texconst2); + OUT_RING(state, view->texconst3); + + if (view->base.texture) { + struct fd_resource *rsc = fd_resource(view->base.texture); + if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT) + rsc = rsc->stencil; + OUT_RELOC(state, rsc->bo, view->offset, + (uint64_t)view->texconst5 << 32, 0); + } else { + OUT_RING(state, 0x00000000); + OUT_RING(state, view->texconst5); + } + + OUT_RING(state, view->texconst6); + OUT_RING(state, view->texconst7); + OUT_RING(state, view->texconst8); + OUT_RING(state, view->texconst9); + OUT_RING(state, view->texconst10); + OUT_RING(state, view->texconst11); + OUT_RING(state, 0); + OUT_RING(state, 0); + OUT_RING(state, 0); + OUT_RING(state, 0); + } + + /* emit texture state: */ + OUT_PKT7(ring, opcode, 3); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(tex->num_textures)); + OUT_RB(ring, state); /* SRC_ADDR_LO/HI */ + + OUT_PKT4(ring, tex_const_reg, 2); + OUT_RB(ring, state); /* SRC_ADDR_LO/HI */ + + fd_ringbuffer_del(state); + } + + return needs_border; +} + +static void +emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum a6xx_state_block sb, struct fd_shaderbuf_stateobj *so) +{ + unsigned count = util_last_bit(so->enabled_mask); + unsigned opcode; + + if (count == 0) + return; + + switch (sb) { + case SB6_SSBO: + case SB6_CS_SSBO: + opcode = CP_LOAD_STATE6_GEOM; + break; + default: + unreachable("bad state block"); + } + + OUT_PKT7(ring, opcode, 3 + (4 * count)); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | + 
CP_LOAD_STATE6_0_STATE_TYPE(0) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + +#if 0 + OUT_PKT7(ring, opcode, 3 + (2 * count)); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(1) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; + unsigned sz = buf->buffer_size; + + /* width is in dwords, overflows into height: */ + sz /= 4; + + OUT_RING(ring, A6XX_SSBO_1_0_WIDTH(sz)); + OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(sz >> 16)); + } +#endif + + OUT_PKT7(ring, opcode, 3 + (2 * count)); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(2) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(count)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + for (unsigned i = 0; i < count; i++) { + struct pipe_shader_buffer *buf = &so->sb[i]; + if (buf->buffer) { + struct fd_resource *rsc = fd_resource(buf->buffer); + OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + } +} + +void +fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit) +{ + int32_t i, j; + const struct fd_vertex_state *vtx = emit->vtx; + const struct ir3_shader_variant *vp = fd6_emit_get_vp(emit); + + for (i = 0, j = 0; i <= vp->inputs_count; i++) { + if 
(vp->inputs[i].sysval) + continue; + if (vp->inputs[i].compmask) { + struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; + const struct pipe_vertex_buffer *vb = + &vtx->vertexbuf.vb[elem->vertex_buffer_index]; + struct fd_resource *rsc = fd_resource(vb->buffer.resource); + enum pipe_format pfmt = elem->src_format; + enum a6xx_vtx_fmt fmt = fd6_pipe2vtx(pfmt); + bool isint = util_format_is_pure_integer(pfmt); + uint32_t off = vb->buffer_offset + elem->src_offset; + uint32_t size = fd_bo_size(rsc->bo) - off; + debug_assert(fmt != ~0); + + OUT_PKT4(ring, REG_A6XX_VFD_FETCH(j), 4); + OUT_RELOC(ring, rsc->bo, off, 0, 0); + OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ + OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ + + OUT_PKT4(ring, REG_A6XX_VFD_DECODE(j), 2); + OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(j) | + A6XX_VFD_DECODE_INSTR_FORMAT(fmt) | + COND(elem->instance_divisor, A6XX_VFD_DECODE_INSTR_INSTANCED) | + A6XX_VFD_DECODE_INSTR_SWAP(fd6_pipe2swap(pfmt)) | + A6XX_VFD_DECODE_INSTR_UNK30 | + COND(!isint, A6XX_VFD_DECODE_INSTR_FLOAT)); + OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */ + + OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(j), 1); + OUT_RING(ring, A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) | + A6XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid)); + + j++; + } + } + + OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1); + OUT_RING(ring, A6XX_VFD_CONTROL_0_VTXCNT(j) | (j << 8)); +} + +void +fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd6_emit *emit) +{ + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + const struct ir3_shader_variant *vp = fd6_emit_get_vp(emit); + const struct ir3_shader_variant *fp = fd6_emit_get_fp(emit); + const enum fd_dirty_3d_state dirty = emit->dirty; + bool needs_border = false; + + emit_marker6(ring, 5); + + if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) { + unsigned char mrt_comp[A6XX_MAX_RENDER_TARGETS] = {0}; + + for (unsigned i 
= 0; i < pfb->nr_cbufs; i++) { + mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0; + } + + OUT_PKT4(ring, REG_A6XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A6XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A6XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A6XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A6XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A6XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A6XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A6XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A6XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); + + /* Mirror the RB register: RTn is enabled only when color buffer n is bound, so each field takes mrt_comp[n] rather than the mask of RT0. */ + OUT_PKT4(ring, REG_A6XX_SP_FS_RENDER_COMPONENTS, 1); + OUT_RING(ring, + A6XX_SP_FS_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A6XX_SP_FS_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A6XX_SP_FS_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A6XX_SP_FS_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A6XX_SP_FS_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A6XX_SP_FS_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A6XX_SP_FS_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A6XX_SP_FS_RENDER_COMPONENTS_RT7(mrt_comp[7])); + } + + if (dirty & FD_DIRTY_ZSA) { + struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); + uint32_t rb_alpha_control = zsa->rb_alpha_control; + + /* alpha test is masked off when color buffer 0 has a pure-integer format */ + if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) + rb_alpha_control &= ~A6XX_RB_ALPHA_CONTROL_ALPHA_TEST; + + OUT_PKT4(ring, REG_A6XX_RB_ALPHA_CONTROL, 1); + OUT_RING(ring, rb_alpha_control); + + OUT_PKT4(ring, REG_A6XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, zsa->rb_stencil_control); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG)) { + struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend); + struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); + + if (pfb->zsbuf) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl; + + if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) + gras_lrz_cntl = 0; + else if (emit->key.binning_pass && blend->lrz_write && zsa->lrz_write) + gras_lrz_cntl |= 
A6XX_GRAS_LRZ_CNTL_LRZ_WRITE; + + OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, gras_lrz_cntl); + } + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { + struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); + struct pipe_stencil_ref *sr = &ctx->stencil_ref; + + OUT_PKT4(ring, REG_A6XX_RB_STENCILREF, 3); + OUT_RING(ring, A6XX_RB_STENCILREF_REF(sr->ref_value[0])); // TODO bf? + OUT_RING(ring, zsa->rb_stencilmask); + OUT_RING(ring, zsa->rb_stencilwrmask); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { + struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa); + //bool fragz = fp->has_kill | fp->writes_pos; + + OUT_PKT4(ring, REG_A6XX_RB_DEPTH_CNTL, 1); + OUT_RING(ring, zsa->rb_depth_cntl); + +#if 0 + OUT_PKT4(ring, REG_A6XX_RB_DEPTH_PLANE_CNTL, 1); + OUT_RING(ring, COND(fragz, A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) | + COND(fragz && fp->frag_coord, A6XX_RB_DEPTH_PLANE_CNTL_UNK1)); + + OUT_PKT4(ring, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); + OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z) | + COND(fragz && fp->frag_coord, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1)); +#endif + } + + if (dirty & FD_DIRTY_SCISSOR) { + struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + + OUT_PKT4(ring, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2); + OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) | + A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny)); + OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) | + A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1)); + + OUT_PKT4(ring, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); + OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) | + A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny)); + OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) | + A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1)); + + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + 
ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); + } + + if (dirty & FD_DIRTY_VIEWPORT) { + fd_wfi(ctx->batch, ring); + OUT_PKT4(ring, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6); + OUT_RING(ring, A6XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0])); + OUT_RING(ring, A6XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0])); + OUT_RING(ring, A6XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1])); + OUT_RING(ring, A6XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1])); + OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2])); + OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); + } + + if (dirty & FD_DIRTY_PROG) + fd6_program_emit(ctx, ring, emit); + + if (dirty & FD_DIRTY_RASTERIZER) { + struct fd6_rasterizer_stateobj *rasterizer = + fd6_rasterizer_stateobj(ctx->rasterizer); + + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8000, 1); + OUT_RING(ring, 0x80); + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8001, 1); + OUT_RING(ring, 0x0); + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8004, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_GRAS_SU_CNTL, 1); + OUT_RING(ring, rasterizer->gras_su_cntl); + + OUT_PKT4(ring, REG_A6XX_GRAS_SU_POINT_MINMAX, 2); + OUT_RING(ring, rasterizer->gras_su_point_minmax); + OUT_RING(ring, rasterizer->gras_su_point_size); + + OUT_PKT4(ring, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3); + OUT_RING(ring, rasterizer->gras_su_poly_offset_scale); + OUT_RING(ring, rasterizer->gras_su_poly_offset_offset); + OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp); + +#if 0 + OUT_PKT4(ring, REG_A6XX_PC_RASTER_CNTL, 1); + OUT_RING(ring, rasterizer->pc_raster_cntl); + + OUT_PKT4(ring, REG_A6XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, rasterizer->gras_cl_clip_cntl); +#endif + } + + /* note: must come after program emit.. 
because there is some overlap + * in registers, ex. PC_PRIMITIVE_CNTL and we rely on some cached + * values from fd6_program_emit() to avoid having to re-emit the prog + * every time rast state changes. + * + * Since the primitive restart state is not part of a tracked object, we + * re-emit this register every time. + */ + if (emit->info && ctx->rasterizer) { + struct fd6_rasterizer_stateobj *rasterizer = + fd6_rasterizer_stateobj(ctx->rasterizer); + OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9806, 1); + OUT_RING(ring, 0); + OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9990, 1); + OUT_RING(ring, 0); + OUT_PKT4(ring, REG_A6XX_VFD_UNKNOWN_A008, 1); + OUT_RING(ring, 0); + + + OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1); + OUT_RING(ring, rasterizer->pc_primitive_cntl | + COND(emit->info->primitive_restart && emit->info->index_size, + A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART)); + } + + if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { + uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH); + unsigned nr = pfb->nr_cbufs; + + if (emit->key.binning_pass) + nr = 0; + else if (ctx->rasterizer->rasterizer_discard) + nr = 0; + + OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2); + OUT_RING(ring, COND(fp->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z)); + OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr)); + + OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); + OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) | + 0xfcfc0000); + OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr)); + } + + ir3_emit_vs_consts(vp, ring, ctx, emit->info); + if (!emit->key.binning_pass) + ir3_emit_fs_consts(fp, ring, ctx); + + struct pipe_stream_output_info *info = &vp->shader->stream_output; + if (info->num_outputs) { + struct fd_streamout_stateobj *so = &ctx->streamout; + + for (unsigned i = 0; i < so->num_targets; i++) { + struct pipe_stream_output_target *target = so->targets[i]; + + if (!target) + continue; + + unsigned offset = (so->offsets[i] * 
info->stride[i] * 4) + + target->buffer_offset; + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE_LO(i), 3); + /* VPC_SO[i].BUFFER_BASE_LO: */ + OUT_RELOCW(ring, fd_resource(target->buffer)->bo, 0, 0, 0); + OUT_RING(ring, target->buffer_size + offset); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 3); + OUT_RING(ring, offset); + /* VPC_SO[i].FLUSH_BASE_LO/HI: */ + // TODO just give hw a dummy addr for now.. we should + // be using this and then CP_MEM_TO_REG to set the + // VPC_SO[i].BUFFER_OFFSET for the next draw.. + OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0x100, 0, 0); + + emit->streamout_mask |= (1 << i); + } + } + + if ((dirty & FD_DIRTY_BLEND)) { + struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend); + uint32_t i; + + /* per-MRT control words are adjusted for the format of the bound color buffer before being emitted */ + for (i = 0; i < A6XX_MAX_RENDER_TARGETS; i++) { + enum pipe_format format = pipe_surface_format(pfb->cbufs[i]); + bool is_int = util_format_is_pure_integer(format); + bool has_alpha = util_format_has_alpha(format); + uint32_t control = blend->rb_mrt[i].control; + uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; + + /* pure-integer target: keep only the component-enable bits and force ROP_COPY */ + if (is_int) { + control &= A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + control |= A6XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); + } + + /* no alpha channel: use the no-alpha RGB blend control and clear BLEND2 */ + if (has_alpha) { + blend_control |= blend->rb_mrt[i].blend_control_rgb; + } else { + blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + control &= ~A6XX_RB_MRT_CONTROL_BLEND2; + } + + OUT_PKT4(ring, REG_A6XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, control); + + OUT_PKT4(ring, REG_A6XX_RB_MRT_BLEND_CONTROL(i), 1); + OUT_RING(ring, blend_control); + } + + OUT_PKT4(ring, REG_A6XX_RB_BLEND_CNTL, 1); + OUT_RING(ring, blend->rb_blend_cntl | + A6XX_RB_BLEND_CNTL_SAMPLE_MASK(0xffff)); + + OUT_PKT4(ring, REG_A6XX_SP_BLEND_CNTL, 1); + OUT_RING(ring, blend->sp_blend_cntl); + } + + if (dirty & FD_DIRTY_BLEND_COLOR) { + struct pipe_blend_color *bcolor = &ctx->blend_color; + + OUT_PKT4(ring, REG_A6XX_RB_BLEND_RED_F32, 4); + OUT_RING(ring, 
A6XX_RB_BLEND_RED_F32(bcolor->color[0])); + OUT_RING(ring, A6XX_RB_BLEND_GREEN_F32(bcolor->color[1])); + OUT_RING(ring, A6XX_RB_BLEND_BLUE_F32(bcolor->color[2])); + OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); + } + + if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) { + needs_border |= emit_textures(ctx, ring, SB6_VS_TEX, + &ctx->tex[PIPE_SHADER_VERTEX]); + OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1); + OUT_RING(ring, ctx->tex[PIPE_SHADER_VERTEX].num_textures); + } + + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) { + needs_border |= emit_textures(ctx, ring, SB6_FS_TEX, + &ctx->tex[PIPE_SHADER_FRAGMENT]); + OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1); + OUT_RING(ring, ctx->tex[PIPE_SHADER_FRAGMENT].num_textures); + } + +#if 0 + OUT_PKT4(ring, REG_A6XX_TPL1_FS_TEX_COUNT, 1); + OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask ? + ~0 : ctx->tex[PIPE_SHADER_FRAGMENT].num_textures); + + OUT_PKT4(ring, REG_A6XX_TPL1_CS_TEX_COUNT, 1); + OUT_RING(ring, 0); +#endif + + if (needs_border) + emit_border_color(ctx, ring); + + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) + emit_ssbos(ctx, ring, SB6_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); + + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) + fd6_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT); +} + +void +fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct ir3_shader_variant *cp) +{ + enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE]; + + if (dirty & FD_DIRTY_SHADER_TEX) { + bool needs_border = false; + needs_border |= emit_textures(ctx, ring, SB6_CS_TEX, + &ctx->tex[PIPE_SHADER_COMPUTE]); + + if (needs_border) + emit_border_color(ctx, ring); + +#if 0 + OUT_PKT4(ring, REG_A6XX_TPL1_VS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_TPL1_HS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_TPL1_DS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + 
OUT_PKT4(ring, REG_A6XX_TPL1_GS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_TPL1_FS_TEX_COUNT, 1); + OUT_RING(ring, 0); +#endif + } + +#if 0 + OUT_PKT4(ring, REG_A6XX_TPL1_CS_TEX_COUNT, 1); + OUT_RING(ring, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask ? + ~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures); +#endif + + if (dirty & FD_DIRTY_SHADER_SSBO) + emit_ssbos(ctx, ring, SB6_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]); + + if (dirty & FD_DIRTY_SHADER_IMAGE) + fd6_emit_images(ctx, ring, PIPE_SHADER_COMPUTE); +} + + +/* emit setup at begin of new cmdstream buffer (don't rely on previous + * state, there could have been a context switch between ioctls): + */ +void +fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + //struct fd_context *ctx = batch->ctx; + + fd6_cache_flush(batch, ring); + + OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0xfffff); + +/* +t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) +0000000500024048: 70d08003 00000000 001c5000 00000005 +t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) +0000000500024058: 70d08003 00000010 001c7000 00000005 + +t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) +0000000500024068: 70268000 +*/ + + WRITE(REG_A6XX_RB_CCU_CNTL, 0x7c400004); + WRITE(REG_A6XX_RB_UNKNOWN_8E04, 0x00100000); + WRITE(REG_A6XX_SP_UNKNOWN_AE04, 0x8); + WRITE(REG_A6XX_SP_UNKNOWN_AE00, 0); + WRITE(REG_A6XX_SP_UNKNOWN_AE0F, 0x3f); + WRITE(REG_A6XX_SP_UNKNOWN_B605, 0x44); + WRITE(REG_A6XX_SP_UNKNOWN_B600, 0x100000); + WRITE(REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80); + WRITE(REG_A6XX_HLSQ_UNKNOWN_BE01, 0); + + WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_8600, 0x880); + WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0); + WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x00000410); + WRITE(REG_A6XX_SP_UNKNOWN_AB20, 0); + WRITE(REG_A6XX_SP_UNKNOWN_B182, 0); + WRITE(REG_A6XX_HLSQ_UNKNOWN_BB11, 0); + WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000); + WRITE(REG_A6XX_UCHE_CLIENT_PF, 4); + 
WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x0); + WRITE(REG_A6XX_SP_UNKNOWN_AB00, 0x5); + WRITE(REG_A6XX_VFD_UNKNOWN_A009, 0x00000001); + WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010); + WRITE(REG_A6XX_PC_MODE_CNTL, 0x1f); + + OUT_PKT4(ring, REG_A6XX_RB_SRGB_CNTL, 1); + OUT_RING(ring, 0); + + WRITE(REG_A6XX_GRAS_UNKNOWN_8101, 0); + WRITE(REG_A6XX_GRAS_2D_BLIT_INFO, + A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT(RB6_R8G8B8_UNORM)); + WRITE(REG_A6XX_GRAS_UNKNOWN_8109, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0); + + WRITE(REG_A6XX_RB_RENDER_CONTROL0, 0x401); + WRITE(REG_A6XX_RB_RENDER_CONTROL1, 0); + WRITE(REG_A6XX_RB_FS_OUTPUT_CNTL0, 0); + WRITE(REG_A6XX_RB_UNKNOWN_8810, 0); + WRITE(REG_A6XX_RB_UNKNOWN_8818, 0); + WRITE(REG_A6XX_RB_UNKNOWN_8819, 0); + WRITE(REG_A6XX_RB_UNKNOWN_881A, 0); + WRITE(REG_A6XX_RB_UNKNOWN_881B, 0); + WRITE(REG_A6XX_RB_UNKNOWN_881C, 0); + WRITE(REG_A6XX_RB_UNKNOWN_881D, 0); + WRITE(REG_A6XX_RB_UNKNOWN_881E, 0); + WRITE(REG_A6XX_RB_UNKNOWN_88F0, 0); + + WRITE(REG_A6XX_VPC_UNKNOWN_9101, 0xffff00); + WRITE(REG_A6XX_VPC_UNKNOWN_9107, 0); + + WRITE(REG_A6XX_VPC_UNKNOWN_9236, 1); + WRITE(REG_A6XX_VPC_UNKNOWN_9300, 0); + + WRITE(REG_A6XX_VPC_UNKNOWN_9306, 1); + + WRITE(REG_A6XX_PC_UNKNOWN_9801, 0); + WRITE(REG_A6XX_PC_UNKNOWN_9806, 0); + WRITE(REG_A6XX_PC_UNKNOWN_9980, 0); + + WRITE(REG_A6XX_PC_UNKNOWN_9B06, 0); + WRITE(REG_A6XX_PC_UNKNOWN_9B06, 0); + + WRITE(REG_A6XX_SP_UNKNOWN_A81B, 0); + + WRITE(REG_A6XX_SP_UNKNOWN_B183, 0); + + WRITE(REG_A6XX_GRAS_UNKNOWN_8099, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_809B, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_80A0, 2); + WRITE(REG_A6XX_GRAS_UNKNOWN_80AF, 0); + WRITE(REG_A6XX_VPC_UNKNOWN_9210, 0); + WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0); + WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0); + WRITE(REG_A6XX_PC_UNKNOWN_9981, 0x3); + WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0); + WRITE(REG_A6XX_VPC_UNKNOWN_9108, 0x3); + WRITE(REG_A6XX_SP_TP_UNKNOWN_B304, 0); + WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2); + WRITE(REG_A6XX_RB_UNKNOWN_8804, 0); + 
WRITE(REG_A6XX_GRAS_UNKNOWN_80A4, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_80A5, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_80A6, 0); + WRITE(REG_A6XX_RB_UNKNOWN_8805, 0); + WRITE(REG_A6XX_RB_UNKNOWN_8806, 0); + WRITE(REG_A6XX_RB_UNKNOWN_8878, 0); + WRITE(REG_A6XX_RB_UNKNOWN_8879, 0); + WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc); + + emit_marker6(ring, 7); + + OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1); + OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */ + + WRITE(REG_A6XX_VFD_UNKNOWN_A008, 0); + + OUT_PKT4(ring, REG_A6XX_PC_MODE_CNTL, 1); + OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */ + + /* we don't use this yet.. probably best to disable.. */ + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE_LO(0), 3); + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */ + + OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE_LO(0), 2); + OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */ + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUF_CNTL, 1); + OUT_RING(ring, 0x00000000); /* VPC_SO_BUF_CNTL */ + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(0), 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E2AB */ + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE_LO(1), 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(1), 6); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(2), 6); + OUT_RING(ring, 0x00000000); + 
OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(3), 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1); + OUT_RING(ring, 0x00000000); +} + +static void +fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +{ + emit_marker6(ring, 6); + __OUT_IB5(ring, target); + emit_marker6(ring, 6); +} + +static void +fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst, + unsigned dst_off, struct pipe_resource *src, unsigned src_off, + unsigned sizedwords) +{ + struct fd_bo *src_bo = fd_resource(src)->bo; + struct fd_bo *dst_bo = fd_resource(dst)->bo; + unsigned i; + + for (i = 0; i < sizedwords; i++) { + OUT_PKT7(ring, CP_MEM_TO_MEM, 5); + OUT_RING(ring, 0x00000000); + OUT_RELOCW(ring, dst_bo, dst_off, 0, 0); + OUT_RELOC (ring, src_bo, src_off, 0, 0); + + dst_off += 4; + src_off += 4; + } +} + +void +fd6_emit_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->emit_const = fd6_emit_const; + ctx->emit_const_bo = fd6_emit_const_bo; + ctx->emit_ib = fd6_emit_ib; + ctx->mem_to_mem = fd6_mem_to_mem; +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h new file mode 100644 index 00000000000..a603017dd84 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_EMIT_H +#define FD6_EMIT_H + +#include "pipe/p_context.h" + +#include "freedreno_context.h" +#include "fd6_context.h" +#include "fd6_format.h" +#include "fd6_program.h" +#include "ir3_shader.h" + +struct fd_ringbuffer; + +/* grouped together emit-state for prog/vertex/state emit: */ +struct fd6_emit { + struct pipe_debug_callback *debug; + const struct fd_vertex_state *vtx; + const struct fd_program_stateobj *prog; + const struct pipe_draw_info *info; + struct ir3_shader_key key; + enum fd_dirty_3d_state dirty; + + uint32_t sprite_coord_enable; /* bitmask */ + bool sprite_coord_mode; + bool rasterflat; + bool no_decode_srgb; + + /* in binning pass, we don't have real frag shader, so we + * don't know if real draw disqualifies lrz write. 
So just + * figure that out up-front and stash it in the emit. + */ + bool no_lrz_write; + + /* cached to avoid repeated lookups of same variants: */ + const struct ir3_shader_variant *vp, *fp; + /* TODO: other shader stages.. */ + + unsigned streamout_mask; +}; + +static inline enum a6xx_color_fmt fd6_emit_format(struct pipe_surface *surf) +{ + if (!surf) + return 0; + return fd6_pipe2color(surf->format); +} + +static inline const struct ir3_shader_variant * +fd6_emit_get_vp(struct fd6_emit *emit) +{ + if (!emit->vp) { + struct fd6_shader_stateobj *so = emit->prog->vp; + emit->vp = ir3_shader_variant(so->shader, emit->key, emit->debug); + } + return emit->vp; +} + +static inline const struct ir3_shader_variant * +fd6_emit_get_fp(struct fd6_emit *emit) +{ + if (!emit->fp) { + if (emit->key.binning_pass) { + /* use dummy stateobj to simplify binning vs non-binning: */ + static const struct ir3_shader_variant binning_fp = {}; + emit->fp = &binning_fp; + } else { + struct fd6_shader_stateobj *so = emit->prog->fp; + emit->fp = ir3_shader_variant(so->shader, emit->key, emit->debug); + } + } + return emit->fp; +} + +static inline void +fd6_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + fd_reset_wfi(batch); +#if 0 + OUT_PKT4(ring, REG_A6XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5); + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */ + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */ + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */ + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */ + OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */ + fd_wfi(batch, ring); +#else + DBG("fd6_cache_flush stub"); +#endif +} + +static inline void +fd6_emit_blit(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + emit_marker6(ring, 7); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(BLIT)); + + emit_marker6(ring, 7); +} + +static inline void +fd6_emit_render_cntl(struct fd_context 
*ctx, bool blit, bool binning) +{ +#if 0 + struct fd_ringbuffer *ring = binning ? ctx->batch->binning : ctx->batch->draw; + + /* TODO eventually this partially depends on the pfb state, ie. + * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part + * we could probably cache and just regenerate if framebuffer + * state is dirty (or something like that).. + * + * Other bits seem to depend on query state, like if samples-passed + * query is active. + */ + bool samples_passed = (fd6_context(ctx)->samples_passed_queries > 0); + OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1); + OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */ + COND(binning, A6XX_RB_RENDER_CNTL_BINNING_PASS) | + COND(binning, A6XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) | + COND(samples_passed, A6XX_RB_RENDER_CNTL_SAMPLES_PASSED) | + COND(!blit, 0x8)); + OUT_PKT4(ring, REG_A6XX_GRAS_SC_CNTL, 1); + OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */ + COND(binning, A6XX_GRAS_SC_CNTL_BINNING_PASS) | + COND(samples_passed, A6XX_GRAS_SC_CNTL_SAMPLES_PASSED)); +#else + DBG("render ctl stub"); +#endif +} + +static inline void +fd6_emit_lrz_flush(struct fd_ringbuffer *ring) +{ + /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably + * a workaround and not needed on all a5xx. 
+ */ + OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, LRZ_FLUSH); + + OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, 0x0); +} + +static inline enum a6xx_state_block +fd6_stage2shadersb(enum shader_t type) +{ + switch (type) { + case SHADER_VERTEX: + return SB6_VS_SHADER; + case SHADER_FRAGMENT: + return SB6_FS_SHADER; + case SHADER_COMPUTE: + return SB6_CS_SHADER; + default: + unreachable("bad shader type"); + return ~0; + } +} + +void fd6_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd6_emit *emit); + +void fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd6_emit *emit); + +void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct ir3_shader_variant *cp); + +void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); + +void fd6_emit_init(struct pipe_context *pctx); + +#define WRITE(reg, val) do { \ + OUT_PKT4(ring, reg, 1); \ + OUT_RING(ring, val); \ + } while (0) + + +#endif /* FD6_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_format.c b/src/gallium/drivers/freedreno/a6xx/fd6_format.c new file mode 100644 index 00000000000..6a55d4e6388 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_format.c @@ -0,0 +1,452 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "util/u_format.h" + +#include "fd6_format.h" + + +/* Specifies the table of all the formats and their features. Also supplies + * the helpers that look up various data in those tables. 
+ */ + +struct fd6_format { + enum a6xx_vtx_fmt vtx; + enum a6xx_tex_fmt tex; + enum a6xx_color_fmt rb; + enum a3xx_color_swap swap; + boolean present; +}; + +#define RB6_NONE ~0 + +/* vertex + texture */ +#define VT(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = VFMT6_ ## fmt, \ + .tex = TFMT6_ ## fmt, \ + .rb = RB6_ ## rbfmt, \ + .swap = swapfmt \ + } + +/* texture-only */ +#define _T(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = ~0, \ + .tex = TFMT6_ ## fmt, \ + .rb = RB6_ ## rbfmt, \ + .swap = swapfmt \ + } + +/* vertex-only */ +#define V_(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = VFMT6_ ## fmt, \ + .tex = ~0, \ + .rb = RB6_ ## rbfmt, \ + .swap = swapfmt \ + } + +static struct fd6_format formats[PIPE_FORMAT_COUNT] = { + /* for blitting, treat PIPE_FORMAT_NONE as 8bit R8: */ + _T(R8_UINT, 8_UINT, R8_UINT, WZYX), + + /* 8-bit */ + VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX), + VT(R8_SNORM, 8_SNORM, R8_SNORM, WZYX), + VT(R8_UINT, 8_UINT, R8_UINT, WZYX), + VT(R8_SINT, 8_SINT, R8_SINT, WZYX), + V_(R8_USCALED, 8_UINT, NONE, WZYX), + V_(R8_SSCALED, 8_SINT, NONE, WZYX), /* signed, like the other *_SSCALED entries */ + + _T(A8_UNORM, 8_UNORM, A8_UNORM, WZYX), + _T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX), + _T(I8_UNORM, 8_UNORM, NONE, WZYX), + + _T(A8_UINT, 8_UINT, NONE, WZYX), + _T(A8_SINT, 8_SINT, NONE, WZYX), + _T(L8_UINT, 8_UINT, NONE, WZYX), + _T(L8_SINT, 8_SINT, NONE, WZYX), + _T(I8_UINT, 8_UINT, NONE, WZYX), + _T(I8_SINT, 8_SINT, NONE, WZYX), + + _T(S8_UINT, 8_UINT, R8_UNORM, WZYX), + + /* 16-bit */ + VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX), + VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX), + VT(R16_UINT, 16_UINT, R16_UINT, WZYX), + VT(R16_SINT, 16_SINT, R16_SINT, WZYX), + V_(R16_USCALED, 16_UINT, NONE, WZYX), + V_(R16_SSCALED, 16_SINT, NONE, WZYX), /* signed, like the other *_SSCALED entries */ + VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX), + _T(Z16_UNORM, 16_UNORM, R16_UNORM, WZYX), + + _T(A16_UNORM, 16_UNORM, NONE, WZYX), + _T(A16_SNORM, 16_SNORM, 
NONE, WZYX), + _T(A16_UINT, 16_UINT, NONE, WZYX), + _T(A16_SINT, 16_SINT, NONE, WZYX), + _T(L16_UNORM, 16_UNORM, NONE, WZYX), + _T(L16_SNORM, 16_SNORM, NONE, WZYX), + _T(L16_UINT, 16_UINT, NONE, WZYX), + _T(L16_SINT, 16_SINT, NONE, WZYX), + _T(I16_UNORM, 16_UNORM, NONE, WZYX), + _T(I16_SNORM, 16_SNORM, NONE, WZYX), + _T(I16_UINT, 16_UINT, NONE, WZYX), + _T(I16_SINT, 16_SINT, NONE, WZYX), + + VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), + VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), + VT(R8G8_UINT, 8_8_UINT, R8G8_UINT, WZYX), + VT(R8G8_SINT, 8_8_SINT, R8G8_SINT, WZYX), + V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX), + V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX), + + _T(L8A8_UINT, 8_8_UINT, NONE, WZYX), + _T(L8A8_SINT, 8_8_SINT, NONE, WZYX), + + _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), + _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), + _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), + _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ), + + /* 24-bit */ + V_(R8G8B8_UNORM, 8_8_8_UNORM, NONE, WZYX), + V_(R8G8B8_SNORM, 8_8_8_SNORM, NONE, WZYX), + V_(R8G8B8_UINT, 8_8_8_UINT, NONE, WZYX), + V_(R8G8B8_SINT, 8_8_8_SINT, NONE, WZYX), + V_(R8G8B8_USCALED, 8_8_8_UINT, NONE, WZYX), + V_(R8G8B8_SSCALED, 8_8_8_SINT, NONE, WZYX), + + /* 32-bit */ + VT(R32_UINT, 32_UINT, R32_UINT, WZYX), + VT(R32_SINT, 32_SINT, R32_SINT, WZYX), + V_(R32_USCALED, 32_UINT, NONE, WZYX), + V_(R32_SSCALED, 32_SINT, NONE, WZYX), /* signed, like the other *_SSCALED entries */ + VT(R32_FLOAT, 32_FLOAT, R32_FLOAT,WZYX), + V_(R32_FIXED, 32_FIXED, NONE, WZYX), + + _T(A32_UINT, 32_UINT, NONE, WZYX), + _T(A32_SINT, 32_SINT, NONE, WZYX), + _T(L32_UINT, 32_UINT, NONE, WZYX), + _T(L32_SINT, 32_SINT, NONE, WZYX), + _T(I32_UINT, 32_UINT, NONE, WZYX), + _T(I32_SINT, 32_SINT, NONE, WZYX), + + VT(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX), + VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX), + VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), + VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), + VT(R16G16_USCALED, 16_16_UINT, NONE, WZYX), + 
VT(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), + VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX), + + _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX), + _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX), + _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), + _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), + + VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + VT(R8G8B8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX), + VT(R8G8B8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), + VT(R8G8B8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), + V_(R8G8B8A8_USCALED, 8_8_8_8_UINT, NONE, WZYX), + V_(R8G8B8A8_SSCALED, 8_8_8_8_SINT, NONE, WZYX), + + VT(B8G8R8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + _T(B8G8R8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + VT(B8G8R8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + _T(B8G8R8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + + VT(A8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(X8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(A8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(X8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + + VT(A8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(X8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(A8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(X8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + + VT(R10G10B10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX), + VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), + _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), + V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), + V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), + VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), + VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), + V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), + 
V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), + V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), + + VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), + _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), + + _T(Z24X8_UNORM, X8Z24_UNORM, X8Z24_UNORM, WZYX), + _T(X24S8_UINT, 8_8_8_8_UINT, X8Z24_UNORM, XYZW), // XXX + _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, X8Z24_UNORM, WZYX), + _T(Z32_FLOAT, 32_FLOAT, R32_FLOAT, WZYX), + _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT, R32_FLOAT, WZYX), + _T(X32_S8X24_UINT, 8_UINT, R8_UINT, WZYX), + + /* 48-bit */ + V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX), + V_(R16G16B16_SNORM, 16_16_16_SNORM, NONE, WZYX), + V_(R16G16B16_UINT, 16_16_16_UINT, NONE, WZYX), + V_(R16G16B16_SINT, 16_16_16_SINT, NONE, WZYX), + V_(R16G16B16_USCALED, 16_16_16_UINT, NONE, WZYX), + V_(R16G16B16_SSCALED, 16_16_16_SINT, NONE, WZYX), + V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX), + + /* 64-bit */ + VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), + VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), + VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), + VT(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), + VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), + VT(R16G16B16X16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), + VT(R16G16B16A16_USCALED, 16_16_16_16_UINT, NONE, WZYX), + VT(R16G16B16A16_SSCALED, 16_16_16_16_SINT, NONE, WZYX), + VT(R16G16B16A16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), + VT(R16G16B16X16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), + + VT(R32G32_UINT, 32_32_UINT, R32G32_UINT, WZYX), + VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX), + V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX), + V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX), + 
VT(R32G32_FLOAT, 32_32_FLOAT, R32G32_FLOAT,WZYX), + V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX), + + _T(L32A32_UINT, 32_32_UINT, NONE, WZYX), + _T(L32A32_SINT, 32_32_SINT, NONE, WZYX), + + /* 96-bit */ + VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), + VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), + V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX), + V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX), + VT(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), + V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX), + + /* 128-bit */ + VT(R32G32B32A32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), + _T(R32G32B32X32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), + VT(R32G32B32A32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), + _T(R32G32B32X32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), + V_(R32G32B32A32_USCALED, 32_32_32_32_UINT, NONE, WZYX), + V_(R32G32B32A32_SSCALED, 32_32_32_32_SINT, NONE, WZYX), + VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), + _T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), + V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX), + + /* compressed */ + _T(ETC1_RGB8, ETC1, NONE, WZYX), + _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX), + _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX), + _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX), + _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX), + + _T(DXT1_RGB, DXT1, NONE, WZYX), + _T(DXT1_SRGB, DXT1, NONE, WZYX), + _T(DXT1_RGBA, DXT1, NONE, WZYX), + _T(DXT1_SRGBA, DXT1, NONE, WZYX), + _T(DXT3_RGBA, DXT3, NONE, WZYX), + _T(DXT3_SRGBA, DXT3, NONE, WZYX), + _T(DXT5_RGBA, DXT5, NONE, WZYX), + _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX), + _T(BPTC_SRGBA, BPTC, NONE, WZYX), + 
_T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX), + _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX), + + _T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX), + _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX), + _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX), + _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX), + + _T(ASTC_4x4, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12, ASTC_12x12, NONE, WZYX), + + _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX), +}; + +/* convert pipe format to vertex buffer format: */ +enum a6xx_vtx_fmt +fd6_pipe2vtx(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return formats[format].vtx; +} + +/* convert pipe format to texture sampler format: */ +enum a6xx_tex_fmt +fd6_pipe2tex(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return 
formats[format].tex; +} + +/* convert pipe format to MRT / copydest format used for render-target: */ +enum a6xx_color_fmt +fd6_pipe2color(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return formats[format].rb; +} + +enum a3xx_color_swap +fd6_pipe2swap(enum pipe_format format) +{ + if (!formats[format].present) + return WZYX; + return formats[format].swap; +} + +// XXX possibly same as a4xx.. +enum a6xx_tex_fetchsize +fd6_pipe2fetchsize(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; + + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC) + return TFETCH6_16_BYTE; + + switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { + case 8: return TFETCH6_1_BYTE; + case 16: return TFETCH6_2_BYTE; + case 32: return TFETCH6_4_BYTE; + case 64: return TFETCH6_8_BYTE; + case 96: return TFETCH6_1_BYTE; /* Does this matter? */ + case 128: return TFETCH6_16_BYTE; + default: + debug_printf("Unknown block size for format %s: %d\n", + util_format_name(format), + util_format_get_blocksizebits(format)); + return TFETCH6_1_BYTE; + } +} + +enum a6xx_depth_format +fd6_pipe2depth(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return DEPTH6_16; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + return DEPTH6_24_8; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return DEPTH6_32; + default: + return ~0; + } +} + +static inline enum a6xx_tex_swiz +tex_swiz(unsigned swiz) +{ + switch (swiz) { + default: + case PIPE_SWIZZLE_X: return A6XX_TEX_X; + case PIPE_SWIZZLE_Y: return A6XX_TEX_Y; + case PIPE_SWIZZLE_Z: return A6XX_TEX_Z; + case PIPE_SWIZZLE_W: return A6XX_TEX_W; + case PIPE_SWIZZLE_0: return A6XX_TEX_ZERO; + case PIPE_SWIZZLE_1: return A6XX_TEX_ONE; + } +} + +uint32_t +fd6_tex_swiz(enum pipe_format 
format, unsigned swizzle_r, unsigned swizzle_g, + unsigned swizzle_b, unsigned swizzle_a) +{ + const struct util_format_description *desc = + util_format_description(format); + unsigned char swiz[4] = { + swizzle_r, swizzle_g, swizzle_b, swizzle_a, + }, rswiz[4]; + + util_format_compose_swizzles(desc->swizzle, swiz, rswiz); + + return A6XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | + A6XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | + A6XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | + A6XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_format.h b/src/gallium/drivers/freedreno/a6xx/fd6_format.h new file mode 100644 index 00000000000..6f96256fa85 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_format.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_UTIL_H_ +#define FD6_UTIL_H_ + +#include "freedreno_util.h" + +#include "a6xx.xml.h" + +enum a6xx_vtx_fmt fd6_pipe2vtx(enum pipe_format format); +enum a6xx_tex_fmt fd6_pipe2tex(enum pipe_format format); +enum a6xx_color_fmt fd6_pipe2color(enum pipe_format format); +enum a3xx_color_swap fd6_pipe2swap(enum pipe_format format); +enum a6xx_tex_fetchsize fd6_pipe2fetchsize(enum pipe_format format); +enum a6xx_depth_format fd6_pipe2depth(enum pipe_format format); + +uint32_t fd6_tex_swiz(enum pipe_format format, unsigned swizzle_r, + unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); + +#endif /* FD6_UTIL_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c new file mode 100644 index 00000000000..7388cce4446 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -0,0 +1,839 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include <stdio.h> + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "freedreno_draw.h" +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd6_gmem.h" +#include "fd6_context.h" +#include "fd6_draw.h" +#include "fd6_emit.h" +#include "fd6_program.h" +#include "fd6_format.h" +#include "fd6_zsa.h" + +static void +emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, + struct pipe_surface **bufs, struct fd_gmem_stateobj *gmem) +{ + enum a6xx_tile_mode tile_mode; + unsigned i; + + for (i = 0; i < nr_bufs; i++) { + enum a6xx_color_fmt format = 0; + enum a3xx_color_swap swap = WZYX; + bool srgb = false, sint = false, uint = false; + struct fd_resource *rsc = NULL; + struct fd_resource_slice *slice = NULL; + uint32_t stride = 0; + uint32_t offset = 0; + + if (gmem) { + tile_mode = TILE6_2; + } else { + tile_mode = TILE6_LINEAR; + } + + if (!bufs[i]) + continue; + + struct pipe_surface *psurf = bufs[i]; + enum pipe_format pformat = psurf->format; + rsc = fd_resource(psurf->texture); + if (!rsc->bo) + continue; + + uint32_t base = gmem ? 
gmem->cbuf_base[i] : 0; + slice = fd_resource_slice(rsc, psurf->u.tex.level); + format = fd6_pipe2color(pformat); + swap = fd6_pipe2swap(pformat); + srgb = util_format_is_srgb(pformat); + sint = util_format_is_pure_sint(pformat); + uint = util_format_is_pure_uint(pformat); + + offset = fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); + + stride = slice->pitch * rsc->cpp; + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo)); + + OUT_PKT4(ring, REG_A6XX_RB_MRT_BUF_INFO(i), 6); + OUT_RING(ring, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | + A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->tile_mode) | + A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | + // COND(gmem, 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */ + COND(srgb, A6XX_RB_MRT_BUF_INFO_COLOR_SRGB)); + OUT_RING(ring, A6XX_RB_MRT_PITCH(stride)); + OUT_RING(ring, A6XX_RB_MRT_ARRAY_PITCH(slice->size0)); + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */ + OUT_RING(ring, base); /* RB_MRT[i].BASE_GMEM */ + OUT_PKT4(ring, REG_A6XX_SP_FS_MRT_REG(i), 1); + OUT_RING(ring, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format) | + COND(sint, A6XX_SP_FS_MRT_REG_COLOR_SINT) | + COND(uint, A6XX_SP_FS_MRT_REG_COLOR_UINT) | + COND(srgb, A6XX_SP_FS_MRT_REG_COLOR_SRGB)); + +#if 0 + /* when we support UBWC, these would be the system memory + * addr/pitch/etc: + */ + OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4); + OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */ + OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */ + OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0)); + OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0)); +#endif + } +} + +static void +emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, + struct fd_gmem_stateobj *gmem) +{ + if (zsbuf) { + struct fd_resource *rsc = fd_resource(zsbuf->texture); + enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format); + struct 
fd_resource_slice *slice = fd_resource_slice(rsc, 0); + uint32_t stride = slice->pitch * rsc->cpp; + uint32_t size = slice->size0; + uint32_t base = gmem ? gmem->zsbuf_base[0] : 0; + + OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); + OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_PITCH(stride)); + OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size)); + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */ + OUT_RING(ring, base); /* RB_DEPTH_BUFFER_BASE_GMEM */ + + OUT_PKT4(ring, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); + OUT_RING(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + + OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3); + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */ + + if (rsc->lrz) { + OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5); + OUT_RELOCW(ring, rsc->lrz, 0x1000, 0, 0); + OUT_RING(ring, A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH(rsc->lrz_pitch)); + OUT_RELOCW(ring, rsc->lrz, 0, 0, 0); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO/HI */ + } else { + OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */ + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); + } + + if (rsc->stencil) { + struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0); + stride = slice->pitch * rsc->stencil->cpp; /* slice comes from rsc->stencil, so scale by the stencil resource's cpp, not the depth resource's */ + size = slice->size0; + uint32_t base = gmem ?
gmem->zsbuf_base[1] : 0; + + OUT_PKT4(ring, REG_A6XX_RB_STENCIL_INFO, 6); /* 6 dwords follow: INFO, PITCH, ARRAY_PITCH, BASE_LO, BASE_HI, GMEM base (same layout as the depth packet) */ + OUT_RING(ring, A6XX_RB_STENCIL_INFO_SEPARATE_STENCIL); + OUT_RING(ring, A6XX_RB_STENCIL_BUFFER_PITCH(stride)); + OUT_RING(ring, A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(size)); + OUT_RELOCW(ring, rsc->stencil->bo, 0, 0, 0); /* RB_STENCIL_BASE_LO/HI */ + OUT_RING(ring, base); /* stencil base in GMEM (gmem->zsbuf_base[1], or 0 without gmem) */ + } + } else { + OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); + OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE)); + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */ + + OUT_PKT4(ring, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); + OUT_RING(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE)); + + OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5); + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */ + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */ + + OUT_PKT4(ring, REG_A6XX_RB_STENCIL_INFO, 1); + OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */ + } +} + +static bool +use_hw_binning(struct fd_batch *batch) +{ + return false; +} + +static void +patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) +{ + unsigned i; + for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); + *patch->cs = patch->val | DRAW4(0, 0, 0, vismode); + } + util_dynarray_resize(&batch->draw_patches, 0); +} + +static void +patch_gmem_bases(struct fd_batch *batch) +{ + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + unsigned
i; + + for (i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i); + if (patch->val < MAX_RENDER_TARGETS) + *patch->cs = gmem->cbuf_base[patch->val]; + else + *patch->cs = gmem->zsbuf_base[0]; + } + util_dynarray_resize(&batch->gmem_patches, 0); +} + +static void +update_vsc_pipe(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct fd6_context *fd6_ctx = fd6_context(ctx); + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + struct fd_ringbuffer *ring = batch->gmem; + int i; + + OUT_PKT4(ring, REG_A6XX_VSC_BIN_SIZE, 3); + OUT_RING(ring, A6XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | + A6XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); + OUT_RELOCW(ring, fd6_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */ + +#if 0 + OUT_PKT4(ring, REG_A6XX_UNKNOWN_0BC5, 2); + OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */ + OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */ +#endif + + OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 16); + for (i = 0; i < 16; i++) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; + OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) | + A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) | + A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) | + A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h)); + } + +#if 0 + OUT_PKT4(ring, REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32); + for (i = 0; i < 16; i++) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; + if (!pipe->bo) { + pipe->bo = fd_bo_new(ctx->dev, 0x20000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + } + OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */ + } +#endif + +#if 0 + OUT_PKT4(ring, REG_A6XX_VSC_PIPE_DATA_LENGTH_REG(0), 16); + for (i = 0; i < 16; i++) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; + OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */ + } +#endif +} + +static void +set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2) +{ + OUT_PKT4(ring, 
REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) | + A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1)); + OUT_RING(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) | + A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2)); + + OUT_PKT4(ring, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2); + OUT_RING(ring, A6XX_GRAS_RESOLVE_CNTL_1_X(x1) | + A6XX_GRAS_RESOLVE_CNTL_1_Y(y1)); + OUT_RING(ring, A6XX_GRAS_RESOLVE_CNTL_2_X(x2) | + A6XX_GRAS_RESOLVE_CNTL_2_Y(y2)); +} + +static void +emit_binning_pass(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + + uint32_t x1 = gmem->minx; + uint32_t y1 = gmem->miny; + uint32_t x2 = gmem->minx + gmem->width - 1; + uint32_t y2 = gmem->miny + gmem->height - 1; + + emit_marker6(ring, 7); + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BINNING) | 0x10); /* | 0x10 ? */ + emit_marker6(ring, 7); + +#if 0 + OUT_PKT4(ring, REG_A6XX_RB_CNTL, 1); + OUT_RING(ring, A6XX_RB_CNTL_WIDTH(gmem->bin_w) | + A6XX_RB_CNTL_HEIGHT(gmem->bin_h)); +#endif + + set_scissor(ring, x1, y1, x2, y2); + + update_vsc_pipe(batch); + +#if 0 + OUT_PKT4(ring, REG_A6XX_VPC_MODE_CNTL, 1); + OUT_RING(ring, A6XX_VPC_MODE_CNTL_BINNING_PASS); +#endif + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, UNK_2C); + + OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1); + OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | + A6XX_RB_WINDOW_OFFSET_Y(0)); + + /* emit IB to binning drawcmds: */ + ctx->emit_ib(ring, batch->binning); + + fd_reset_wfi(batch); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, UNK_2D); + + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CACHE_FLUSH_TS); + OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ + OUT_RING(ring, 0x00000000); + + // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??) 
+ + fd_wfi(batch, ring); + +#if 0 + OUT_PKT4(ring, REG_A6XX_VPC_MODE_CNTL, 1); + OUT_RING(ring, 0x0); +#endif +} + +static void +disable_msaa(struct fd_ringbuffer *ring) +{ + // TODO MSAA + OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE); + + OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE); + + OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE); +} + +static void +set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag) +{ + OUT_PKT4(ring, REG_A6XX_X1_BIN_SIZE, 1); + OUT_RING(ring, A6XX_X1_BIN_SIZE_WIDTH(w) | + A6XX_X1_BIN_SIZE_HEIGHT(h) | flag); + OUT_PKT4(ring, REG_A6XX_X2_BIN_SIZE, 1); + OUT_RING(ring, A6XX_X2_BIN_SIZE_WIDTH(w) | + A6XX_X2_BIN_SIZE_HEIGHT(h) | flag); + + /* no flag for X3_BIN_SIZE... */ + OUT_PKT4(ring, REG_A6XX_X3_BIN_SIZE, 1); + OUT_RING(ring, A6XX_X3_BIN_SIZE_WIDTH(w) | + A6XX_X3_BIN_SIZE_HEIGHT(h)); +} + +/* before first tile */ +static void +fd6_emit_tile_init(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + + fd6_emit_restore(batch, ring); + + if (batch->lrz_clear) + ctx->emit_ib(ring, batch->lrz_clear); + + fd6_emit_lrz_flush(ring); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, 0x31); /* vertex cache invalidate? 
*/ + +#if 0 + OUT_PKT4(ring, REG_A6XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */ +#endif + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + +#if 0 + OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ +#endif + +#if 0 + OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ +#endif + + /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ + fd_wfi(batch, ring); + OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x7c400004); /* RB_CCU_CNTL */ + + DBG("emit_mrt"); + emit_zs(ring, pfb->zsbuf, &ctx->gmem); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, &ctx->gmem); + + patch_gmem_bases(batch); + + set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); + + disable_msaa(ring); + + if (use_hw_binning(batch)) { + emit_binning_pass(batch); + fd6_emit_lrz_flush(ring); + patch_draws(batch, USE_VISIBILITY); + } else { + patch_draws(batch, IGNORE_VISIBILITY); + } +} + +static void +set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1) +{ + OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1); + OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | + A6XX_RB_WINDOW_OFFSET_Y(y1)); + + OUT_PKT4(ring, REG_A6XX_X1_WINDOW_OFFSET, 1); + OUT_RING(ring, A6XX_X1_WINDOW_OFFSET_X(x1) | + A6XX_X1_WINDOW_OFFSET_Y(y1)); + + OUT_PKT4(ring, REG_A6XX_X2_WINDOW_OFFSET, 1); + OUT_RING(ring, A6XX_X2_WINDOW_OFFSET_X(x1) | + A6XX_X2_WINDOW_OFFSET_Y(y1)); + + OUT_PKT4(ring, REG_A6XX_X3_WINDOW_OFFSET, 1); + OUT_RING(ring, A6XX_X3_WINDOW_OFFSET_X(x1) | + A6XX_X3_WINDOW_OFFSET_Y(y1)); +} + +/* before mem2gmem */ +static void +fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_context *ctx = batch->ctx; + struct fd6_context *fd6_ctx = fd6_context(ctx); + struct fd_ringbuffer *ring = batch->gmem; + + emit_marker6(ring, 7); + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); /* | 0x10 ? 
*/ + emit_marker6(ring, 7); + + uint32_t x1 = tile->xoff; + uint32_t y1 = tile->yoff; + uint32_t x2 = tile->xoff + tile->bin_w - 1; + uint32_t y2 = tile->yoff + tile->bin_h - 1; + + set_scissor(ring, x1, y1, x2, y2); + + set_window_offset(ring, x1, y1); + + OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9306, 1); + OUT_RING(ring, 1); + + if (use_hw_binning(batch)) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; + + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_BIN_DATA5, 5); + OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) | + CP_SET_BIN_DATA5_0_VSC_N(tile->n)); + OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */ + OUT_RELOC(ring, fd6_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */ + (tile->p * 4), 0, 0); + } else { + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x1); + } + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); +} + +static void +set_blit_scissor(struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_scissor_state blit_scissor; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + + blit_scissor.minx = batch->max_scissor.minx; + blit_scissor.miny = batch->max_scissor.miny; + blit_scissor.maxx = MIN2(pfb->width - 1, batch->max_scissor.maxx); + blit_scissor.maxy = MIN2(pfb->height - 1, batch->max_scissor.maxy); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); + OUT_RING(ring, + A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) | + A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny)); + OUT_RING(ring, + A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx) | + A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy)); +} + +static void +emit_blit(struct fd_batch *batch, uint32_t base, + struct pipe_surface *psurf, + struct fd_resource *rsc) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_resource_slice *slice; + uint32_t offset; + + slice = fd_resource_slice(rsc, psurf->u.tex.level); + offset 
= fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + + enum pipe_format pfmt = psurf->format; + enum a6xx_color_fmt format = fd6_pipe2color(pfmt); + uint32_t stride = slice->pitch * rsc->cpp; + uint32_t size = slice->size0; + enum a3xx_color_swap swap = fd6_pipe2swap(pfmt); + + // TODO: tile mode + // bool tiled; + // tiled = rsc->tile_mode && + // !fd_resource_level_linear(psurf->texture, psurf->u.tex.level); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 5); + OUT_RING(ring, + A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format) | + A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(swap)); + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */ + OUT_RING(ring, A6XX_RB_BLIT_DST_PITCH(stride)); + OUT_RING(ring, A6XX_RB_BLIT_DST_ARRAY_PITCH(size)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, base); + + fd6_emit_blit(batch->ctx, ring); +} + +static void +emit_restore_blit(struct fd_batch *batch, uint32_t base, + struct pipe_surface *psurf, + struct fd_resource *rsc, + unsigned buffer) +{ + struct fd_ringbuffer *ring = batch->gmem; + uint32_t info = 0; + + switch (buffer) { + case FD_BUFFER_COLOR: + info |= A6XX_RB_BLIT_INFO_UNK0; + break; + case FD_BUFFER_STENCIL: + info |= A6XX_RB_BLIT_INFO_UNK0; + break; + case FD_BUFFER_DEPTH: + info |= A6XX_RB_BLIT_INFO_DEPTH; + break; + } + + if (util_format_is_pure_integer(psurf->format)) + info |= A6XX_RB_BLIT_INFO_INTEGER; + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, info | A6XX_RB_BLIT_INFO_GMEM); + + emit_blit(batch, base, psurf, rsc); +} + +/* + * transfer from system memory to gmem + */ +static void +fd6_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_context *ctx = batch->ctx; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + + set_blit_scissor(batch); + + if 
(fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { + unsigned i; + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_restore_blit(batch, gmem->cbuf_base[i], pfb->cbufs[i], + fd_resource(pfb->cbufs[i]->texture), + FD_BUFFER_COLOR); + } + } + + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + + if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH)) { + emit_restore_blit(batch, gmem->zsbuf_base[0], pfb->zsbuf, rsc, + FD_BUFFER_DEPTH); + } + if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL)) { + emit_restore_blit(batch, gmem->zsbuf_base[1], pfb->zsbuf, rsc->stencil, + FD_BUFFER_STENCIL); + } + } +} + +/* before IB to rendering cmds: */ +static void +fd6_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) +{ +} + +static void +emit_resolve_blit(struct fd_batch *batch, uint32_t base, + struct pipe_surface *psurf, + struct fd_resource *rsc, + unsigned buffer) +{ + struct fd_ringbuffer *ring = batch->gmem; + uint32_t info = 0; + + switch (buffer) { + case FD_BUFFER_COLOR: + break; + case FD_BUFFER_STENCIL: + info |= A6XX_RB_BLIT_INFO_UNK0; + break; + case FD_BUFFER_DEPTH: + info |= A6XX_RB_BLIT_INFO_DEPTH; + break; + } + + if (util_format_is_pure_integer(psurf->format)) + info |= A6XX_RB_BLIT_INFO_INTEGER; + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, info); + + emit_blit(batch, base, psurf, rsc); +} + +/* + * transfer from gmem to system memory (ie. 
normal RAM) + */ + +static void +fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_context *ctx = batch->ctx; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + emit_marker6(ring, 7); + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE)); /* | 0x10 ? */ + emit_marker6(ring, 7); + + set_blit_scissor(batch); + + if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + + if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) { + emit_resolve_blit(batch, gmem->zsbuf_base[0], pfb->zsbuf, rsc, + FD_BUFFER_DEPTH); + } + if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) { + emit_resolve_blit(batch, gmem->zsbuf_base[1], pfb->zsbuf, rsc->stencil, + FD_BUFFER_STENCIL); + } + } + + if (batch->resolve & FD_BUFFER_COLOR) { + unsigned i; + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_resolve_blit(batch, gmem->cbuf_base[i], pfb->cbufs[i], + fd_resource(pfb->cbufs[i]->texture), + FD_BUFFER_COLOR); + } + } +} + +static void +fd6_emit_tile_fini(struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->gmem; + + fd6_emit_lrz_flush(ring); + + fd6_cache_flush(batch, ring); +} + +static void +fd6_emit_sysmem_prep(struct fd_batch *batch) +{ + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_ringbuffer *ring = batch->gmem; + + fd6_emit_restore(batch, ring); + + fd6_emit_lrz_flush(ring); + + emit_marker6(ring, 7); + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BYPASS) | 0x10); /* | 0x10 ? 
*/ + emit_marker6(ring, 7); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, PC_CCU_INVALIDATE_COLOR); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, 0x31); /* vertex cache invalidate? */ + +#if 0 + OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ +#endif + +#if 0 + OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ +#endif + + /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ + fd_wfi(batch, ring); + OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */ + + set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1); + + set_window_offset(ring, 0, 0); + + set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */ + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x1); + + patch_draws(batch, IGNORE_VISIBILITY); + + emit_zs(ring, pfb->zsbuf, NULL); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL); + + disable_msaa(ring); +} + +static void +fd6_emit_sysmem_fini(struct fd_batch *batch) +{ + struct fd6_context *fd6_ctx = fd6_context(batch->ctx); + struct fd_ringbuffer *ring = batch->gmem; + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + fd6_emit_lrz_flush(ring); + + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, UNK_1D); + OUT_RELOCW(ring, fd6_ctx->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ + OUT_RING(ring, 0x00000000); +} + +void +fd6_gmem_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + ctx->emit_tile_init = fd6_emit_tile_init; + ctx->emit_tile_prep = fd6_emit_tile_prep; + ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem; + ctx->emit_tile_renderprep = fd6_emit_tile_renderprep; + ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem; + ctx->emit_tile_fini = fd6_emit_tile_fini; + ctx->emit_sysmem_prep = fd6_emit_sysmem_prep; + ctx->emit_sysmem_fini = fd6_emit_sysmem_fini; +} diff --git 
a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h new file mode 100644 index 00000000000..9804fc6a973 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2015 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_GMEM_H_ +#define FD6_GMEM_H_ + +#include "pipe/p_context.h" + +void fd6_gmem_init(struct pipe_context *pctx); + +#endif /* FD6_GMEM_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c new file mode 100644 index 00000000000..e79ee2f90ad --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2017 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" + +#include "freedreno_resource.h" +#include "fd6_image.h" +#include "fd6_format.h" +#include "fd6_texture.h" + +static enum a6xx_state_block texsb[] = { + [PIPE_SHADER_COMPUTE] = SB6_CS_TEX, + [PIPE_SHADER_FRAGMENT] = SB6_FS_TEX, +}; + +static enum a6xx_state_block imgsb[] = { + [PIPE_SHADER_COMPUTE] = SB6_CS_SSBO, + [PIPE_SHADER_FRAGMENT] = SB6_SSBO, +}; + +struct fd6_image { + enum pipe_format pfmt; + enum a6xx_tex_fmt fmt; + enum a6xx_tex_fetchsize fetchsize; + enum a6xx_tex_type type; + bool srgb; + uint32_t cpp; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t pitch; + uint32_t array_pitch; + struct fd_bo *bo; + uint32_t offset; +}; + +static void translate_image(struct fd6_image *img, struct pipe_image_view *pimg) +{ + enum pipe_format format = pimg->format; + struct pipe_resource *prsc = pimg->resource; + struct fd_resource *rsc = fd_resource(prsc); + unsigned lvl; + + if (!pimg->resource) { + memset(img, 0, sizeof(*img)); + return; + } + + img->pfmt = format; + img->fmt = fd6_pipe2tex(format); + img->fetchsize = fd6_pipe2fetchsize(format); + img->type = fd6_tex_type(prsc->target); + img->srgb = util_format_is_srgb(format); + img->cpp = rsc->cpp; + img->bo = rsc->bo; + + if (prsc->target == PIPE_BUFFER) { + lvl = 0; + img->offset = pimg->u.buf.offset; + img->pitch = pimg->u.buf.size; + img->array_pitch = 0; + } else { + lvl = pimg->u.tex.level; + img->offset = rsc->slices[lvl].offset; + img->pitch = rsc->slices[lvl].pitch * rsc->cpp; + img->array_pitch = rsc->layer_size; + } + + img->width = u_minify(prsc->width0, lvl); + img->height = u_minify(prsc->height0, lvl); + img->depth = u_minify(prsc->depth0, lvl); +} + +static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot, + struct fd6_image *img, enum pipe_shader_type shader) +{ + unsigned opcode = CP_LOAD_STATE6_FRAG; + + assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); + 
+ OUT_PKT7(ring, opcode, 3 + 12); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(texsb[shader]) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + + OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) | + fd6_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) | + COND(img->srgb, A6XX_TEX_CONST_0_SRGB)); + OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) | + A6XX_TEX_CONST_1_HEIGHT(img->height)); + OUT_RING(ring, A6XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) | + A6XX_TEX_CONST_2_TYPE(img->type) | + A6XX_TEX_CONST_2_PITCH(img->pitch)); + OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch)); + if (img->bo) { + OUT_RELOC(ring, img->bo, img->offset, + (uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth)); + } + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); +} + +static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot, + struct fd6_image *img, enum pipe_shader_type shader) +{ + unsigned opcode = CP_LOAD_STATE6_FRAG; + + assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT); + +#if 0 + OUT_PKT7(ring, opcode, 3 + 4); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) | + CP_LOAD_STATE6_0_STATE_TYPE(0) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + OUT_RING(ring, A6XX_SSBO_0_0_BASE_LO(0)); + OUT_RING(ring, A6XX_SSBO_0_1_PITCH(img->pitch)); + OUT_RING(ring, 
A6XX_SSBO_0_2_ARRAY_PITCH(img->array_pitch)); + OUT_RING(ring, A6XX_SSBO_0_3_CPP(img->cpp)); +#endif + +#if 0 + OUT_PKT7(ring, opcode, 3 + 2); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) | + CP_LOAD_STATE6_0_STATE_TYPE(1) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + OUT_RING(ring, A6XX_SSBO_1_0_FMT(img->fmt) | + A6XX_SSBO_1_0_WIDTH(img->width)); + OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(img->height) | + A6XX_SSBO_1_1_DEPTH(img->depth)); +#endif + + OUT_PKT7(ring, opcode, 3 + 2); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) | + CP_LOAD_STATE6_0_STATE_TYPE(2) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + if (img->bo) { + OUT_RELOCW(ring, img->bo, img->offset, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } +} + +/* Note that to avoid conflicts with textures and non-image "SSBO"s, images + * are placed, in reverse order, at the end of the state block, so for + * example the sampler state: + * + * 0: first texture + * 1: second texture + * .... + * N-1: second image + * N: first image + */ +static unsigned +get_image_slot(unsigned index) +{ + /* TODO figure out real limit per generation, and don't hardcode. + * This needs to match get_image_slot() in ir3_compiler_nir. + * Possibly should be factored out into shared helper? + * + * NOTE: index must be < max_samplers, otherwise the unsigned + * subtraction below wraps around. + */ + const unsigned max_samplers = 16; + return max_samplers - index - 1; +} + +/* Emit required "SSBO" and sampler state. The sampler state is used by the + * hw for imageLoad(), and "SSBO" state for imageStore(). Returns max sampler + * used. 
+ */ +void +fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum pipe_shader_type shader) +{ + struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader]; + unsigned enabled_mask = so->enabled_mask; + + while (enabled_mask) { + unsigned index = u_bit_scan(&enabled_mask); + unsigned slot = get_image_slot(index); + struct fd6_image img; + + translate_image(&img, &so->si[index]); + + emit_image_tex(ring, slot, &img, shader); + emit_image_ssbo(ring, slot, &img, shader); + } +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.h b/src/gallium/drivers/freedreno/a6xx/fd6_image.h new file mode 100644 index 00000000000..0ee53932737 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2017 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_IMAGE_H_ +#define FD6_IMAGE_H_ + +#include "freedreno_context.h" + +void fd6_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum pipe_shader_type shader); + +#endif /* FD6_IMAGE_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c new file mode 100644 index 00000000000..0e4206773dc --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -0,0 +1,718 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/bitset.h" + +#include "freedreno_program.h" + +#include "fd6_program.h" +#include "fd6_emit.h" +#include "fd6_texture.h" +#include "fd6_format.h" + +static void +delete_shader_stateobj(struct fd6_shader_stateobj *so) +{ + ir3_shader_destroy(so->shader); + free(so); +} + +static struct fd6_shader_stateobj * +create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, + enum shader_t type) +{ + struct fd_context *ctx = fd_context(pctx); + struct ir3_compiler *compiler = ctx->screen->compiler; + struct fd6_shader_stateobj *so = CALLOC_STRUCT(fd6_shader_stateobj); + so->shader = ir3_shader_create(compiler, cso, type, &ctx->debug); + return so; +} + +static void * +fd6_fp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); +} + +static void +fd6_fp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd6_shader_stateobj *so = hwcso; + delete_shader_stateobj(so); +} + +static void * +fd6_vp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + return create_shader_stateobj(pctx, cso, SHADER_VERTEX); +} + +static void +fd6_vp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd6_shader_stateobj *so = hwcso; + delete_shader_stateobj(so); +} + +void +fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) +{ + const struct ir3_info *si = &so->info; + enum a6xx_state_block sb = fd6_stage2shadersb(so->type); + enum a6xx_state_src src; + uint32_t i, sz, *bin; + unsigned opcode; + + if (fd_mesa_debug & FD_DBG_DIRECT) { + sz = si->sizedwords; + src = SS6_DIRECT; + bin = fd_bo_map(so->bo); + } else { + sz = 0; + src = SS6_INDIRECT; + bin = NULL; + } + + switch (so->type) { 
+ case SHADER_VERTEX: + opcode = CP_LOAD_STATE6_GEOM; + break; + case SHADER_FRAGMENT: + case SHADER_COMPUTE: + opcode = CP_LOAD_STATE6_FRAG; + break; + default: + unreachable("bad shader type"); + } + + OUT_PKT7(ring, opcode, 3 + sz); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | + CP_LOAD_STATE6_0_STATE_SRC(src) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(so->instrlen)); + if (bin) { + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + } else { + OUT_RELOC(ring, so->bo, 0, 0, 0); + } + + /* for how clever coverity is, it is sometimes rather dull, and + * doesn't realize that the only case where bin==NULL, sz==0: + */ + assume(bin || (sz == 0)); + + for (i = 0; i < sz; i++) { + OUT_RING(ring, bin[i]); + } +} + +/* Add any missing varyings needed for stream-out. Otherwise varyings not + * used by fragment shader will be stripped out. + */ +static void +link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) +{ + const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + + /* + * First, any stream-out varyings not already in linkage map (ie. 
also + * consumed by frag shader) need to be added: + */ + for (unsigned i = 0; i < strmout->num_outputs; i++) { + const struct pipe_stream_output *out = &strmout->output[i]; + unsigned k = out->register_index; + unsigned compmask = + (1 << (out->num_components + out->start_component)) - 1; + unsigned idx, nextloc = 0; + + /* psize/pos need to be the last entries in linkage map, and will + * get added by the caller after link_stream_out(), so skip over + * them here: + */ + if ((v->outputs[k].slot == VARYING_SLOT_PSIZ) || + (v->outputs[k].slot == VARYING_SLOT_POS)) + continue; + + for (idx = 0; idx < l->cnt; idx++) { + if (l->var[idx].regid == v->outputs[k].regid) + break; + nextloc = MAX2(nextloc, l->var[idx].loc + 4); + } + + /* add if not already in linkage map: + * + * NOTE(review): l->var[idx] is read right below; this presumably + * relies on ir3_link_add() appending the new entry at the old + * l->cnt (== idx) -- confirm against ir3_link_add(). + */ + if (idx == l->cnt) + ir3_link_add(l, v->outputs[k].regid, compmask, nextloc); + + /* expand component-mask if needed, ie streaming out all components + * but frag shader doesn't consume all components: + */ + if (compmask & ~l->var[idx].compmask) { + l->var[idx].compmask |= compmask; + l->max_loc = MAX2(l->max_loc, + l->var[idx].loc + util_last_bit(l->var[idx].compmask)); + } + } +} + +#if 0 +/* TODO maybe some of this we could pre-compute once rather than having + * so much draw-time logic? + */ +static void +emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, + struct ir3_shader_linkage *l) +{ + const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + unsigned ncomp[PIPE_MAX_SO_BUFFERS] = {0}; + unsigned prog[align(l->max_loc, 2) / 2]; + + memset(prog, 0, sizeof(prog)); + + for (unsigned i = 0; i < strmout->num_outputs; i++) { + const struct pipe_stream_output *out = &strmout->output[i]; + unsigned k = out->register_index; + unsigned idx; + + ncomp[out->output_buffer] += out->num_components; + + /* linkage map sorted by order frag shader wants things, so + * a bit less ideal here.. 
+ */ + for (idx = 0; idx < l->cnt; idx++) + if (l->var[idx].regid == v->outputs[k].regid) + break; + + debug_assert(idx < l->cnt); + + for (unsigned j = 0; j < out->num_components; j++) { + unsigned c = j + out->start_component; + unsigned loc = l->var[idx].loc + c; + unsigned off = j + out->dst_offset; /* in dwords */ + + if (loc & 1) { + prog[loc/2] |= A6XX_VPC_SO_PROG_B_EN | + A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) | + A6XX_VPC_SO_PROG_B_OFF(off * 4); + } else { + prog[loc/2] |= A6XX_VPC_SO_PROG_A_EN | + A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) | + A6XX_VPC_SO_PROG_A_OFF(off * 4); + } + } + } + + OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * ARRAY_SIZE(prog))); + OUT_RING(ring, REG_A6XX_VPC_SO_BUF_CNTL); + OUT_RING(ring, A6XX_VPC_SO_BUF_CNTL_ENABLE | + COND(ncomp[0] > 0, A6XX_VPC_SO_BUF_CNTL_BUF0) | + COND(ncomp[1] > 0, A6XX_VPC_SO_BUF_CNTL_BUF1) | + COND(ncomp[2] > 0, A6XX_VPC_SO_BUF_CNTL_BUF2) | + COND(ncomp[3] > 0, A6XX_VPC_SO_BUF_CNTL_BUF3)); + OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(0)); + OUT_RING(ring, ncomp[0]); + OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(1)); + OUT_RING(ring, ncomp[1]); + OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(2)); + OUT_RING(ring, ncomp[2]); + OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(3)); + OUT_RING(ring, ncomp[3]); + OUT_RING(ring, REG_A6XX_VPC_SO_CNTL); + OUT_RING(ring, A6XX_VPC_SO_CNTL_ENABLE); + for (unsigned i = 0; i < ARRAY_SIZE(prog); i++) { + OUT_RING(ring, REG_A6XX_VPC_SO_PROG); + OUT_RING(ring, prog[i]); + } +} +#endif + +struct stage { + const struct ir3_shader_variant *v; + const struct ir3_info *i; + /* const sizes are in units of 4 * vec4 */ + uint8_t constoff; + uint8_t constlen; + /* instr sizes are in units of 16 instructions */ + uint8_t instroff; + uint8_t instrlen; +}; + +enum { + VS = 0, + FS = 1, + HS = 2, + DS = 3, + GS = 4, + MAX_STAGES +}; + +static void +setup_stages(struct fd6_emit *emit, struct stage *s) +{ + unsigned i; + + s[VS].v = fd6_emit_get_vp(emit); + s[FS].v = fd6_emit_get_fp(emit); + + s[HS].v = s[DS].v = 
s[GS].v = NULL; /* for now */ + + for (i = 0; i < MAX_STAGES; i++) { + if (s[i].v) { + s[i].i = &s[i].v->info; + /* constlen is in units of 4 * vec4: */ + s[i].constlen = align(s[i].v->constlen, 4) / 4; + /* instrlen is already in units of 16 instr.. although + * probably we should ditch that and not make the compiler + * care about instruction group size of a3xx vs a5xx + */ + s[i].instrlen = s[i].v->instrlen; + } else { + s[i].i = NULL; + s[i].constlen = 0; + s[i].instrlen = 0; + } + } + + unsigned constoff = 0; + for (i = 0; i < MAX_STAGES; i++) { + s[i].constoff = constoff; + constoff += s[i].constlen; + } + + s[VS].instroff = 0; + s[FS].instroff = 64 - s[FS].instrlen; + s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff; +} + +void +fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd6_emit *emit) +{ + struct stage s[MAX_STAGES]; + uint32_t pos_regid, psize_regid, color_regid[8]; + uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid, samp_mask_regid; + uint32_t vcoord_regid, vertex_regid, instance_regid; + enum a3xx_threadsize fssz; + uint8_t psize_loc = ~0; + int i, j; + + setup_stages(emit, s); + + fssz = (s[FS].i->max_reg >= 24) ? 
TWO_QUADS : FOUR_QUADS; + + pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); + psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ); + vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); + instance_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID); + + if (s[FS].v->color0_mrt) { + color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = + color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = + ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR); + } else { + color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0); + color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1); + color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2); + color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3); + color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4); + color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5); + color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6); + color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); + } + + samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); + samp_mask_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN); + face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD); + + /* we could probably divide this up into things that need to be + * emitted if frag-prog is dirty vs if vert-prog is dirty.. 
+ */ + + OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1); + OUT_RING(ring, 0); + + struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_VERTEX]; + OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 2); + OUT_RING(ring, COND(s[VS].v, A6XX_SP_VS_CONFIG_ENABLED) | + A6XX_SP_VS_CONFIG_NTEX(tex->num_textures) | + A6XX_SP_VS_CONFIG_NSAMP(tex->num_samplers)); /* SP_VS_CONFIG */ + OUT_RING(ring, s[VS].instrlen); /* SP_VS_INSTRLEN */ + + OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 2); + OUT_RING(ring, COND(s[HS].v, A6XX_SP_HS_CONFIG_ENABLED)); /* SP_HS_CONFIG */ + OUT_RING(ring, s[HS].instrlen); /* SP_HS_INSTRLEN */ + + OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 2); + OUT_RING(ring, COND(s[DS].v, A6XX_SP_DS_CONFIG_ENABLED)); /* SP_DS_CONFIG */ + OUT_RING(ring, s[DS].instrlen); /* SP_DS_INSTRLEN */ + + OUT_PKT4(ring, REG_A6XX_SP_GS_UNKNOWN_A871, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 2); + OUT_RING(ring, COND(s[GS].v, A6XX_SP_GS_CONFIG_ENABLED)); /* SP_GS_CONFIG */ + OUT_RING(ring, s[GS].instrlen); /* SP_GS_INSTRLEN */ + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A99E, 1); + OUT_RING(ring, 0x7fc0); + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_AB00, 1); + OUT_RING(ring, 0x5); + + tex = &ctx->tex[PIPE_SHADER_FRAGMENT]; + OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 2); + OUT_RING(ring, COND(s[FS].v, A6XX_SP_FS_CONFIG_ENABLED) | + A6XX_SP_FS_CONFIG_NTEX(tex->num_textures) | + A6XX_SP_FS_CONFIG_NSAMP(tex->num_samplers)); + /* SP_FS_CONFIG */ + OUT_RING(ring, s[FS].instrlen); /* SP_FS_INSTRLEN */ + + OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4); + OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(s[VS].constlen, 4)) | 0x100); /* HLSQ_VS_CONSTLEN */ + OUT_RING(ring, A6XX_HLSQ_HS_CNTL_CONSTLEN(align(s[HS].constlen, 4))); /* HLSQ_HS_CONSTLEN */ + OUT_RING(ring, A6XX_HLSQ_DS_CNTL_CONSTLEN(align(s[DS].constlen, 4))); /* HLSQ_DS_CONSTLEN */ + OUT_RING(ring, 
A6XX_HLSQ_GS_CNTL_CONSTLEN(align(s[GS].constlen, 4))); /* HLSQ_GS_CONSTLEN */ + + OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1); + OUT_RING(ring, s[FS].constlen | 0x100); /* HLSQ_FS_CONSTLEN */ + + OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1); + OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) | + A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | + A6XX_SP_VS_CTRL_REG0_MERGEDREGS | + A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + COND(s[VS].v->has_samp, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + + struct ir3_shader_linkage l = {0}; + ir3_link_shaders(&l, s[VS].v, s[FS].v); + + if ((s[VS].v->shader->stream_output.num_outputs > 0) && + !emit->key.binning_pass) + link_stream_out(&l, s[VS].v); + + BITSET_DECLARE(varbs, 128) = {0}; + uint32_t *varmask = (uint32_t *)varbs; + + for (i = 0; i < l.cnt; i++) + for (j = 0; j < util_last_bit(l.var[i].compmask); j++) + BITSET_SET(varbs, l.var[i].loc + j); + + OUT_PKT4(ring, REG_A6XX_VPC_VAR_DISABLE(0), 4); + OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */ + OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */ + OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */ + OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */ + + /* a6xx appends pos/psize to end of the linkage map: */ + if (pos_regid != regid(63,0)) + ir3_link_add(&l, pos_regid, 0xf, l.max_loc); + + if (psize_regid != regid(63,0)) { + psize_loc = l.max_loc; + ir3_link_add(&l, psize_regid, 0x1, l.max_loc); + } + +#if 0 + if ((s[VS].v->shader->stream_output.num_outputs > 0) && + !emit->key.binning_pass) { + emit_stream_out(ring, s[VS].v, &l); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, 0x00000000); + } else { + OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); + } +#endif + + for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { + uint32_t reg = 0; + + OUT_PKT4(ring, REG_A6XX_SP_VS_OUT_REG(i), 1); + + reg |= A6XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); + reg 
|= A6XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); + j++; + + reg |= A6XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); + reg |= A6XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); + j++; + + OUT_RING(ring, reg); + } + + for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { + uint32_t reg = 0; + + OUT_PKT4(ring, REG_A6XX_SP_VS_VPC_DST_REG(i), 1); + + reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc); + reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc); + reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc); + reg |= A6XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc); + + OUT_RING(ring, reg); + } + + OUT_PKT4(ring, REG_A6XX_SP_VS_OBJ_START_LO, 2); + OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */ + + if (s[VS].instrlen) + fd6_emit_shader(ring, s[VS].v); + + // TODO depending on other bits in this reg (if any) set somewhere else? +#if 0 + OUT_PKT4(ring, REG_A6XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, COND(s[VS].v->writes_psize, A6XX_PC_PRIM_VTX_CNTL_PSIZE)); +#endif + + OUT_PKT4(ring, REG_A6XX_SP_PRIMITIVE_CNTL, 1); + OUT_RING(ring, A6XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt)); + + bool enable_varyings = s[FS].v->total_in > 0; + + OUT_PKT4(ring, REG_A6XX_VPC_CNTL_0, 1); + OUT_RING(ring, A6XX_VPC_CNTL_0_NUMNONPOSVAR(s[FS].v->total_in) | + COND(enable_varyings, A6XX_VPC_CNTL_0_VARYING) | + 0xff00ff00); + + fd6_context(ctx)->max_loc = l.max_loc; + + OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_1, 1); + OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(l.max_loc) | + COND(psize_regid != regid(63,0), 0x100)); + + if (emit->key.binning_pass) { + OUT_PKT4(ring, REG_A6XX_SP_FS_OBJ_START_LO, 2); + OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */ + OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */ + } else { + OUT_PKT4(ring, REG_A6XX_SP_FS_OBJ_START_LO, 2); + OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */ + } + + OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5); + OUT_RING(ring, 0x7); /* XXX */ + OUT_RING(ring, 
A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | + A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) | + A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(samp_mask_regid) | + 0xfc000000); /* XXX */ + OUT_RING(ring, A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) | + 0xfcfcfc00); /* XXX */ + OUT_RING(ring, A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) | + A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | + 0x0000fcfc); /* XXX */ + OUT_RING(ring, 0xfc); /* XXX */ + + OUT_PKT4(ring, REG_A6XX_HLSQ_UNKNOWN_B980, 1); + OUT_RING(ring, s[FS].v->total_in > 0 ? 3 : 1); + + OUT_PKT4(ring, REG_A6XX_SP_FS_CTRL_REG0, 1); + OUT_RING(ring, A6XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | + COND(s[FS].v->total_in > 0, A6XX_SP_FS_CTRL_REG0_VARYING) | + COND(s[FS].v->frag_coord, A6XX_SP_FS_CTRL_REG0_VARYING) | + 0x1000000 | + A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | + A6XX_SP_FS_CTRL_REG0_MERGEDREGS | + A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + COND(s[FS].v->has_samp, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE)); + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1); + OUT_RING(ring, 0); /* XXX */ + + OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0xff); /* XXX */ + + OUT_PKT4(ring, REG_A6XX_VPC_GS_SIV_CNTL, 1); + OUT_RING(ring, 0x0000ffff); /* XXX */ + +#if 0 + OUT_PKT4(ring, REG_A6XX_SP_SP_CNTL, 1); + OUT_RING(ring, 0x00000010); /* XXX */ +#endif + + OUT_PKT4(ring, REG_A6XX_GRAS_CNTL, 1); + OUT_RING(ring, COND(enable_varyings, A6XX_GRAS_CNTL_VARYING) | + COND(s[FS].v->frag_coord, A6XX_GRAS_CNTL_XCOORD | + A6XX_GRAS_CNTL_YCOORD | + A6XX_GRAS_CNTL_ZCOORD | + A6XX_GRAS_CNTL_WCOORD)); + + OUT_PKT4(ring, REG_A6XX_RB_RENDER_CONTROL0, 2); + OUT_RING(ring, COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_VARYING | + A6XX_RB_RENDER_CONTROL0_UNK10) | + COND(s[FS].v->frag_coord, A6XX_RB_RENDER_CONTROL0_XCOORD | + A6XX_RB_RENDER_CONTROL0_YCOORD | + A6XX_RB_RENDER_CONTROL0_ZCOORD | + A6XX_RB_RENDER_CONTROL0_WCOORD)); + OUT_RING(ring, 
COND(s[FS].v->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS)); + + OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_REG(0), 8); + for (i = 0; i < 8; i++) { + OUT_RING(ring, A6XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) | + COND(emit->key.half_precision, + A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION)); + } + + OUT_PKT4(ring, REG_A6XX_VPC_PACK, 1); + OUT_RING(ring, A6XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) | + A6XX_VPC_PACK_PSIZELOC(psize_loc) | + A6XX_VPC_PACK_STRIDE_IN_VPC(l.max_loc)); + + if (!emit->key.binning_pass) { + uint32_t vinterp[8], vpsrepl[8]; + + memset(vinterp, 0, sizeof(vinterp)); + memset(vpsrepl, 0, sizeof(vpsrepl)); + + /* looks like we need to do int varyings in the frag + * shader on a5xx (no flatshad reg? or a420.0 bug?): + * + * (sy)(ss)nop + * (sy)ldlv.u32 r0.x,l[r0.x], 1 + * ldlv.u32 r0.y,l[r0.x+1], 1 + * (ss)bary.f (ei)r63.x, 0, r0.x + * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x + * (rpt5)nop + * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 + * + * Possibly on later a5xx variants we'll be able to use + * something like the code below instead of workaround + * in the shader: + */ + /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ + for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) { + /* NOTE: varyings are packed, so if compmask is 0xb + * then first, third, and fourth component occupy + * three consecutive varying slots: + */ + unsigned compmask = s[FS].v->inputs[j].compmask; + + uint32_t inloc = s[FS].v->inputs[j].inloc; + + if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || + (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { + uint32_t loc = inloc; + + for (i = 0; i < 4; i++) { + if (compmask & (1 << i)) { + vinterp[loc / 16] |= 1 << ((loc % 16) * 2); + //flatshade[loc / 32] |= 1 << (loc % 32); + loc++; + } + } + } + + gl_varying_slot slot = s[FS].v->inputs[j].slot; + + /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ + if (slot >= VARYING_SLOT_VAR0) { + unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); + 
/* Replace the .xy coordinates with S/T from the point sprite. Set + * interpolation bits for .zw such that they become .01 + */ + if (emit->sprite_coord_enable & texmask) { + /* mask is two 2-bit fields, where: + * '01' -> S + * '10' -> T + * '11' -> 1 - T (flip mode) + */ + unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001; + uint32_t loc = inloc; + if (compmask & 0x1) { + vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x2) { + vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x4) { + /* .z <- 0.0f */ + vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x8) { + /* .w <- 1.0f */ + vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); + loc++; + } + } + } + } + + OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8); + for (i = 0; i < 8; i++) + OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */ + + OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8); + for (i = 0; i < 8; i++) + OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ + } + + if (!emit->key.binning_pass) + if (s[FS].instrlen) + fd6_emit_shader(ring, s[FS].v); + + OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6); + OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) | + A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) | + 0xfcfc0000); + OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_2 */ + OUT_RING(ring, 0xfcfcfcfc); /* VFD_CONTROL_3 */ + OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */ + OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_5 */ + OUT_RING(ring, 0x00000000); /* VFD_CONTROL_6 */ +} + +void +fd6_prog_init(struct pipe_context *pctx) +{ + pctx->create_fs_state = fd6_fp_state_create; + pctx->delete_fs_state = fd6_fp_state_delete; + + pctx->create_vs_state = fd6_vp_state_create; + pctx->delete_vs_state = fd6_vp_state_delete; + + fd_prog_init(pctx); +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.h b/src/gallium/drivers/freedreno/a6xx/fd6_program.h new file mode 
100644 index 00000000000..fa4f84fee2f --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_PROGRAM_H_ +#define FD6_PROGRAM_H_ + +#include "pipe/p_context.h" +#include "freedreno_context.h" +#include "ir3_shader.h" + +struct fd6_shader_stateobj { + struct ir3_shader *shader; +}; + +struct fd6_emit; + +void fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so); + +void fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd6_emit *emit); + +void fd6_prog_init(struct pipe_context *pctx); + +#endif /* FD6_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_query.c b/src/gallium/drivers/freedreno/a6xx/fd6_query.c new file mode 100644 index 00000000000..1f9dce74fe6 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_query.c @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2017 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <[email protected]>
+ */
+
+/* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
+
+#include "freedreno_query_acc.h"
+#include "freedreno_resource.h"
+
+#include "fd6_context.h"
+#include "fd6_format.h"
+#include "fd6_query.h"
+
+/* Layout of one query's sample buffer.  The resume hooks point the GPU at
+ * `start`, the pause hooks at `stop`, and pause then accumulates
+ * (stop - start) into `result`, which the result callbacks read back.
+ */
+struct PACKED fd6_query_sample {
+	uint64_t start;
+	uint64_t result;
+	uint64_t stop;
+};
+
+/* Expands to the argument list that OUT_RELOC()/OUT_RELOCW() take after the
+ * ring: the query's backing bo, the byte offset of `field` within
+ * struct fd6_query_sample, and two trailing zeros (presumably the or/shift
+ * arguments of the reloc helpers -- confirm against freedreno_util.h).
+ */
+#define query_sample(aq, field) \
+	fd_resource((aq)->prsc)->bo, \
+	offsetof(struct fd6_query_sample, field), \
+	0, 0
+
+/*
+ * Occlusion Query:
+ *
+ * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
+ * interpret results
+ */
+
+/* Resume hook: programs the sample-count address to the `start` slot and
+ * emits a ZPASS_DONE event on the batch's draw ring, then bumps the
+ * context's count of active samples-passed queries.
+ */
+static void
+occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+	struct fd_ringbuffer *ring = batch->draw;
+
+	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
+	OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+
+	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
+	OUT_RELOCW(ring, query_sample(aq, start));
+
+	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+	OUT_RING(ring, ZPASS_DONE);
+	fd_reset_wfi(batch);
+
+	fd6_context(batch->ctx)->samples_passed_queries++;
+}
+
+/* Pause hook: pre-fills `stop` with all-ones, requests a sample-count copy
+ * into `stop`, waits on that slot, and finally accumulates
+ * result += stop - start on the GPU.
+ */
+static void
+occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+	struct fd_ringbuffer *ring = batch->draw;
+
+	/* write a 0xffffffff_ffffffff marker into `stop` first: */
+	OUT_PKT7(ring, CP_MEM_WRITE, 4);
+	OUT_RELOCW(ring, query_sample(aq, stop));
+	OUT_RING(ring, 0xffffffff);
+	OUT_RING(ring, 0xffffffff);
+
+	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
+
+	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
+	OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+
+	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
+	OUT_RELOCW(ring, query_sample(aq, stop));
+
+	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+	OUT_RING(ring, ZPASS_DONE);
+	fd_reset_wfi(batch);
+
+	/* NOTE(review): presumably this waits until `stop` no longer holds
+	 * the marker written above; the two XXX dwords are undocumented here.
+	 */
+	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
+	OUT_RING(ring, 0x00000014);   // XXX
+	OUT_RELOC(ring, query_sample(aq, stop));
+	OUT_RING(ring, 0xffffffff);
+	OUT_RING(ring, 0xffffffff);
+	OUT_RING(ring, 0x00000010);   // XXX
+
+	/* result += stop - start: */
+	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
+			CP_MEM_TO_MEM_0_NEG_C);
+	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
+	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
+	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
+	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */
+
+	fd6_context(batch->ctx)->samples_passed_queries--;
+}
+
+/* Result callback: report the accumulated sample count as-is. */
+static void
+occlusion_counter_result(struct fd_acc_query *aq, void *buf,
+		union pipe_query_result *result)
+{
+	struct fd6_query_sample *sp = buf;
+	result->u64 = sp->result;
+}
+
+/* Result callback: report whether the accumulated sample count is non-zero. */
+static void
+occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
+		union pipe_query_result *result)
+{
+	struct fd6_query_sample *sp = buf;
+	result->b = !!sp->result;
+}
+
+static const struct fd_acc_sample_provider occlusion_counter = {
+		.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+		.active = FD_STAGE_DRAW,
+		.size = sizeof(struct fd6_query_sample),
+		.resume = occlusion_resume,
+		.pause = occlusion_pause,
+		.result = occlusion_counter_result,
+};
+
+static const struct fd_acc_sample_provider occlusion_predicate = {
+		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+		.active = FD_STAGE_DRAW,
+		.size = sizeof(struct fd6_query_sample),
+		.resume = occlusion_resume,
+		.pause = occlusion_pause,
+		.result = occlusion_predicate_result,
+};
+
+static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
+		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+		.active = FD_STAGE_DRAW,
+		.size = sizeof(struct fd6_query_sample),
+		.resume = occlusion_resume,
+		.pause = occlusion_pause,
+		.result = occlusion_predicate_result,
+};
+
+/*
+ * Timestamp Queries:
+ */
+
+/* Resume hook: emit a CACHE_FLUSH_AND_INV_EVENT with the TIMESTAMP flag,
+ * targeting the `start` slot of the sample buffer.
+ */
+static void
+timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+	struct fd_ringbuffer *ring = batch->draw;
+
+	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
+			CP_EVENT_WRITE_0_TIMESTAMP);
+	OUT_RELOCW(ring, query_sample(aq, start));
+	OUT_RING(ring, 0x00000000);
+
+	fd_reset_wfi(batch);
+}
+
+/* Pause hook: same event as resume but targeting `stop`, followed by a wfi
+ * and a GPU-side result += stop - start.
+ */
+static void
+timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+	struct fd_ringbuffer *ring = batch->draw;
+
+	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
+			CP_EVENT_WRITE_0_TIMESTAMP);
+	OUT_RELOCW(ring, query_sample(aq, stop));
+	OUT_RING(ring, 0x00000000);
+
+	fd_reset_wfi(batch);
+	fd_wfi(batch, ring);
+
+	/* result += stop - start: */
+	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
+			CP_MEM_TO_MEM_0_NEG_C);
+	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
+	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
+	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
+	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */
+}
+
+/* Convert a tick count into nanoseconds.
+ *
+ * The count is taken as a full 64-bit value: the accumulated sample is a
+ * uint64_t, and a 32-bit parameter would both truncate it and make the
+ * multiply below wrap in 32-bit arithmetic before the result is widened.
+ */
+static uint64_t
+ticks_to_ns(uint64_t ts)
+{
+	/* This is based on the 19.2MHz always-on rbbm timer.
+	 *
+	 * TODO we should probably query this value from kernel..
+	 *
+	 * NOTE: the integer division below yields 52 (the exact ratio is
+	 * 52.08..), so the result slightly under-reports.
+	 */
+	return ts * (1000000000 / 19200000);
+}
+
+/* Result callback: accumulated ticks converted to nanoseconds. */
+static void
+time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
+		union pipe_query_result *result)
+{
+	struct fd6_query_sample *sp = buf;
+	result->u64 = ticks_to_ns(sp->result);
+}
+
+/* Result callback: accumulated ticks converted to nanoseconds. */
+static void
+timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
+		union pipe_query_result *result)
+{
+	struct fd6_query_sample *sp = buf;
+	result->u64 = ticks_to_ns(sp->result);
+}
+
+static const struct fd_acc_sample_provider time_elapsed = {
+		.query_type = PIPE_QUERY_TIME_ELAPSED,
+		.active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
+		.size = sizeof(struct fd6_query_sample),
+		.resume = timestamp_resume,
+		.pause = timestamp_pause,
+		.result = time_elapsed_accumulate_result,
+};
+
+/* NOTE: timestamp query isn't going to give terribly sensible results
+ * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
+ * add in a binning pass, the results get even more nonsensical.  So
+ * we just return the timestamp on the first tile and hope that is
+ * kind of good enough.
+ */
+
+static const struct fd_acc_sample_provider timestamp = {
+		.query_type = PIPE_QUERY_TIMESTAMP,
+		.active = FD_STAGE_ALL,
+		.size = sizeof(struct fd6_query_sample),
+		.resume = timestamp_resume,
+		.pause = timestamp_pause,
+		.result = timestamp_accumulate_result,
+};
+
+/* Hook up the accumulating-query implementation on the context and register
+ * the a6xx sample providers (three occlusion variants, time-elapsed and
+ * timestamp).
+ */
+void
+fd6_query_context_init(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	ctx->create_query = fd_acc_create_query;
+	ctx->query_set_stage = fd_acc_query_set_stage;
+
+	fd_acc_query_register_provider(pctx, &occlusion_counter);
+	fd_acc_query_register_provider(pctx, &occlusion_predicate);
+	fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
+
+	fd_acc_query_register_provider(pctx, &time_elapsed);
+	fd_acc_query_register_provider(pctx, &timestamp);
+}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_query.h b/src/gallium/drivers/freedreno/a6xx/fd6_query.h
new file mode 100644
index 00000000000..09445f6fedf
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_query.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 Rob Clark <[email protected]>
+ * Copyright © 2018 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_QUERY_H_ +#define FD6_QUERY_H_ + +#include "pipe/p_context.h" + +void fd6_query_context_init(struct pipe_context *pctx); + +#endif /* FD6_QUERY_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c new file mode 100644 index 00000000000..7f13676799b --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Authors:
+ *    Rob Clark <[email protected]>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd6_rasterizer.h"
+#include "fd6_context.h"
+#include "fd6_format.h"
+
+/* Create the a6xx rasterizer state object for `cso`.
+ *
+ * Allocates a zeroed fd6_rasterizer_stateobj, keeps a copy of the gallium
+ * state in `base`, and pre-packs the point-size, polygon-offset and
+ * GRAS_SU_CNTL / PC_PRIMITIVE_CNTL register values from it.
+ *
+ * Returns the new state object, or NULL if the allocation fails.  `pctx`
+ * is not used here.
+ */
+void *
+fd6_rasterizer_state_create(struct pipe_context *pctx,
+		const struct pipe_rasterizer_state *cso)
+{
+	struct fd6_rasterizer_stateobj *so;
+	float psize_min, psize_max;
+
+	so = CALLOC_STRUCT(fd6_rasterizer_stateobj);
+	if (!so)
+		return NULL;
+
+	so->base = *cso;
+
+	if (cso->point_size_per_vertex) {
+		psize_min = util_get_min_point_size(cso);
+		/* NOTE(review): 4092 looks like the hw point-size limit -- confirm */
+		psize_max = 4092;
+	} else {
+		/* Force the point size to be as if the vertex output was disabled. */
+		psize_min = cso->point_size;
+		psize_max = cso->point_size;
+	}
+
+	so->gras_su_point_minmax =
+			A6XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+			A6XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+	so->gras_su_point_size   = A6XX_GRAS_SU_POINT_SIZE(cso->point_size);
+	so->gras_su_poly_offset_scale =
+			A6XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
+	so->gras_su_poly_offset_offset =
+			A6XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+	so->gras_su_poly_offset_clamp =
+			A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp);
+
+	/* the register field takes half of the line width: */
+	so->gras_su_cntl =
+			A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0);
+#if 0
+	so->pc_raster_cntl =
+		A6XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+		A6XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+#endif
+
+#if 0
+	if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+		cso->fill_back != PIPE_POLYGON_MODE_FILL)
+		so->pc_raster_cntl |= A6XX_PC_RASTER_CNTL_POLYMODE_ENABLE;
+#endif
+
+	if (cso->cull_face & PIPE_FACE_FRONT)
+		so->gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
+	if (cso->cull_face & PIPE_FACE_BACK)
+		so->gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
+	if (!cso->front_ccw)
+		so->gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
+	if (cso->offset_tri)
+		so->gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
+
+	/* pc_primitive_cntl starts at zero (CALLOC'd), so |= is safe here: */
+	if (!cso->flatshade_first)
+		so->pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PROVOKING_VTX_LAST;
+
+//	if (!cso->depth_clip)
+//		so->gras_cl_clip_cntl |= A6XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
+//			A6XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
+#if 0
+	if (cso->clip_halfz)
+		so->gras_cl_clip_cntl |= A6XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z;
+#endif
+
+	return so;
+}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h
new file mode 100644
index 00000000000..d2020bee377
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2016 Rob Clark <[email protected]>
+ * Copyright © 2018 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <[email protected]>
+ */
+
+#ifndef FD6_RASTERIZER_H_
+#define FD6_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/* a6xx rasterizer state: the gallium state followed by pre-packed register
+ * values.  `base` must stay the first member, since
+ * fd6_rasterizer_stateobj() below casts a pipe_rasterizer_state pointer
+ * straight to this type.
+ */
+struct fd6_rasterizer_stateobj {
+	struct pipe_rasterizer_state base;
+
+	uint32_t gras_su_point_minmax;
+	uint32_t gras_su_point_size;
+	uint32_t gras_su_poly_offset_scale;
+	uint32_t gras_su_poly_offset_offset;
+	uint32_t gras_su_poly_offset_clamp;
+
+	uint32_t gras_su_cntl;
+	uint32_t gras_cl_clip_cntl;
+	uint32_t pc_primitive_cntl;
+	uint32_t pc_raster_cntl;
+};
+
+/* Downcast from the embedded gallium state to the a6xx state object. */
+static inline struct fd6_rasterizer_stateobj *
+fd6_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
+{
+	return (struct fd6_rasterizer_stateobj *)rast;
+}
+
+void * fd6_rasterizer_state_create(struct pipe_context *pctx,
+		const struct pipe_rasterizer_state *cso);
+
+#endif /* FD6_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c
new file mode 100644
index 00000000000..6f1b745047c
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2018 Rob Clark <[email protected]>
+ * Copyright © 2018 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <[email protected]>
+ */
+
+#include "fd6_resource.h"
+
+/* indexed by cpp: */
+/* NOTE(review): the table is sparse -- cpp values without an entry (0, 5-7,
+ * 9-11, 13-15) read as { 0, 0 }, and cpp > 16 would index past the end.
+ * Confirm that callers only pass the listed cpp values.
+ */
+static const struct {
+	unsigned pitchalign;
+	unsigned heightalign;
+} tile_alignment[] = {
+	[1]  = { 128, 32 },
+	[2]  = { 128, 16 },
+	[3]  = { 128, 16 },
+	[4]  = {  64, 16 },
+	[8]  = {  64, 16 },
+	[12] = {  64, 16 },
+	[16] = {  64, 16 },
+};
+
+/* NOTE: good way to test this is:  (for example)
+ *  piglit/bin/texelFetch fs sampler2D 100x100x1-100x300x1
+ */
+/* Fill in pitch, offset and size0 for every mip level of `rsc` and return
+ * the total size accumulated over all levels (size0 * depth * layers per
+ * level).  `alignment` is the granularity each level's layer size is
+ * rounded up to; `format` selects the block layout used for the size and
+ * pitch calculations.
+ */
+static uint32_t
+setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
+{
+	struct pipe_resource *prsc = &rsc->base;
+	struct fd_screen *screen = fd_screen(prsc->screen);
+	enum util_format_layout layout = util_format_description(format)->layout;
+	/* NOTE(review): this initial value is overwritten in both branches of
+	 * the loop below before it is read.
+	 */
+	uint32_t pitchalign = screen->gmem_alignw;
+	uint32_t heightalign;
+	uint32_t level, size = 0;
+	uint32_t width = prsc->width0;
+	uint32_t height = prsc->height0;
+	uint32_t depth = prsc->depth0;
+	/* in layer_first layout, the level (slice) contains just one
+	 * layer (since in fact the layer contains the slices)
+	 */
+	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
+
+	heightalign = tile_alignment[rsc->cpp].heightalign;
+
+	for (level = 0; level <= prsc->last_level; level++) {
+		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
+		bool linear_level = fd_resource_level_linear(prsc, level);
+		uint32_t aligned_height = height;
+		uint32_t blocks;
+
+		if (rsc->tile_mode && !linear_level) {
+			/* tiled level: per-cpp alignment from the table above */
+			pitchalign = tile_alignment[rsc->cpp].pitchalign;
+			aligned_height = align(aligned_height, heightalign);
+		} else {
+			pitchalign = 64;
+
+			/* The blits used for mem<->gmem work at a granularity of
+			 * 32x32, which can cause faults due to over-fetch on the
+			 * last level.  The simple solution is to over-allocate a
+			 * bit the last level to ensure any over-fetch is harmless.
+			 * The pitch is already sufficiently aligned, but height
+			 * may not be:
+			 */
+			if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER))
+				aligned_height = align(aligned_height, 32);
+		}
+
+		/* ASTC pitch is aligned to a multiple of the block width, which
+		 * need not be a power of two, hence util_align_npot():
+		 */
+		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
+			slice->pitch =
+				util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
+		else
+			slice->pitch = align(width, pitchalign);
+
+		slice->offset = size;
+		blocks = util_format_get_nblocks(format, slice->pitch, aligned_height);
+
+		/* 1d array and 2d array textures must all have the same layer size
+		 * for each miplevel on a3xx. 3d textures can have different layer
+		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
+		 * different than what this code does), so as soon as the layer size
+		 * range gets into range, we stop reducing it.
+		 */
+		if (prsc->target == PIPE_TEXTURE_3D && (
+					level == 1 ||
+					(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
+			slice->size0 = align(blocks * rsc->cpp, alignment);
+		else if (level == 0 || rsc->layer_first || alignment == 1)
+			slice->size0 = align(blocks * rsc->cpp, alignment);
+		else
+			slice->size0 = rsc->slices[level - 1].size0;
+
+#if 0
+		debug_printf("%s: %ux%ux%u@%u: %2u: stride=%4u, size=%7u, aligned_height=%3u\n",
+				util_format_name(prsc->format),
+				prsc->width0, prsc->height0, prsc->depth0, rsc->cpp,
+				level, slice->pitch * rsc->cpp,
+				slice->size0 * depth * layers_in_level,
+				aligned_height);
+#endif
+
+		size += slice->size0 * depth * layers_in_level;
+
+		width = u_minify(width, 1);
+		height = u_minify(height, 1);
+		depth = u_minify(depth, 1);
+	}
+
+	return size;
+}
+
+/* Screen hook: choose the layout for `rsc` by target (3D textures are not
+ * layer-first and use a 4096 alignment; everything else is layer-first
+ * with alignment 1), then lay out its slices and return the total size.
+ */
+uint32_t
+fd6_setup_slices(struct fd_resource *rsc)
+{
+	uint32_t alignment;
+
+	switch (rsc->base.target) {
+	case PIPE_TEXTURE_3D:
+		rsc->layer_first = false;
+		alignment = 4096;
+		break;
+	default:
+		rsc->layer_first = true;
+		alignment = 1;
+		break;
+	}
+
+	return setup_slices(rsc, alignment, rsc->base.format);
+}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.h b/src/gallium/drivers/freedreno/a6xx/fd6_resource.h
new file mode 100644
index 00000000000..a19f2744dd7
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2018 Rob Clark <[email protected]>
+ * Copyright © 2018 Google, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_RESOURCE_H_ +#define FD6_RESOURCE_H_ + +#include "freedreno_resource.h" + +uint32_t fd6_setup_slices(struct fd_resource *rsc); + +#endif /* FD6_RESOURCE_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.c b/src/gallium/drivers/freedreno/a6xx/fd6_screen.c new file mode 100644 index 00000000000..9e039bf87a9 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.c @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Authors:
+ *    Rob Clark <[email protected]>
+ */
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+
+#include "fd6_screen.h"
+#include "fd6_context.h"
+#include "fd6_format.h"
+#include "fd6_resource.h"
+
+#include "ir3_compiler.h"
+
+/* pipe_screen::is_format_supported hook.
+ *
+ * Builds `retval` from the subset of the requested `usage` bind flags that
+ * the a6xx format tables can satisfy for `format`, and reports support
+ * only when that subset equals `usage`.  Multisampled requests
+ * (sample_count > 1) and out-of-range targets are always rejected.
+ */
+static boolean
+fd6_screen_is_format_supported(struct pipe_screen *pscreen,
+		enum pipe_format format,
+		enum pipe_texture_target target,
+		unsigned sample_count,
+		unsigned storage_sample_count,
+		unsigned usage)
+{
+	unsigned retval = 0;
+
+	if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+			(sample_count > 1)) { /* TODO add MSAA */
+		DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+				util_format_name(format), target, sample_count, usage);
+		return FALSE;
+	}
+
+	/* sample counts of 0 and 1 are treated as equivalent: */
+	if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+		return false;
+
+	if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
+			(fd6_pipe2vtx(format) != (enum a6xx_vtx_fmt)~0)) {
+		retval |= PIPE_BIND_VERTEX_BUFFER;
+	}
+
+	/* formats with a 12-byte block are only sampleable from buffers: */
+	if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+			(target == PIPE_BUFFER ||
+			 util_format_get_blocksize(format) != 12) &&
+			(fd6_pipe2tex(format) != (enum a6xx_tex_fmt)~0)) {
+		retval |= PIPE_BIND_SAMPLER_VIEW;
+	}
+
+	/* render-target style bindings need both a color and a tex format: */
+	if ((usage & (PIPE_BIND_RENDER_TARGET |
+				PIPE_BIND_DISPLAY_TARGET |
+				PIPE_BIND_SCANOUT |
+				PIPE_BIND_SHARED |
+				PIPE_BIND_COMPUTE_RESOURCE)) &&
+			(fd6_pipe2color(format) != (enum a6xx_color_fmt)~0) &&
+			(fd6_pipe2tex(format) != (enum a6xx_tex_fmt)~0)) {
+		retval |= usage & (PIPE_BIND_RENDER_TARGET |
+				PIPE_BIND_DISPLAY_TARGET |
+				PIPE_BIND_SCANOUT |
+				PIPE_BIND_SHARED |
+				PIPE_BIND_COMPUTE_RESOURCE);
+	}
+
+	/* For ARB_framebuffer_no_attachments: */
+	if ((usage & PIPE_BIND_RENDER_TARGET) && (format == PIPE_FORMAT_NONE)) {
+		retval |= usage & PIPE_BIND_RENDER_TARGET;
+	}
+
+	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+			(fd6_pipe2depth(format) != (enum a6xx_depth_format)~0) &&
+			(fd6_pipe2tex(format) != (enum a6xx_tex_fmt)~0)) {
+		retval |= PIPE_BIND_DEPTH_STENCIL;
+	}
+
+	if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+			(fd_pipe2index(format) != (enum pc_di_index_size)~0)) {
+		retval |= PIPE_BIND_INDEX_BUFFER;
+	}
+
+	if (retval != usage) {
+		DBG("not supported: format=%s, target=%d, sample_count=%d, "
+				"usage=%x, retval=%x", util_format_name(format),
+				target, sample_count, usage, retval);
+	}
+
+	return retval == usage;
+}
+
+/* Install the a6xx-specific screen state: render-target limit, ir3
+ * compiler instance, and the context-create, format-query and slice-layout
+ * hooks.
+ */
+void
+fd6_screen_init(struct pipe_screen *pscreen)
+{
+	struct fd_screen *screen = fd_screen(pscreen);
+	screen->max_rts = A6XX_MAX_RENDER_TARGETS;
+	screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id);
+	pscreen->context_create = fd6_context_create;
+	pscreen->is_format_supported = fd6_screen_is_format_supported;
+
+	screen->setup_slices = fd6_setup_slices;
+}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.h b/src/gallium/drivers/freedreno/a6xx/fd6_screen.h
new file mode 100644
index 00000000000..5c8b00dad2a
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 Rob Clark <[email protected]>
+ * Copyright © 2018 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_SCREEN_H_ +#define FD6_SCREEN_H_ + +#include "pipe/p_screen.h" + +void fd6_screen_init(struct pipe_screen *pscreen); + +#endif /* FD6_SCREEN_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c new file mode 100644 index 00000000000..0f342ae8e28 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c @@ -0,0 +1,347 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Authors:
+ *    Rob Clark <[email protected]>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "fd6_texture.h"
+#include "fd6_format.h"
+
+/* Translate a gallium wrap mode into the a6xx clamp enum.
+ *
+ * PIPE_TEX_WRAP_CLAMP is first rewritten to CLAMP_TO_EDGE or
+ * CLAMP_TO_BORDER depending on `clamp_to_edge`.  `*needs_border` is set to
+ * true when the resulting mode is CLAMP_TO_BORDER and is otherwise left
+ * untouched.  Unsupported modes log a debug message and return 0.
+ */
+static enum a6xx_tex_clamp
+tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border)
+{
+	/* Hardware does not support _CLAMP, but we emulate it: */
+	if (wrap == PIPE_TEX_WRAP_CLAMP) {
+		wrap = (clamp_to_edge) ?
+			PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+	}
+
+	switch (wrap) {
+	case PIPE_TEX_WRAP_REPEAT:
+		return A6XX_TEX_REPEAT;
+	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+		return A6XX_TEX_CLAMP_TO_EDGE;
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+		*needs_border = true;
+		return A6XX_TEX_CLAMP_TO_BORDER;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+		/* only works for PoT.. need to emulate otherwise! */
+		return A6XX_TEX_MIRROR_CLAMP;
+	case PIPE_TEX_WRAP_MIRROR_REPEAT:
+		return A6XX_TEX_MIRROR_REPEAT;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP:
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+		/* these two we could perhaps emulate, but we currently
+		 * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+		 */
+	default:
+		DBG("invalid wrap: %u", wrap);
+		return 0;
+	}
+}
+
+/* Translate a gallium image filter into the a6xx filter enum; linear
+ * filtering becomes A6XX_TEX_ANISO when `aniso` is set.  Unknown filters
+ * log a debug message and return 0.
+ */
+static enum a6xx_tex_filter
+tex_filter(unsigned filter, bool aniso)
+{
+	switch (filter) {
+	case PIPE_TEX_FILTER_NEAREST:
+		return A6XX_TEX_NEAREST;
+	case PIPE_TEX_FILTER_LINEAR:
+		return aniso ? A6XX_TEX_ANISO : A6XX_TEX_LINEAR;
+	default:
+		DBG("invalid filter: %u", filter);
+		return 0;
+	}
+}
+
+/* Create the a6xx sampler state object for `cso`: keeps a copy of the
+ * gallium state and pre-packs the TEX_SAMP_0/1 register values.  Returns
+ * NULL if the allocation fails.
+ */
+static void *
+fd6_sampler_state_create(struct pipe_context *pctx,
+		const struct pipe_sampler_state *cso)
+{
+	struct fd6_sampler_stateobj *so = CALLOC_STRUCT(fd6_sampler_stateobj);
+	unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
+	bool miplinear = false;
+	bool clamp_to_edge;
+
+	if (!so)
+		return NULL;
+
+	so->base = *cso;
+
+	if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+		miplinear = true;
+
+	/*
+	 * For nearest filtering, _CLAMP means _CLAMP_TO_EDGE;  for linear
+	 * filtering, _CLAMP means _CLAMP_TO_BORDER while additionally
+	 * clamping the texture coordinates to [0.0, 1.0].
+	 *
+	 * The clamping will be taken care of in the shaders.  There are two
+	 * filters here, but let the minification one have a say.
+	 */
+	clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+	if (!clamp_to_edge) {
+		so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP);
+		so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP);
+		so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
+	}
+
+	/* tex_clamp() flips this to true if any axis ends up CLAMP_TO_BORDER: */
+	so->needs_border = false;
+	so->texsamp0 =
+		COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
+		A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
+		A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
+		A6XX_TEX_SAMP_0_ANISO(aniso) |
+		A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge, &so->needs_border)) |
+		A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge, &so->needs_border)) |
+		A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge, &so->needs_border));
+
+	so->texsamp1 =
+		COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
+		COND(!cso->normalized_coords, A6XX_TEX_SAMP_1_UNNORM_COORDS);
+
+	/* LOD bias/range are only programmed when mip filtering is enabled: */
+	if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+		so->texsamp0 |= A6XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
+		so->texsamp1 |=
+			A6XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
+			A6XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
+	}
+
+	if (cso->compare_mode)
+		so->texsamp1 |= A6XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
+	return so;
+}
+
+/* pipe_context::bind_sampler_states hook.  Collects per-sampler saturate
+ * bitmasks from the states being bound, forwards to the common
+ * fd_sampler_states_bind(), then stores the masks on the fd6 context for
+ * the fragment or vertex stage.
+ *
+ * NOTE(review): the mask bit is `i` (index within `hwcso`), not
+ * `start + i`, and the masks are uint16_t -- verify callers always bind
+ * from slot 0 with at most 16 samplers.
+ */
+static void
+fd6_sampler_states_bind(struct pipe_context *pctx,
+		enum pipe_shader_type shader, unsigned start,
+		unsigned nr, void **hwcso)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd6_context *fd6_ctx = fd6_context(ctx);
+	uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
+	unsigned i;
+
+	/* a NULL array is treated as binding nothing: */
+	if (!hwcso)
+		nr = 0;
+
+	for (i = 0; i < nr; i++) {
+		if (hwcso[i]) {
+			struct fd6_sampler_stateobj *sampler =
+					fd6_sampler_stateobj(hwcso[i]);
+			if (sampler->saturate_s)
+				saturate_s |= (1 << i);
+			if (sampler->saturate_t)
+				saturate_t |= (1 << i);
+			if (sampler->saturate_r)
+				saturate_r |= (1 << i);
+		}
+	}
+
+	fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
+
+	if (shader == PIPE_SHADER_FRAGMENT) {
+		fd6_ctx->fsaturate =
+			(saturate_s != 0) ||
+			(saturate_t != 0) ||
+			(saturate_r != 0);
+		fd6_ctx->fsaturate_s = saturate_s;
+		fd6_ctx->fsaturate_t = saturate_t;
+		fd6_ctx->fsaturate_r = saturate_r;
+	} else if (shader == PIPE_SHADER_VERTEX) {
+		fd6_ctx->vsaturate =
+			(saturate_s != 0) ||
+			(saturate_t != 0) ||
+			(saturate_r != 0);
+		fd6_ctx->vsaturate_s = saturate_s;
+		fd6_ctx->vsaturate_t = saturate_t;
+		fd6_ctx->vsaturate_r = saturate_r;
+	}
+}
+
+/* Currently always reports that the ASTC sRGB workaround is not needed. */
+static bool
+use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format)
+{
+	return false;  // TODO check if this is still needed on a5xx
+}
+
+/* pipe_context::create_sampler_view hook.  Allocates the view, takes a
+ * reference on `prsc`, and pre-packs the TEX_CONST_0..5 descriptor words
+ * plus the byte offset of the first level/layer.  Returns NULL if the
+ * allocation fails.
+ */
+static struct pipe_sampler_view *
+fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+		const struct pipe_sampler_view *cso)
+{
+	struct fd6_pipe_sampler_view *so = CALLOC_STRUCT(fd6_pipe_sampler_view);
+	struct fd_resource *rsc = fd_resource(prsc);
+	enum pipe_format format = cso->format;
+	unsigned lvl, layers;
+
+	if (!so)
+		return NULL;
+
+	/* sampling the stencil half of z32_s8 goes through the separate
+	 * stencil resource and its format:
+	 */
+	if (format == PIPE_FORMAT_X32_S8X24_UINT) {
+		rsc = rsc->stencil;
+		format = rsc->base.format;
+	}
+
+	so->base = *cso;
+	pipe_reference(NULL, &prsc->reference);
+	so->base.texture = prsc;
+	so->base.reference.count = 1;
+	so->base.context = pctx;
+
+	so->texconst0 =
+		A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) |
+		fd6_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
+				cso->swizzle_b, cso->swizzle_a);
+
+	/* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
+	 * we get isn't quite right.  Use SWAP(XYZW) as a cheap and cheerful
+	 * way to re-arrange things so stencil component is where the swiz
+	 * expects.
+	 *
+	 * Note that gallium expects stencil sampler to return (s,s,s,s)
+	 * which isn't quite true.  To make that happen we'd have to massage
+	 * the swizzle.  But in practice only the .x component is used.
+	 */
+	if (format == PIPE_FORMAT_X24S8_UINT) {
+		so->texconst0 |= A6XX_TEX_CONST_0_SWAP(XYZW);
+	}
+
+	if (util_format_is_srgb(format)) {
+		if (use_astc_srgb_workaround(pctx, format))
+			so->astc_srgb = true;
+		so->texconst0 |= A6XX_TEX_CONST_0_SRGB;
+	}
+
+	if (cso->target == PIPE_BUFFER) {
+		/* buffer views are described as a single row of `elements` texels;
+		 * `layers` is left unset on this path (buffers take the default
+		 * case in the switch below, which does not read it).
+		 */
+		unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);
+
+		lvl = 0;
+		so->texconst1 =
+			A6XX_TEX_CONST_1_WIDTH(elements) |
+			A6XX_TEX_CONST_1_HEIGHT(1);
+		so->texconst2 =
+			A6XX_TEX_CONST_2_FETCHSIZE(fd6_pipe2fetchsize(format)) |
+			A6XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+		so->offset = cso->u.buf.offset;
+	} else {
+		unsigned miplevels;
+
+		lvl = fd_sampler_first_level(cso);
+		miplevels = fd_sampler_last_level(cso) - lvl;
+		layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
+
+		so->texconst0 |= A6XX_TEX_CONST_0_MIPLVLS(miplevels);
+		so->texconst1 =
+			A6XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+			A6XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+		so->texconst2 =
+			A6XX_TEX_CONST_2_FETCHSIZE(fd6_pipe2fetchsize(format)) |
+			A6XX_TEX_CONST_2_PITCH(
+				util_format_get_nblocksx(
+					format, rsc->slices[lvl].pitch) * rsc->cpp);
+		so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+	}
+
+	so->texconst2 |= A6XX_TEX_CONST_2_TYPE(fd6_tex_type(cso->target));
+
+	/* array pitch and depth depend on the texture target: */
+	switch (cso->target) {
+	case PIPE_TEXTURE_RECT:
+	case PIPE_TEXTURE_1D:
+	case PIPE_TEXTURE_2D:
+		so->texconst3 =
+			A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size);
+		so->texconst5 =
+			A6XX_TEX_CONST_5_DEPTH(1);
+		break;
+	case PIPE_TEXTURE_1D_ARRAY:
+	case PIPE_TEXTURE_2D_ARRAY:
+		so->texconst3 =
+			A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size);
+		so->texconst5 =
+			A6XX_TEX_CONST_5_DEPTH(layers);
+		break;
+	case PIPE_TEXTURE_CUBE:
+	case PIPE_TEXTURE_CUBE_ARRAY:
+		/* depth is expressed in cubes, i.e. layers / 6: */
+		so->texconst3 =
+			A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size);
+		so->texconst5 =
+			A6XX_TEX_CONST_5_DEPTH(layers / 6);
+		break;
+	case PIPE_TEXTURE_3D:
+		so->texconst3 =
+			A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0);
+		so->texconst5 =
+			A6XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
+		break;
+	default:
+		so->texconst3 = 0x00000000;
+		break;
+	}
+
+	return &so->base;
+}
+
+/* pipe_context::set_sampler_views hook.  Builds a bitmask of the views
+ * flagged astc_srgb, forwards to the common fd_set_sampler_views(), and
+ * stores the mask on the fd6 context for the fragment or vertex stage.
+ *
+ * NOTE(review): `views` is dereferenced without a NULL check and the mask
+ * bit is `i`, not `start + i` -- confirm against the callers.
+ */
+static void
+fd6_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
+		unsigned start, unsigned nr,
+		struct pipe_sampler_view **views)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd6_context *fd6_ctx = fd6_context(ctx);
+	uint16_t astc_srgb = 0;
+	unsigned i;
+
+	for (i = 0; i < nr; i++) {
+		if (views[i]) {
+			struct fd6_pipe_sampler_view *view =
+					fd6_pipe_sampler_view(views[i]);
+			if (view->astc_srgb)
+				astc_srgb |= (1 << i);
+		}
+	}
+
+	fd_set_sampler_views(pctx, shader, start, nr, views);
+
+	if (shader == PIPE_SHADER_FRAGMENT) {
+		fd6_ctx->fastc_srgb = astc_srgb;
+	} else if (shader == PIPE_SHADER_VERTEX) {
+		fd6_ctx->vastc_srgb = astc_srgb;
+	}
+}
+
+/* Install the a6xx sampler-state and sampler-view hooks on the context. */
+void
+fd6_texture_init(struct pipe_context *pctx)
+{
+	pctx->create_sampler_state = fd6_sampler_state_create;
+	pctx->bind_sampler_states = fd6_sampler_states_bind;
+	pctx->create_sampler_view = fd6_sampler_view_create;
+	pctx->set_sampler_views = fd6_set_sampler_views;
+}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.h
b/src/gallium/drivers/freedreno/a6xx/fd6_texture.h new file mode 100644 index 00000000000..a45ed6b3a7b --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_TEXTURE_H_ +#define FD6_TEXTURE_H_ + +#include "pipe/p_context.h" + +#include "freedreno_texture.h" +#include "freedreno_resource.h" + +#include "fd6_context.h" +#include "fd6_format.h" + +struct fd6_sampler_stateobj { + struct pipe_sampler_state base; + uint32_t texsamp0, texsamp1, texsamp2, texsamp3; + bool saturate_s, saturate_t, saturate_r; + bool needs_border; +}; + +static inline struct fd6_sampler_stateobj * +fd6_sampler_stateobj(struct pipe_sampler_state *samp) +{ + return (struct fd6_sampler_stateobj *)samp; +} + +struct fd6_pipe_sampler_view { + struct pipe_sampler_view base; + uint32_t texconst0, texconst1, texconst2, texconst3, texconst5; + uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11; + uint32_t offset; + bool astc_srgb; +}; + +static inline struct fd6_pipe_sampler_view * +fd6_pipe_sampler_view(struct pipe_sampler_view *pview) +{ + return (struct fd6_pipe_sampler_view *)pview; +} + +void fd6_texture_init(struct pipe_context *pctx); + + +static inline enum a6xx_tex_type +fd6_tex_type(unsigned target) +{ + switch (target) { + default: + assert(0); + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return A6XX_TEX_1D; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + return A6XX_TEX_2D; + case PIPE_TEXTURE_3D: + return A6XX_TEX_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return A6XX_TEX_CUBE; + } +} + +#endif /* FD6_TEXTURE_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c new file mode 100644 index 00000000000..84a5d21d41c --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <[email protected]> + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd6_zsa.h" +#include "fd6_context.h" +#include "fd6_format.h" + +void * +fd6_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct fd6_zsa_stateobj *so; + + so = CALLOC_STRUCT(fd6_zsa_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + switch (cso->depth.func) { + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + so->gras_lrz_cntl = A6XX_GRAS_LRZ_CNTL_ENABLE; + break; + + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + so->gras_lrz_cntl = A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_GREATER; + break; + + default: + /* LRZ not enabled */ + so->gras_lrz_cntl = 0; + break; + } + + if (!(cso->stencil->enabled || cso->alpha.enabled || !cso->depth.writemask)) + so->lrz_write = true; + + so->rb_depth_cntl |= + A6XX_RB_DEPTH_CNTL_ZFUNC(cso->depth.func); /* maps 1:1 */ + + if (cso->depth.enabled) + so->rb_depth_cntl |= + A6XX_RB_DEPTH_CNTL_Z_ENABLE | + A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; + + if (cso->depth.writemask) + so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; + + if (cso->stencil[0].enabled) { + const struct pipe_stencil_state *s = &cso->stencil[0]; + + so->rb_stencil_control |= + A6XX_RB_STENCIL_CONTROL_STENCIL_READ | + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A6XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */ + A6XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) | + A6XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) | + A6XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op)); + + so->rb_stencilmask = A6XX_RB_STENCILMASK_MASK(s->valuemask); + so->rb_stencilwrmask = A6XX_RB_STENCILWRMASK_WRMASK(s->writemask); + + if (cso->stencil[1].enabled) { + const struct pipe_stencil_state *bs = &cso->stencil[1]; + + so->rb_stencil_control |= + A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A6XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* 
maps 1:1 */ + A6XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) | + A6XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) | + A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op)); + + // TODO backface stencil state? + } + } + + if (cso->alpha.enabled) { + uint32_t ref = cso->alpha.ref_value * 255.0; + so->rb_alpha_control = + A6XX_RB_ALPHA_CONTROL_ALPHA_TEST | + A6XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) | + A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func); +// so->rb_depth_control |= +// A6XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; + } + + return so; +} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h new file mode 100644 index 00000000000..1c626890981 --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2016 Rob Clark <[email protected]> + * Copyright © 2018 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD6_ZSA_H_ +#define FD6_ZSA_H_ + + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + +struct fd6_zsa_stateobj { + struct pipe_depth_stencil_alpha_state base; + + uint32_t rb_alpha_control; + uint32_t rb_depth_cntl; + uint32_t rb_stencil_control; + uint32_t rb_stencilmask; + uint32_t rb_stencilwrmask; + uint32_t gras_lrz_cntl; + bool lrz_write; +}; + +static inline struct fd6_zsa_stateobj * +fd6_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) +{ + return (struct fd6_zsa_stateobj *)zsa; +} + +void * fd6_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso); + +#endif /* FD6_ZSA_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 84de03bad32..dd4b1ca9e64 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -88,6 +88,8 @@ batch_init(struct fd_batch *batch) if (is_a3xx(ctx->screen)) util_dynarray_init(&batch->rbrc_patches, NULL); + util_dynarray_init(&batch->gmem_patches, NULL); + assert(batch->resources->entries == 0); util_dynarray_init(&batch->samples, NULL); @@ -148,6 +150,8 @@ batch_fini(struct fd_batch *batch) if (is_a3xx(batch->ctx->screen)) util_dynarray_fini(&batch->rbrc_patches); + util_dynarray_fini(&batch->gmem_patches); + while (batch->samples.size > 0) { struct fd_hw_sample *samp = util_dynarray_pop(&batch->samples, struct fd_hw_sample *); diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index 4b0539d0062..6bb88a62916 100644 --- 
a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -140,6 +140,11 @@ struct fd_batch { */ struct util_dynarray draw_patches; + /* Keep track of blitter GMEM offsets that need to be patched up once we + * know the gmem layout: + */ + struct util_dynarray gmem_patches; + /* Keep track of writes to RB_RENDER_CONTROL which need to be patched * once we know whether or not to use GMEM, and GMEM tile pitch. * diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 95b4c16e091..981ab0cf763 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -70,7 +70,7 @@ static uint32_t bin_width(struct fd_screen *screen) { - if (is_a4xx(screen) || is_a5xx(screen)) + if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) return 1024; if (is_a3xx(screen)) return 992; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index f882cf5a8b0..344004f6965 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -844,7 +844,8 @@ fd_resource_create(struct pipe_screen *pscreen, assert(rsc->cpp); // XXX probably need some extra work if we hit rsc shadowing path w/ lrz.. 
- if (is_a5xx(screen) && (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) { + if ((is_a5xx(screen) || is_a6xx(screen)) && + (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) { const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE | DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 32); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 4d54446ec7e..7ba0dac658c 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -55,6 +55,8 @@ #include "a3xx/fd3_screen.h" #include "a4xx/fd4_screen.h" #include "a5xx/fd5_screen.h" +#include "a6xx/fd6_screen.h" + #include "ir3/ir3_nir.h" @@ -230,19 +232,19 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_CLIP_HALFZ: - return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen); + return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen); case PIPE_CAP_FAKE_SW_MSAA: return !fd_screen_get_param(pscreen, PIPE_CAP_TEXTURE_MULTISAMPLE); case PIPE_CAP_TEXTURE_MULTISAMPLE: - return is_a5xx(screen); + return is_a5xx(screen) || is_a6xx(screen); case PIPE_CAP_DEPTH_CLIP_DISABLE: return is_a3xx(screen) || is_a4xx(screen); case PIPE_CAP_POLYGON_OFFSET_CLAMP: - return is_a5xx(screen); + return is_a5xx(screen) || is_a6xx(screen); case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: return 0; @@ -250,6 +252,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) if (is_a3xx(screen)) return 16; if (is_a4xx(screen)) return 32; if (is_a5xx(screen)) return 32; + if (is_a6xx(screen)) return 32; return 0; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: /* We could possibly emulate more by pretending 2d/rect textures and @@ -258,13 +261,14 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) if 
(is_a3xx(screen)) return 8192; if (is_a4xx(screen)) return 16384; if (is_a5xx(screen)) return 16384; + if (is_a6xx(screen)) return 16384; return 0; case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_TEXTURE_QUERY_LOD: - return is_a4xx(screen) || is_a5xx(screen); + return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen); case PIPE_CAP_START_INSTANCE: /* Note that a5xx can do this, it just can't (at least with @@ -284,12 +288,12 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return is_ir3(screen) ? 140 : 120; case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: - if (is_a5xx(screen)) + if (is_a5xx(screen) || is_a6xx(screen)) return 4; return 0; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: - if (is_a4xx(screen) || is_a5xx(screen)) + if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) return 4; return 0; @@ -372,12 +376,12 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return screen->priority_mask; case PIPE_CAP_DRAW_INDIRECT: - if (is_a4xx(screen) || is_a5xx(screen)) + if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) return 1; return 0; case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: - if (is_a4xx(screen) || is_a5xx(screen)) + if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) return 1; return 0; @@ -432,7 +436,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 11; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return (is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen)) ? 256 : 0; + return (is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) ? 256 : 0; /* Render targets. 
*/ case PIPE_CAP_MAX_RENDER_TARGETS: @@ -444,11 +448,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_OCCLUSION_QUERY: - return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen); + return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen); case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_QUERY_TIME_ELAPSED: /* only a4xx, requires new enough kernel so we know max_freq: */ - return (screen->max_freq > 0) && (is_a4xx(screen) || is_a5xx(screen)); + return (screen->max_freq > 0) && (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)); case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: @@ -557,7 +561,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, * split between VS and FS. Use lower limit of 256 to * avoid getting into impossible situations: */ - return ((is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen)) ? 4096 : 64) * sizeof(float[4]); + return ((is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) ? 4096 : 64) * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return is_ir3(screen) ? 16 : 1; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: @@ -615,7 +619,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, return is_ir3(screen) ? 1 : 0; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: - if (is_a5xx(screen)) { + if (is_a5xx(screen) || is_a6xx(screen)) { /* a5xx (and a4xx for that matter) has one state-block * for compute-shader SSBO's and another that is shared * by VS/HS/DS/GS/FS.. 
so to simplify things for now @@ -907,6 +911,9 @@ fd_screen_create(struct fd_device *dev) case 530: fd5_screen_init(pscreen); break; + case 630: + fd6_screen_init(pscreen); + break; default: debug_printf("unsupported GPU: a%03d\n", screen->gpu_id); goto fail; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 251a26f1c07..4d9497ccb53 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -146,11 +146,17 @@ is_a5xx(struct fd_screen *screen) return (screen->gpu_id >= 500) && (screen->gpu_id < 600); } +static inline boolean +is_a6xx(struct fd_screen *screen) +{ + return (screen->gpu_id >= 600) && (screen->gpu_id < 700); +} + /* is it using the ir3 compiler (shader isa introduced with a3xx)? */ static inline boolean is_ir3(struct fd_screen *screen) { - return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen); + return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen); } static inline bool diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 10151aaa9e7..30e3c6a735f 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -59,8 +59,9 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define A3XX_MAX_RENDER_TARGETS 4 #define A4XX_MAX_RENDER_TARGETS 8 #define A5XX_MAX_RENDER_TARGETS 8 +#define A6XX_MAX_RENDER_TARGETS 8 -#define MAX_RENDER_TARGETS A5XX_MAX_RENDER_TARGETS +#define MAX_RENDER_TARGETS A6XX_MAX_RENDER_TARGETS #define FD_DBG_MSGS 0x0001 #define FD_DBG_DISASM 0x0002 @@ -265,6 +266,12 @@ OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, }); } +static inline void +OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +{ + fd_ringbuffer_emit_reloc_ring_full(ring, target, 0); +} + static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) { if 
(ring->cur + ndwords >= ring->end) diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 7415dc9efff..25c2f627ee3 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -184,6 +184,35 @@ files_libfreedreno = files( 'a5xx/fd5_texture.h', 'a5xx/fd5_zsa.c', 'a5xx/fd5_zsa.h', + 'a6xx/a6xx.xml.h', + 'a6xx/fd6_blend.c', + 'a6xx/fd6_blend.h', + 'a6xx/fd6_context.c', + 'a6xx/fd6_context.h', + 'a6xx/fd6_draw.c', + 'a6xx/fd6_draw.h', + 'a6xx/fd6_emit.c', + 'a6xx/fd6_emit.h', + 'a6xx/fd6_format.c', + 'a6xx/fd6_format.h', + 'a6xx/fd6_gmem.c', + 'a6xx/fd6_gmem.h', + 'a6xx/fd6_image.c', + 'a6xx/fd6_image.h', + 'a6xx/fd6_program.c', + 'a6xx/fd6_program.h', + 'a6xx/fd6_query.c', + 'a6xx/fd6_query.h', + 'a6xx/fd6_rasterizer.c', + 'a6xx/fd6_rasterizer.h', + 'a6xx/fd6_resource.c', + 'a6xx/fd6_resource.h', + 'a6xx/fd6_screen.c', + 'a6xx/fd6_screen.h', + 'a6xx/fd6_texture.c', + 'a6xx/fd6_texture.h', + 'a6xx/fd6_zsa.c', + 'a6xx/fd6_zsa.h', 'ir3/disasm-a3xx.c', 'ir3/instr-a3xx.h', 'ir3/ir3.c', |