diff options
Diffstat (limited to 'src/gallium/drivers/freedreno')
32 files changed, 826 insertions, 245 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index ef235734755..77f708f449c 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index b5e1ddadde0..a6940dfefea 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: @@ -111,10 +111,14 @@ enum a3xx_vtx_fmt { VFMT_8_8_SNORM = 53, VFMT_8_8_8_SNORM = 54, VFMT_8_8_8_8_SNORM = 55, - VFMT_10_10_10_2_UINT = 60, - VFMT_10_10_10_2_UNORM = 61, - VFMT_10_10_10_2_SINT = 62, - VFMT_10_10_10_2_SNORM = 63, + VFMT_10_10_10_2_UINT = 56, + VFMT_10_10_10_2_UNORM = 57, + VFMT_10_10_10_2_SINT = 58, + VFMT_10_10_10_2_SNORM = 59, + VFMT_2_10_10_10_UINT = 60, + VFMT_2_10_10_10_UNORM = 61, + VFMT_2_10_10_10_SINT = 62, + VFMT_2_10_10_10_SNORM = 63, }; enum a3xx_tex_fmt { @@ -138,10 +142,12 @@ enum a3xx_tex_fmt { TFMT_DXT1 = 36, TFMT_DXT3 = 37, TFMT_DXT5 = 38, + TFMT_2_10_10_10_UNORM = 40, TFMT_10_10_10_2_UNORM = 41, TFMT_9_9_9_E5_FLOAT = 42, TFMT_11_11_10_FLOAT = 43, TFMT_A8_UNORM = 44, + TFMT_L8_UNORM = 45, TFMT_L8_A8_UNORM = 47, TFMT_8_UNORM = 48, TFMT_8_8_UNORM = 49, @@ -183,6 +189,8 @@ enum a3xx_tex_fmt { TFMT_32_SINT = 92, TFMT_32_32_SINT = 93, TFMT_32_32_32_32_SINT = 95, + TFMT_2_10_10_10_UINT = 96, + TFMT_10_10_10_2_UINT = 97, TFMT_ETC2_RG11_SNORM = 112, TFMT_ETC2_RG11_UNORM = 113, TFMT_ETC2_R11_SNORM = 114, @@ -215,6 +223,9 @@ enum a3xx_color_fmt { RB_R8_UINT = 14, RB_R8_SINT = 15, RB_R10G10B10A2_UNORM = 16, + RB_A2R10G10B10_UNORM = 17, + RB_R10G10B10A2_UINT = 18, + RB_A2R10G10B10_UINT = 19, RB_A8_UNORM = 20, RB_R8_UNORM = 21, RB_R16_FLOAT = 24, @@ -251,25 +262,6 @@ enum a3xx_sp_perfcounter_select { SP_ALU_ACTIVE_CYCLES = 29, }; -enum a3xx_rop_code { - ROP_CLEAR = 0, - ROP_NOR = 1, - ROP_AND_INVERTED = 2, - ROP_COPY_INVERTED = 3, - ROP_AND_REVERSE = 4, - ROP_INVERT = 5, - ROP_XOR = 6, - ROP_NAND = 7, - ROP_AND = 8, - ROP_EQUIV = 9, - ROP_NOOP = 10, - ROP_OR_INVERTED = 11, - ROP_COPY = 12, - ROP_OR_REVERSE = 13, - ROP_OR = 14, - ROP_SET = 15, -}; - enum a3xx_rb_blend_opcode { BLEND_DST_PLUS_SRC = 0, BLEND_SRC_MINUS_DST = 1, @@ -1620,12 +1612,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) } #define REG_A3XX_VFD_CONTROL_1 0x00002241 -#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f #define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) { return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; } +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0 +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK; +} +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00 +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8 +static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK; +} #define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 #define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 3906c9b996e..b8a31d84b3f 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -81,7 +81,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); + /* points + psize -> spritelist: */ if (ctx->rasterizer->point_size_per_vertex && + fd3_emit_get_vp(emit)->writes_psize && (info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 8f9c8b0623c..24afbc9e956 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -209,13 +209,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, fd3_pipe_sampler_view(tex->textures[i]) : &dummy_view; struct fd_resource *rsc = fd_resource(view->base.texture); - unsigned start = fd_sampler_first_level(&view->base); - unsigned end = fd_sampler_last_level(&view->base);; + if (rsc && rsc->base.b.target == PIPE_BUFFER) { + OUT_RELOC(ring, rsc->bo, view->base.u.buf.first_element * + util_format_get_blocksize(view->base.format), 0, 0); + j = 1; + } else { + unsigned start = fd_sampler_first_level(&view->base); + unsigned end = fd_sampler_last_level(&view->base);; - for (j = 0; j < (end - start + 1); j++) { - struct fd_resource_slice *slice = + for (j = 0; j < (end - start + 1); j++) { + struct fd_resource_slice *slice = fd_resource_slice(rsc, j + start); - OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0); + OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0); + } } /* pad the remaining entries w/ null: */ @@ -350,7 +356,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) unsigned instance_regid = regid(63, 0); unsigned vtxcnt_regid = regid(63, 0); + /* Note that sysvals come *after* normal inputs: */ for (i = 0; i < vp->inputs_count; i++) { + if (!vp->inputs[i].compmask) + continue; if (vp->inputs[i].sysval) { switch(vp->inputs[i].slot) { case SYSTEM_VALUE_BASE_VERTEX: @@ -369,18 +378,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) unreachable("invalid system value"); break; } - } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { + } else if (i < vtx->vtx->num_elements) { last = i; } } - /* hw doesn't like to be configured for zero vbo's, it seems: */ - if ((vtx->vtx->num_elements == 0) && - (vertex_regid == regid(63, 0)) && - (instance_regid == regid(63, 0)) && - (vtxcnt_regid == regid(63, 0))) - return; - for (i = 0, j = 0; i <= last; i++) { assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { @@ -424,6 +426,38 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) } } + /* hw doesn't like to be configured for zero vbo's, it seems: */ + if (last < 0) { + /* just recycle the shader bo, we just need to point to *something* + * valid: + */ + struct fd_bo *dummy_vbo = vp->bo; + bool switchnext = (vertex_regid != regid(63, 0)) || + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); + + OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2); + OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) | + A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) | + COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | + A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) | + A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); + OUT_RELOC(ring, dummy_vbo, 0, 0, 0); + + OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1); + OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | + A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) | + A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) | + A3XX_VFD_DECODE_INSTR_SWAP(XYZW) | + A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) | + A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) | + A3XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT)); + + total_in = 1; + j = 1; + } + OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2); OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) | A3XX_VFD_CONTROL_0_PACKETSIZE(2) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 857d156c869..52ea9444517 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -188,9 +188,13 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), + V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_UINT, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), @@ -271,6 +275,16 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(DXT3_SRGBA, DXT3, NONE, WZYX), _T(DXT5_RGBA, DXT5, NONE, WZYX), _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + /* faked */ + _T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX), + _T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX), + _T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX), }; enum a3xx_vtx_fmt @@ -310,6 +324,8 @@ fd3_pipe2fetchsize(enum pipe_format format) { if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; + else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { case 8: return TFETCH_1_BYTE; case 16: return TFETCH_2_BYTE; @@ -324,6 +340,14 @@ fd3_pipe2fetchsize(enum pipe_format format) } } +unsigned +fd3_pipe2nblocksx(enum pipe_format format, unsigned width) +{ + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; + return util_format_get_nblocksx(format, width); +} + /* we need to special case a bit the depth/stencil restore, because we are * using the texture sampler to blit into the depth/stencil buffer, *not* * into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h index 05c5ea3d247..48c503e9a82 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h @@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format); enum pipe_format fd3_gmem_restore_format(enum pipe_format format); enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format); enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); +unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width); uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index 2d6ecb2c050..99ae99ea0c1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -211,8 +211,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, { struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); - unsigned lvl = fd_sampler_first_level(cso); - unsigned miplevels = fd_sampler_last_level(cso) - lvl; + unsigned lvl; uint32_t sz2 = 0; if (!so) @@ -227,20 +226,34 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->texconst0 = A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | - A3XX_TEX_CONST_0_MIPLVLS(miplevels) | fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); if (util_format_is_srgb(cso->format)) so->texconst0 |= A3XX_TEX_CONST_0_SRGB; - so->texconst1 = + if (prsc->target == PIPE_BUFFER) { + lvl = 0; + so->texconst1 = + A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | + A3XX_TEX_CONST_1_WIDTH(cso->u.buf.last_element - + cso->u.buf.first_element + 1) | + A3XX_TEX_CONST_1_HEIGHT(1); + } else { + unsigned miplevels; + + lvl = fd_sampler_first_level(cso); + miplevels = fd_sampler_last_level(cso) - lvl; + + so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels); + so->texconst1 = A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); + } /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */ so->texconst2 = - A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); switch (prsc->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index 9f970365464..a450379e98d 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: @@ -47,11 +47,13 @@ enum a4xx_color_fmt { RB4_R8_UNORM = 2, RB4_R4G4B4A4_UNORM = 8, RB4_R5G5B5A1_UNORM = 10, - RB4_R5G6R5_UNORM = 14, + RB4_R5G6B5_UNORM = 14, RB4_R8G8_UNORM = 15, RB4_R8G8_SNORM = 16, RB4_R8G8_UINT = 17, RB4_R8G8_SINT = 18, + RB4_R16_UNORM = 19, + RB4_R16_SNORM = 20, RB4_R16_FLOAT = 21, RB4_R16_UINT = 22, RB4_R16_SINT = 23, @@ -63,12 +65,16 @@ enum a4xx_color_fmt { RB4_R10G10B10A2_UNORM = 31, RB4_R10G10B10A2_UINT = 34, RB4_R11G11B10_FLOAT = 39, + RB4_R16G16_UNORM = 40, + RB4_R16G16_SNORM = 41, RB4_R16G16_FLOAT = 42, RB4_R16G16_UINT = 43, RB4_R16G16_SINT = 44, RB4_R32_FLOAT = 45, RB4_R32_UINT = 46, RB4_R32_SINT = 47, + RB4_R16G16B16A16_UNORM = 52, + RB4_R16G16B16A16_SNORM = 53, RB4_R16G16B16A16_FLOAT = 54, RB4_R16G16B16A16_UINT = 55, RB4_R16G16B16A16_SINT = 56, @@ -106,6 +112,7 @@ enum a4xx_vtx_fmt { VFMT4_32_32_FIXED = 10, VFMT4_32_32_32_FIXED = 11, VFMT4_32_32_32_32_FIXED = 12, + VFMT4_11_11_10_FLOAT = 13, VFMT4_16_SINT = 16, VFMT4_16_16_SINT = 17, VFMT4_16_16_16_SINT = 18, @@ -146,18 +153,19 @@ enum a4xx_vtx_fmt { VFMT4_8_8_SNORM = 53, VFMT4_8_8_8_SNORM = 54, VFMT4_8_8_8_8_SNORM = 55, - VFMT4_10_10_10_2_UINT = 60, - VFMT4_10_10_10_2_UNORM = 61, - VFMT4_10_10_10_2_SINT = 62, - VFMT4_10_10_10_2_SNORM = 63, + VFMT4_10_10_10_2_UINT = 56, + VFMT4_10_10_10_2_UNORM = 57, + VFMT4_10_10_10_2_SINT = 58, + VFMT4_10_10_10_2_SNORM = 59, }; enum a4xx_tex_fmt { TFMT4_5_6_5_UNORM = 11, - TFMT4_5_5_5_1_UNORM = 10, + TFMT4_5_5_5_1_UNORM = 9, TFMT4_4_4_4_4_UNORM = 8, TFMT4_X8Z24_UNORM = 71, TFMT4_10_10_10_2_UNORM = 33, + TFMT4_10_10_10_2_UINT = 34, TFMT4_A8_UNORM = 3, TFMT4_L8_A8_UNORM = 13, TFMT4_8_UNORM = 4, @@ -172,6 +180,12 @@ enum a4xx_tex_fmt { TFMT4_8_SINT = 7, TFMT4_8_8_SINT = 17, TFMT4_8_8_8_8_SINT = 31, + TFMT4_16_UNORM = 18, + TFMT4_16_16_UNORM = 38, + TFMT4_16_16_16_16_UNORM = 51, + TFMT4_16_SNORM = 19, + TFMT4_16_16_SNORM = 39, + TFMT4_16_16_16_16_SNORM = 52, TFMT4_16_UINT = 21, TFMT4_16_16_UINT = 41, TFMT4_16_16_16_16_UINT = 54, @@ -190,8 +204,21 @@ enum a4xx_tex_fmt { TFMT4_32_FLOAT = 43, TFMT4_32_32_FLOAT = 56, TFMT4_32_32_32_32_FLOAT = 63, + TFMT4_32_32_32_FLOAT = 59, + TFMT4_32_32_32_UINT = 60, + TFMT4_32_32_32_SINT = 61, TFMT4_9_9_9_E5_FLOAT = 32, TFMT4_11_11_10_FLOAT = 37, + TFMT4_DXT1 = 86, + TFMT4_DXT3 = 87, + TFMT4_DXT5 = 88, + TFMT4_RGTC1_UNORM = 90, + TFMT4_RGTC1_SNORM = 91, + TFMT4_RGTC2_UNORM = 94, + TFMT4_RGTC2_SNORM = 95, + TFMT4_BPTC_UFLOAT = 97, + TFMT4_BPTC_FLOAT = 98, + TFMT4_BPTC = 99, TFMT4_ATC_RGB = 100, TFMT4_ATC_RGBA_EXPLICIT = 101, TFMT4_ATC_RGBA_INTERPOLATED = 102, @@ -400,8 +427,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 #define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 #define A4XX_RB_MRT_CONTROL_BLEND 0x00000010 #define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020 -#define A4XX_RB_MRT_CONTROL_FASTCLEAR 0x00000400 -#define A4XX_RB_MRT_CONTROL_B11 0x00000800 +#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) @@ -600,7 +632,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) { return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; } -#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100 +#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) @@ -2056,6 +2088,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b #define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 +#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 #define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 #define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001 @@ -2596,7 +2630,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val) #define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 #define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 -#define REG_A4XX_UNKNOWN_21C5 0x000021c5 +#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040 #define REG_A4XX_PC_RESTART_INDEX 0x000021c6 @@ -2738,6 +2785,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val) { return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK; } +#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK; +} #define REG_A4XX_TEX_SAMP_1 0x00000001 #define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e @@ -2746,6 +2799,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val { return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK; } +#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 #define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 #define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 #define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 @@ -2814,7 +2868,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val) { return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK; } -#define A4XX_TEX_CONST_1_WIDTH__MASK 0x1fff8000 +#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000 #define A4XX_TEX_CONST_1_WIDTH__SHIFT 15 static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val) { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c index d5e823ef69d..f19702280e0 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c @@ -27,6 +27,7 @@ */ #include "pipe/p_state.h" +#include "util/u_blend.h" #include "util/u_string.h" #include "util/u_memory.h" @@ -59,12 +60,12 @@ fd4_blend_state_create(struct pipe_context *pctx, const struct pipe_blend_state *cso) { struct fd4_blend_stateobj *so; -// enum a3xx_rop_code rop = ROP_COPY; + enum a3xx_rop_code rop = ROP_COPY; bool reads_dest = false; unsigned i, mrt_blend = 0; if (cso->logicop_enable) { -// rop = cso->logicop_func; /* maps 1:1 */ + rop = cso->logicop_func; /* maps 1:1 */ switch (cso->logicop_func) { case PIPE_LOGICOP_NOR: @@ -98,16 +99,25 @@ fd4_blend_state_create(struct pipe_context *pctx, else rt = &cso->rt[0]; - so->rb_mrt[i].blend_control = + so->rb_mrt[i].blend_control_rgb = A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | - A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)); + + so->rb_mrt[i].blend_control_alpha = A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); + so->rb_mrt[i].blend_control_no_alpha_rgb = + A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor))); + + so->rb_mrt[i].control = - 0xc00 | /* XXX ROP_CODE ?? */ + A4XX_RB_MRT_CONTROL_ROP_CODE(rop) | + COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); if (rt->blend_enable) { @@ -118,14 +128,17 @@ fd4_blend_state_create(struct pipe_context *pctx, mrt_blend |= (1 << i); } - if (reads_dest) + if (reads_dest) { so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE; + mrt_blend |= (1 << i); + } if (cso->dither) so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); } - so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend); + so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) | + COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND); return so; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h index 7620d00a625..6230fa7a50e 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h @@ -39,7 +39,12 @@ struct fd4_blend_stateobj { struct { uint32_t control; uint32_t buf_info; - uint32_t blend_control; + /* Blend control bits for color if there is an alpha channel */ + uint32_t blend_control_rgb; + /* Blend control bits for color if there is no alpha channel */ + uint32_t blend_control_no_alpha_rgb; + /* Blend control bits for alpha channel */ + uint32_t blend_control_alpha; } rb_mrt[A4XX_MAX_RENDER_TARGETS]; uint32_t rb_fs_output; }; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 7bd5163529a..8cbe68d5790 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -47,6 +47,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd4_emit *emit) { const struct pipe_draw_info *info = emit->info; + enum pc_di_primtype primtype = ctx->primtypes[info->mode]; if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit))) return; @@ -64,7 +65,14 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); + /* points + psize -> spritelist: */ + if (ctx->rasterizer->point_size_per_vertex && + fd4_emit_get_vp(emit)->writes_psize && + (info->mode == PIPE_PRIM_POINTS)) + primtype = DI_PT_POINTLIST_PSIZE; + fd4_draw_emit(ctx, ring, + primtype, emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, info); } @@ -263,8 +271,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0; OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | - A4XX_RB_MRT_CONTROL_B11 | + OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h index b89a30a7c4b..a6c56404a8a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h @@ -101,12 +101,12 @@ fd4_size2indextype(unsigned index_size) } static inline void fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum pc_di_primtype primtype, enum pc_di_vis_cull_mode vismode, const struct pipe_draw_info *info) { struct pipe_index_buffer *idx = &ctx->indexbuf; struct fd_bo *idx_bo = NULL; - enum pc_di_primtype primtype = ctx->primtypes[info->mode]; enum a4xx_index_size idx_type; enum pc_di_src_sel src_sel; uint32_t idx_size, idx_offset; @@ -127,11 +127,6 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, src_sel = DI_SRC_SEL_AUTO_INDEX; } - /* points + psize -> spritelist: */ - if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex && - (info->mode == PIPE_PRIM_POINTS)) - primtype = DI_PT_POINTLIST_PSIZE; - fd4_draw(ctx, ring, primtype, vismode, src_sel, info->count, info->instance_count, idx_type, idx_size, idx_offset, idx_bo); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 26b58718cd8..f220fc7ac1f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -185,7 +185,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct fd4_pipe_sampler_view *view = tex->textures[i] ? fd4_pipe_sampler_view(tex->textures[i]) : &dummy_view; - unsigned start = fd_sampler_first_level(&view->base); OUT_RING(ring, view->texconst0); OUT_RING(ring, view->texconst1); @@ -193,8 +192,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, view->texconst3); if (view->base.texture) { struct fd_resource *rsc = fd_resource(view->base.texture); - uint32_t offset = fd_resource_offset(rsc, start, 0); - OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0); + OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0); } else { OUT_RING(ring, 0x00000000); } @@ -286,7 +284,8 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) | A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height)); - OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format))); OUT_RING(ring, 0x00000000); OUT_RELOC(ring, rsc->bo, offset, 0, 0); OUT_RING(ring, 0x00000000); @@ -332,7 +331,10 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) unsigned instance_regid = regid(63, 0); unsigned vtxcnt_regid = regid(63, 0); + /* Note that sysvals come *after* normal inputs: */ for (i = 0; i < vp->inputs_count; i++) { + if (!vp->inputs[i].compmask) + continue; if (vp->inputs[i].sysval) { switch(vp->inputs[i].slot) { case SYSTEM_VALUE_BASE_VERTEX: @@ -351,19 +353,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) unreachable("invalid system value"); break; } - } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { + } else if (i < vtx->vtx->num_elements) { last = i; } } - - /* hw doesn't like to be configured for zero vbo's, it seems: */ - if ((vtx->vtx->num_elements == 0) && - (vertex_regid == regid(63, 0)) && - (instance_regid == regid(63, 0)) && - (vtxcnt_regid == regid(63, 0))) - return; - for (i = 0, j = 0; i <= last; i++) { assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { @@ -408,6 +402,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) } } + /* hw doesn't like to be configured for zero vbo's, it seems: */ + if (last < 0) { + /* just recycle the shader bo, we just need to point to *something* + * valid: + */ + struct fd_bo *dummy_vbo = vp->bo; + bool switchnext = (vertex_regid != regid(63, 0)) || + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); + + OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) | + A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) | + COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT)); + OUT_RELOC(ring, dummy_vbo, 0, 0, 0); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1)); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1)); + + OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1); + OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL | + A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) | + A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) | + A4XX_VFD_DECODE_INSTR_SWAP(XYZW) | + A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) | + A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) | + A4XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT)); + + total_in = 1; + j = 1; + } + OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5); OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) | 0xa0000 | /* XXX */ @@ -470,11 +496,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches); } - if (dirty & FD_DIRTY_ZSA) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + uint32_t rb_alpha_control = zsa->rb_alpha_control; + + if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) + rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST; OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); - OUT_RING(ring, zsa->rb_alpha_control); + OUT_RING(ring, rb_alpha_control); OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); OUT_RING(ring, zsa->rb_stencil_control); @@ -535,8 +566,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, */ if (emit->info) { const struct pipe_draw_info *info = emit->info; - uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer) - ->pc_prim_vtx_cntl; + struct fd4_rasterizer_stateobj *rast = + fd4_rasterizer_stateobj(ctx->rasterizer); + uint32_t val = rast->pc_prim_vtx_cntl; if (info->indexed && info->primitive_restart) val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; @@ -552,7 +584,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2); OUT_RING(ring, val); - OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */ + OUT_RING(ring, rast->pc_prim_vtx_cntl2); } if (dirty & FD_DIRTY_SCISSOR) { @@ -581,7 +613,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) { + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs); } @@ -599,11 +631,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, uint32_t i; for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { + enum pipe_format format = pipe_surface_format( + ctx->framebuffer.cbufs[i]); + bool is_int = util_format_is_pure_integer(format); + bool has_alpha = util_format_has_alpha(format); + uint32_t control = blend->rb_mrt[i].control; + uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; + + if (is_int) { + control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); + } + + if (has_alpha) { + blend_control |= blend->rb_mrt[i].blend_control_rgb; + } else { + blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + control &= ~A4XX_RB_MRT_CONTROL_BLEND2; + } + OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, blend->rb_mrt[i].control); + OUT_RING(ring, control); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); - OUT_RING(ring, blend->rb_mrt[i].blend_control); + OUT_RING(ring, blend_control); } OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); @@ -611,19 +662,48 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); } - if (dirty & FD_DIRTY_BLEND_COLOR) { + if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) { struct pipe_blend_color *bcolor = &ctx->blend_color; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + float factor = 65535.0; + int i; + + for (i = 0; i < pfb->nr_cbufs; i++) { + enum pipe_format format = pipe_surface_format(pfb->cbufs[i]); + const struct util_format_description *desc = + util_format_description(format); + int j; + + if (desc->is_mixed) + continue; + + j = util_format_get_first_non_void_channel(format); + if (j == -1) + continue; + + if (desc->channel[j].size > 8 || !desc->channel[j].normalized || + desc->channel[j].pure_integer) + continue; + + /* Just use the first unorm8/snorm8 render buffer. Can't keep + * everyone happy. + */ + if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED) + factor = 32767.0; + break; + } + OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8); - OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) | A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0])); OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0])); - OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) | A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1])); OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1])); - OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) | A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2])); OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2])); - OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) | + OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) | A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c index 847d4fb6d63..c240745cec1 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -99,20 +99,26 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(S8_UINT, 8_UINT, R8_UNORM, WZYX), /* 16-bit */ - V_(R16_UNORM, 16_UNORM, NONE, WZYX), - V_(R16_SNORM, 16_SNORM, NONE, WZYX), - VT(R16_UINT, 16_UINT, R16_UINT, WZYX), - VT(R16_SINT, 16_SINT, R16_SINT, WZYX), - V_(R16_USCALED, 16_UINT, NONE, WZYX), - V_(R16_SSCALED, 16_UINT, NONE, WZYX), - VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX), - - _T(A16_UINT, 16_UINT, NONE, WZYX), - _T(A16_SINT, 16_SINT, NONE, WZYX), - _T(L16_UINT, 16_UINT, NONE, WZYX), - _T(L16_SINT, 16_SINT, NONE, WZYX), - _T(I16_UINT, 16_UINT, NONE, WZYX), - _T(I16_SINT, 16_SINT, NONE, WZYX), + VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX), + VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX), + VT(R16_UINT, 16_UINT, R16_UINT, WZYX), + VT(R16_SINT, 16_SINT, R16_SINT, WZYX), + V_(R16_USCALED, 16_UINT, NONE, WZYX), + V_(R16_SSCALED, 16_UINT, NONE, WZYX), + VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX), + + _T(A16_UNORM, 16_UNORM, NONE, WZYX), + _T(A16_SNORM, 16_SNORM, NONE, WZYX), + _T(A16_UINT, 16_UINT, NONE, WZYX), + _T(A16_SINT, 16_SINT, NONE, WZYX), + _T(L16_UNORM, 16_UNORM, NONE, WZYX), + _T(L16_SNORM, 16_SNORM, NONE, WZYX), + _T(L16_UINT, 16_UINT, NONE, WZYX), + _T(L16_SINT, 16_SINT, NONE, WZYX), + _T(I16_UNORM, 16_UNORM, NONE, WZYX), + _T(I16_SNORM, 16_SNORM, NONE, WZYX), + _T(I16_UINT, 16_UINT, NONE, WZYX), + _T(I16_SINT, 16_SINT, NONE, WZYX), VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), @@ -124,6 +130,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(L8A8_UINT, 8_8_UINT, NONE, WZYX), _T(L8A8_SINT, 8_8_SINT, NONE, WZYX), + _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ), @@ -151,16 +158,18 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(I32_UINT, 32_UINT, NONE, WZYX), _T(I32_SINT, 32_SINT, NONE, WZYX), - V_(R16G16_UNORM, 16_16_UNORM, NONE, WZYX), - V_(R16G16_SNORM, 16_16_SNORM, NONE, WZYX), - VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), - VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), - V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), - V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), - VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX), + VT(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX), + VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX), + VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), + VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), + V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), + V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), + VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX), - _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), - _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), + _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX), + _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX), + _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), + _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), _T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), @@ -191,11 +200,15 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), - V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), + VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), + VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), + V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), - _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), + VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), @@ -213,8 +226,10 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX), /* 64-bit */ - V_(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX), - V_(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX), + VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), + VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), + VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), _T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), @@ -235,11 +250,11 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { _T(L32A32_SINT, 32_32_SINT, NONE, WZYX), /* 96-bit */ - V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), - V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), + VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), + VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX), V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX), - V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), + VT(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX), /* 128-bit */ @@ -252,6 +267,72 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), _T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX), + + /* compressed */ + _T(ETC1_RGB8, ETC1, NONE, WZYX), + _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX), + _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX), + _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX), + _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX), + _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX), + _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX), + _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX), + + _T(DXT1_RGB, DXT1, NONE, WZYX), + _T(DXT1_SRGB, DXT1, NONE, WZYX), + _T(DXT1_RGBA, DXT1, NONE, WZYX), + _T(DXT1_SRGBA, DXT1, NONE, WZYX), + _T(DXT3_RGBA, DXT3, NONE, WZYX), + _T(DXT3_SRGBA, DXT3, NONE, WZYX), + _T(DXT5_RGBA, DXT5, NONE, WZYX), + _T(DXT5_SRGBA, DXT5, NONE, WZYX), + + _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX), + _T(BPTC_SRGBA, BPTC, NONE, WZYX), + _T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX), + _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX), + + _T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX), + _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX), + _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX), + _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX), + _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX), + _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX), + + _T(ASTC_4x4, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12, ASTC_12x12, NONE, WZYX), + + _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX), + _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX), + _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX), + _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX), + _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX), + _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX), + _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX), + _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX), + _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX), + _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX), + _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX), + _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX), + _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX), + _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX), }; /* convert pipe format to vertex buffer format: */ @@ -295,11 +376,15 @@ fd4_pipe2fetchsize(enum pipe_format format) if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) format = PIPE_FORMAT_Z32_FLOAT; - switch (util_format_get_blocksizebits(format)) { + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC) + return TFETCH4_16_BYTE; + + switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { case 8: return TFETCH4_1_BYTE; case 16: return TFETCH4_2_BYTE; case 32: return TFETCH4_4_BYTE; case 64: return TFETCH4_8_BYTE; + case 96: return TFETCH4_1_BYTE; /* Does this matter? */ case 128: return TFETCH4_16_BYTE; default: debug_printf("Unknown block size for format %s: %d\n", diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 3f8bbf3a124..221608127b4 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -347,8 +347,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0; OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | - A4XX_RB_MRT_CONTROL_B11 | + OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index e3d5dabab4c..3df13543148 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -245,13 +245,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - /* adjust regids for alpha output formats. there is no alpha render - * format, so it's just treated like red - */ - for (i = 0; i < nr; i++) - if (util_format_is_alpha(pipe_surface_format(bufs[i]))) - color_regid[i] += 3; - /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c index dc7e98b149d..7456c63febe 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c @@ -77,6 +77,13 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, so->gras_su_mode_control = A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0); + so->pc_prim_vtx_cntl2 = + A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | + A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); + + if (cso->fill_front != PIPE_POLYGON_MODE_FILL || + cso->fill_back != PIPE_POLYGON_MODE_FILL) + so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE; if (cso->cull_face & PIPE_FACE_FRONT) so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT; @@ -90,5 +97,10 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, if (cso->offset_tri) so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; + if (!cso->depth_clip) + so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE; + if (cso->clip_halfz) + so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z; + return so; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h index 64e81a9983b..b56a04da6a8 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h @@ -42,6 +42,7 @@ struct fd4_rasterizer_stateobj { uint32_t gras_su_mode_control; uint32_t gras_cl_clip_cntl; uint32_t pc_prim_vtx_cntl; + uint32_t pc_prim_vtx_cntl2; }; static inline struct fd4_rasterizer_stateobj * diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c index d8ea414f300..b2a69cca56c 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c @@ -57,6 +57,8 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen, } if ((usage & PIPE_BIND_SAMPLER_VIEW) && + (target == PIPE_BUFFER || + util_format_get_blocksize(format) != 12) && (fd4_pipe2tex(format) != ~0)) { retval |= PIPE_BIND_SAMPLER_VIEW; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index dbff5a738fd..0eba75577b0 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -124,9 +124,11 @@ fd4_sampler_state_create(struct pipe_context *pctx, so->texsamp1 = // COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) | + COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS); if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); so->texsamp1 |= A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); @@ -210,8 +212,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, { struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); - unsigned lvl = fd_sampler_first_level(cso); - unsigned miplevels = fd_sampler_last_level(cso) - lvl; + unsigned lvl, layers; + uint32_t sz2 = 0; if (!so) return NULL; @@ -223,39 +225,65 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->base.context = pctx; so->texconst0 = - A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | + A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) | A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) | - A4XX_TEX_CONST_0_MIPLVLS(miplevels) | fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); if (util_format_is_srgb(cso->format)) so->texconst0 |= A4XX_TEX_CONST_0_SRGB; - so->texconst1 = - A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | - A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); - so->texconst2 = - A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | - A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp); + if (cso->target == PIPE_BUFFER) { + unsigned elements = cso->u.buf.last_element - + cso->u.buf.first_element + 1; + lvl = 0; + so->texconst1 = + A4XX_TEX_CONST_1_WIDTH(elements) | + A4XX_TEX_CONST_1_HEIGHT(1); + so->texconst2 = + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | + A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp); + so->offset = cso->u.buf.first_element * + util_format_get_blocksize(cso->format); + } else { + unsigned miplevels; - switch (prsc->target) { + lvl = fd_sampler_first_level(cso); + miplevels = fd_sampler_last_level(cso) - lvl; + layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; + + so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels); + so->texconst1 = + A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | + A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); + so->texconst2 = + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) | + A4XX_TEX_CONST_2_PITCH( + util_format_get_nblocksx( + cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); + } + + switch (cso->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: so->texconst3 = - A4XX_TEX_CONST_3_DEPTH(prsc->array_size) | + A4XX_TEX_CONST_3_DEPTH(layers) | A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size); break; case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: so->texconst3 = - A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) | + A4XX_TEX_CONST_3_DEPTH(layers / 6) | A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size); break; case PIPE_TEXTURE_3D: so->texconst3 = A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) | - A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0); + A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[lvl].size0); + while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0) + sz2 = rsc->slices[++lvl].size0; + so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(sz2); break; default: so->texconst3 = 0x00000000; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h index 31955770a85..6ca34ade60d 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h @@ -51,7 +51,8 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp) struct fd4_pipe_sampler_view { struct pipe_sampler_view base; - uint32_t texconst0, texconst1, texconst2, texconst3, textconst4; + uint32_t texconst0, texconst1, texconst2, texconst3, texconst4; + uint32_t offset; }; static inline struct fd4_pipe_sampler_view * diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index ca3d2ac3fca..0e0f0e65e9b 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: @@ -119,6 +119,25 @@ enum adreno_rb_copy_control_mode { RB_COPY_DEPTH_STENCIL = 5, }; +enum a3xx_rop_code { + ROP_CLEAR = 0, + ROP_NOR = 1, + ROP_AND_INVERTED = 2, + ROP_COPY_INVERTED = 3, + ROP_AND_REVERSE = 4, + ROP_INVERT = 5, + ROP_XOR = 6, + ROP_NAND = 7, + ROP_AND = 8, + ROP_EQUIV = 9, + ROP_NOOP = 10, + ROP_OR_INVERTED = 11, + ROP_COPY = 12, + ROP_OR_REVERSE = 13, + ROP_OR = 14, + ROP_SET = 15, +}; + enum a3xx_render_mode { RB_RENDERING_PASS = 0, RB_TILING_PASS = 1, diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index f095e3061b2..4aabc086607 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 61c4c6d6e24..571c8142bf7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -359,6 +359,10 @@ struct fd_context { struct fd_streamout_stateobj streamout; struct pipe_clip_state ucp; + struct pipe_query *cond_query; + bool cond_cond; /* inverted rendering condition */ + uint cond_mode; + /* GMEM/tile handling fxns: */ void (*emit_tile_init)(struct fd_context *ctx); void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile); diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index 7bf3343f43a..bf803cc77bc 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -88,6 +88,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) return; } + /* TODO: push down the region versions into the tiles */ + if (!fd_render_condition_check(pctx)) + return; + /* emulate unsupported primitives: */ if (!fd_supported_prim(ctx, info->mode)) { if (ctx->streamout.num_targets > 0) @@ -220,6 +224,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, unsigned cleared_buffers; int i; + /* TODO: push down the region versions into the tiles */ + if (!fd_render_condition_check(pctx)) + return; + /* for bookkeeping about which buffers have been cleared (and thus * can fully or partially skip mem2gmem) we need to ignore buffers * that have already had a draw, in case apps do silly things like diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c index db2683c9b6f..b87e8250719 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.c +++ b/src/gallium/drivers/freedreno/freedreno_query.c @@ -81,6 +81,16 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq, return q->funcs->get_query_result(fd_context(pctx), q, wait, result); } +static void +fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq, + boolean condition, uint mode) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->cond_query = pq; + ctx->cond_cond = condition; + ctx->cond_mode = mode; +} + static int fd_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, struct pipe_driver_query_info *info) @@ -118,4 +128,5 @@ fd_query_context_init(struct pipe_context *pctx) pctx->begin_query = fd_begin_query; pctx->end_query = fd_end_query; pctx->get_query_result = fd_get_query_result; + pctx->render_condition = fd_render_condition; } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 98de0969cab..63ca9e30620 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -27,6 +27,7 @@ */ #include "util/u_format.h" +#include "util/u_format_rgtc.h" #include "util/u_format_zs.h" #include "util/u_inlines.h" #include "util/u_transfer.h" @@ -111,11 +112,19 @@ realloc_bo(struct fd_resource *rsc, uint32_t size) util_range_set_empty(&rsc->valid_buffer_range); } -/* Currently this is only used for flushing Z32_S8 texture transfers, but - * eventually it should handle everything. - */ +static unsigned +fd_resource_layer_offset(struct fd_resource *rsc, + struct fd_resource_slice *slice, + unsigned layer) +{ + if (rsc->layer_first) + return layer * rsc->layer_size; + else + return layer * slice->size0; +} + static void -fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box) { struct fd_resource *rsc = fd_resource(trans->base.resource); struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); @@ -123,13 +132,12 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) enum pipe_format format = trans->base.resource->format; float *depth = fd_bo_map(rsc->bo) + slice->offset + + fd_resource_layer_offset(rsc, slice, trans->base.box.z) + (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4; uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset + + fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) + (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x; - assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || - format == PIPE_FORMAT_X32_S8X24_UINT); - if (format != PIPE_FORMAT_X32_S8X24_UINT) util_format_z32_float_s8x24_uint_unpack_z_float( depth, slice->pitch * 4, @@ -142,6 +150,73 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) box->width, box->height); } +static void +fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box) +{ + struct fd_resource *rsc = fd_resource(trans->base.resource); + struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); + enum pipe_format format = trans->base.resource->format; + + uint8_t *data = fd_bo_map(rsc->bo) + slice->offset + + fd_resource_layer_offset(rsc, slice, trans->base.box.z) + + ((trans->base.box.y + box->y) * slice->pitch + + trans->base.box.x + box->x) * rsc->cpp; + + uint8_t *source = trans->staging + + util_format_get_nblocksy(format, box->y) * trans->base.stride + + util_format_get_stride(format, box->x); + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + util_format_rgtc1_unorm_unpack_rgba_8unorm( + data, slice->pitch * rsc->cpp, + source, trans->base.stride, + box->width, box->height); + break; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + util_format_rgtc2_unorm_unpack_rgba_8unorm( + data, slice->pitch * rsc->cpp, + source, trans->base.stride, + box->width, box->height); + break; + default: + assert(!"Unexpected format\n"); + break; + } +} + +static void +fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +{ + enum pipe_format format = trans->base.resource->format; + + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: + fd_resource_flush_z32s8(trans, box); + break; + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + fd_resource_flush_rgtc(trans, box); + break; + default: + assert(!"Unexpected staging transfer type"); + break; + } +} + static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) @@ -267,20 +342,15 @@ fd_resource_transfer_map(struct pipe_context *pctx, return NULL; } - if (rsc->layer_first) { - offset = slice->offset + - box->y / util_format_get_blockheight(format) * ptrans->stride + - box->x / util_format_get_blockwidth(format) * rsc->cpp + - box->z * rsc->layer_size; - } else { - offset = slice->offset + - box->y / util_format_get_blockheight(format) * ptrans->stride + - box->x / util_format_get_blockwidth(format) * rsc->cpp + - box->z * slice->size0; - } + offset = slice->offset + + box->y / util_format_get_blockheight(format) * ptrans->stride + + box->x / util_format_get_blockwidth(format) * rsc->cpp + + fd_resource_layer_offset(rsc, slice, box->z); if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || prsc->format == PIPE_FORMAT_X32_S8X24_UINT) { + assert(trans->base.box.depth == 1); + trans->base.stride = trans->base.box.width * rsc->cpp * 2; trans->staging = malloc(trans->base.stride * trans->base.box.height); if (!trans->staging) @@ -298,8 +368,10 @@ fd_resource_transfer_map(struct pipe_context *pctx, goto fail; float *depth = (float *)(buf + slice->offset + + fd_resource_layer_offset(rsc, slice, box->z) + box->y * slice->pitch * 4 + box->x * 4); uint8_t *stencil = sbuf + sslice->offset + + fd_resource_layer_offset(rsc->stencil, sslice, box->z) + box->y * sslice->pitch + box->x; if (format != PIPE_FORMAT_X32_S8X24_UINT) @@ -316,6 +388,54 @@ fd_resource_transfer_map(struct pipe_context *pctx, buf = trans->staging; offset = 0; + } else if (rsc->internal_format != format && + util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) { + assert(trans->base.box.depth == 1); + + trans->base.stride = util_format_get_stride( + format, trans->base.box.width); + trans->staging = malloc( + util_format_get_2d_size(format, trans->base.stride, + trans->base.box.height)); + if (!trans->staging) + goto fail; + + /* if we're not discarding the whole range (or resource), we must copy + * the real data in. + */ + if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_DISCARD_RANGE))) { + uint8_t *rgba8 = (uint8_t *)buf + slice->offset + + fd_resource_layer_offset(rsc, slice, box->z) + + box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp; + + switch (format) { + case PIPE_FORMAT_RGTC1_UNORM: + case PIPE_FORMAT_RGTC1_SNORM: + case PIPE_FORMAT_LATC1_UNORM: + case PIPE_FORMAT_LATC1_SNORM: + util_format_rgtc1_unorm_pack_rgba_8unorm( + trans->staging, trans->base.stride, + rgba8, slice->pitch * rsc->cpp, + box->width, box->height); + break; + case PIPE_FORMAT_RGTC2_UNORM: + case PIPE_FORMAT_RGTC2_SNORM: + case PIPE_FORMAT_LATC2_UNORM: + case PIPE_FORMAT_LATC2_SNORM: + util_format_rgtc2_unorm_pack_rgba_8unorm( + trans->staging, trans->base.stride, + rgba8, slice->pitch * rsc->cpp, + box->width, box->height); + break; + default: + assert(!"Unexpected format"); + break; + } + } + + buf = trans->staging; + offset = 0; } *pptrans = ptrans; @@ -361,9 +481,10 @@ static const struct u_resource_vtbl fd_resource_vtbl = { }; static uint32_t -setup_slices(struct fd_resource *rsc, uint32_t alignment) +setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) { struct pipe_resource *prsc = &rsc->base.b; + enum util_format_layout layout = util_format_description(format)->layout; uint32_t level, size = 0; uint32_t width = prsc->width0; uint32_t height = prsc->height0; @@ -377,9 +498,13 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment) struct fd_resource_slice *slice = fd_resource_slice(rsc, level); uint32_t blocks; - slice->pitch = width = align(width, 32); + if (layout == UTIL_FORMAT_LAYOUT_ASTC) + slice->pitch = width = + util_align_npot(width, 32 * util_format_get_blockwidth(format)); + else + slice->pitch = width = align(width, 32); slice->offset = size; - blocks = util_format_get_nblocks(prsc->format, width, height); + blocks = util_format_get_nblocks(format, width, height); /* 1d array and 2d array textures must all have the same layer size * for each miplevel on a3xx. 3d textures can have different layer * sizes for high levels, but the hw auto-sizer is buggy (or at least @@ -430,11 +555,12 @@ fd_resource_create(struct pipe_screen *pscreen, { struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); struct pipe_resource *prsc = &rsc->base.b; - uint32_t size; + enum pipe_format format = tmpl->format; + uint32_t size, alignment; DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " "nr_samples=%u, usage=%u, bind=%x, flags=%x", - tmpl->target, util_format_name(tmpl->format), + tmpl->target, util_format_name(format), tmpl->width0, tmpl->height0, tmpl->depth0, tmpl->array_size, tmpl->last_level, tmpl->nr_samples, tmpl->usage, tmpl->bind, tmpl->flags); @@ -451,13 +577,18 @@ fd_resource_create(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); rsc->base.vtbl = &fd_resource_vtbl; - if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) - rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT); - else - rsc->cpp = util_format_get_blocksize(tmpl->format); + + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; + else if (fd_screen(pscreen)->gpu_id < 400 && + util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) + format = PIPE_FORMAT_R8G8B8A8_UNORM; + rsc->internal_format = format; + rsc->cpp = util_format_get_blocksize(format); assert(rsc->cpp); + alignment = slice_alignment(pscreen, tmpl); if (is_a4xx(fd_screen(pscreen))) { switch (tmpl->target) { case PIPE_TEXTURE_3D: @@ -465,11 +596,12 @@ fd_resource_create(struct pipe_screen *pscreen, break; default: rsc->layer_first = true; + alignment = 1; break; } } - size = setup_slices(rsc, slice_alignment(pscreen, tmpl)); + size = setup_slices(rsc, alignment, format); if (rsc->layer_first) { rsc->layer_size = align(size, 4096); @@ -548,7 +680,7 @@ fail: return NULL; } -static void fd_blitter_pipe_begin(struct fd_context *ctx); +static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond); static void fd_blitter_pipe_end(struct fd_context *ctx); /** @@ -570,7 +702,7 @@ fd_blitter_pipe_copy_region(struct fd_context *ctx, if (!util_blitter_is_copy_supported(ctx->blitter, dst, src)) return false; - fd_blitter_pipe_begin(ctx); + fd_blitter_pipe_begin(ctx, false); util_blitter_copy_texture(ctx->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); @@ -612,6 +744,25 @@ fd_resource_copy_region(struct pipe_context *pctx, src, src_level, src_box); } +bool +fd_render_condition_check(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + if (!ctx->cond_query) + return true; + + union pipe_query_result res = { 0 }; + bool wait = + ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT && + ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; + + if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res)) + return (bool)res.u64 != ctx->cond_cond; + + return true; +} + /** * Optimal hardware path for blitting pixels. * Scaling, format conversion, up- and downsampling (resolve) are allowed. @@ -630,6 +781,9 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) return; } + if (info.render_condition_enable && !fd_render_condition_check(pctx)) + return; + if (util_try_blit_via_copy_region(pctx, &info)) { return; /* done */ } @@ -646,13 +800,13 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) return; } - fd_blitter_pipe_begin(ctx); + fd_blitter_pipe_begin(ctx, info.render_condition_enable); util_blitter_blit(ctx->blitter, &info); fd_blitter_pipe_end(ctx); } static void -fd_blitter_pipe_begin(struct fd_context *ctx) +fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond) { util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb); util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx); @@ -673,6 +827,9 @@ fd_blitter_pipe_begin(struct fd_context *ctx) (void **)ctx->fragtex.samplers); util_blitter_save_fragment_sampler_views(ctx->blitter, ctx->fragtex.num_textures, ctx->fragtex.textures); + if (!render_cond) + util_blitter_save_render_condition(ctx->blitter, + ctx->cond_query, ctx->cond_cond, ctx->cond_mode); fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT); } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index 7549becaa1f..9a9b0d08244 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -73,6 +73,7 @@ struct fd_resource { struct u_resource base; struct fd_bo *bo; uint32_t cpp; + enum pipe_format internal_format; bool layer_first; /* see above description */ uint32_t layer_size; struct fd_resource_slice slices[MAX_MIP_LEVELS]; @@ -135,4 +136,6 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer) void fd_resource_screen_init(struct pipe_screen *pscreen); void fd_resource_context_init(struct pipe_context *pctx); +bool fd_render_condition_check(struct pipe_context *pctx); + #endif /* FREEDRENO_RESOURCE_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 56d1834ef9c..5bbe4016a2a 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -160,11 +160,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: - case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_START_INSTANCE: case PIPE_CAP_COMPUTE: return 0; @@ -176,27 +174,31 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_FAKE_SW_MSAA: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_CLIP_HALFZ: return is_a3xx(screen) || is_a4xx(screen); case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: - /* ignoring first/last_element.. but I guess that should be - * easy to add.. - */ + if (is_a3xx(screen)) return 16; + if (is_a4xx(screen)) return 32; return 0; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: - /* I think 32k on a4xx.. and we could possibly emulate more - * by pretending 2d/rect textures and splitting high bits - * of index into 2nd dimension.. + /* We could possibly emulate more by pretending 2d/rect textures and + * splitting high bits of index into 2nd dimension.. */ - return 16383; - - case PIPE_CAP_DEPTH_CLIP_DISABLE: - case PIPE_CAP_CLIP_HALFZ: - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - return is_a3xx(screen); + if (is_a3xx(screen)) return 8192; + if (is_a4xx(screen)) return 16384; + return 0; case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_TEXTURE_QUERY_LOD: return is_a4xx(screen); case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: @@ -205,7 +207,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_GLSL_FEATURE_LEVEL: if (glsl120) return 120; - return is_ir3(screen) ? 130 : 120; + return is_ir3(screen) ? 140 : 120; /* Unsupported features. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: @@ -220,15 +222,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: case PIPE_CAP_TEXTURE_GATHER_SM5: - case PIPE_CAP_FAKE_SW_MSAA: - case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: - case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: - case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c index 04e4643b4c9..f5611abaec8 100644 --- a/src/gallium/drivers/freedreno/freedreno_texture.c +++ b/src/gallium/drivers/freedreno/freedreno_texture.c @@ -197,33 +197,15 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr, continue; const struct util_format_channel_description *chan = - &desc->channel[desc->swizzle[j]]; - int size = chan->size; - - /* The Z16 texture format we use seems to look in the - * 32-bit border color slots - */ - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) - size = 32; - - /* Formats like R11G11B10 or RGB9_E5 don't specify - * per-channel sizes properly. - */ - if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER) - size = 16; - - if (chan->pure_integer && size > 16) - bcolor32[desc->swizzle[j] + 4] = - sampler->border_color.i[j]; - else if (size > 16) - bcolor32[desc->swizzle[j]] = - fui(sampler->border_color.f[j]); - else if (chan->pure_integer) - bcolor[desc->swizzle[j] + 8] = - sampler->border_color.i[j]; - else + &desc->channel[desc->swizzle[j]]; + if (chan->pure_integer) { + bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j]; + bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j]; + } else { + bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]); bcolor[desc->swizzle[j]] = - util_float_to_half(sampler->border_color.f[j]); + util_float_to_half(sampler->border_color.f[j]); + } } } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 157dc73a3c6..156bb0be247 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1177,6 +1177,33 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0); break; + case nir_op_bit_count: + dst[0] = ir3_CBITS_B(b, src[0], 0); + break; + case nir_op_ifind_msb: { + struct ir3_instruction *cmp; + dst[0] = ir3_CLZ_S(b, src[0], 0); + cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0); + cmp->cat2.condition = IR3_COND_GE; + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + cmp, 0, dst[0], 0); + break; + } + case nir_op_ufind_msb: + dst[0] = ir3_CLZ_B(b, src[0], 0); + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + src[0], 0, dst[0], 0); + break; + case nir_op_find_lsb: + dst[0] = ir3_BFREV_B(b, src[0], 0); + dst[0] = ir3_CLZ_B(b, dst[0], 0); + break; + case nir_op_bitfield_reverse: + dst[0] = ir3_BFREV_B(b, src[0], 0); + break; + default: compile_error(ctx, "Unhandled ALU op: %s\n", nir_op_infos[alu->op].name); @@ -1547,10 +1574,10 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) unreachable("bad sampler_dim"); } - if (tex->is_shadow) + if (tex->is_shadow && tex->op != nir_texop_lod) flags |= IR3_INSTR_S; - if (tex->is_array) + if (tex->is_array && tex->op != nir_texop_lod) flags |= IR3_INSTR_A; *flagsp = flags; @@ -1618,12 +1645,13 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) case nir_texop_txl: opc = OPC_SAML; break; case nir_texop_txd: opc = OPC_SAMGQ; break; case nir_texop_txf: opc = OPC_ISAML; break; + case nir_texop_lod: opc = OPC_GETLOD; break; case nir_texop_txf_ms: case nir_texop_txs: - case nir_texop_lod: case nir_texop_tg4: case nir_texop_query_levels: case nir_texop_texture_samples: + case nir_texop_samples_identical: compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); return; } @@ -1665,10 +1693,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) src0[nsrc0++] = create_immed(b, fui(0.5)); } - if (tex->is_shadow) + if (tex->is_shadow && tex->op != nir_texop_lod) src0[nsrc0++] = compare; - if (tex->is_array) + if (tex->is_array && tex->op != nir_texop_lod) src0[nsrc0++] = coord[coords]; if (has_proj) { @@ -1717,7 +1745,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) case nir_type_int: type = TYPE_S32; break; - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: type = TYPE_U32; break; @@ -1725,12 +1753,26 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) unreachable("bad dest_type"); } + if (opc == OPC_GETLOD) + type = TYPE_U32; + sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, flags, tex->sampler_index, tex->sampler_index, create_collect(b, src0, nsrc0), create_collect(b, src1, nsrc1)); split_dest(b, dst, sam, 4); + + /* GETLOD returns results in 4.8 fixed point */ + if (opc == OPC_GETLOD) { + struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256)); + + compile_assert(ctx, tex->dest_type == nir_type_float); + for (i = 0; i < 2; i++) { + dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0, + factor, 0); + } + } } static void @@ -1889,6 +1931,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr) case nir_texop_query_levels: emit_tex_query_levels(ctx, tex); break; + case nir_texop_samples_identical: + unreachable("nir_texop_samples_identical"); default: emit_tex(ctx, tex); break; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 7e2c27d9765..5d1cccb0daa 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -166,7 +166,9 @@ struct ir3_shader_variant { } outputs[16 + 2]; /* +POSITION +PSIZE */ bool writes_pos, writes_psize; - /* vertices/inputs: */ + /* attributes (VS) / varyings (FS): + * Note that sysval's should come *after* normal inputs. + */ unsigned inputs_count; struct { uint8_t slot; @@ -229,7 +231,7 @@ struct ir3_shader { struct ir3_compiler *compiler; - struct pipe_context *pctx; + struct pipe_context *pctx; /* TODO replace w/ pipe_screen */ const struct tgsi_token *tokens; struct pipe_stream_output_info stream_output; |