summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h56
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c2
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c60
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_format.c24
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_format.h1
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_texture.c23
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h80
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_blend.c27
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_blend.h7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.c11
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.h7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c132
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_format.c147
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_gmem.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_program.c7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c12
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h1
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_screen.c2
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.c56
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.h3
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h23
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.c8
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.c11
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.c221
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.h3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c36
-rw-r--r--src/gallium/drivers/freedreno/freedreno_texture.c34
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c56
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h6
32 files changed, 826 insertions, 245 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index ef235734755..77f708f449c 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index b5e1ddadde0..a6940dfefea 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -111,10 +111,14 @@ enum a3xx_vtx_fmt {
VFMT_8_8_SNORM = 53,
VFMT_8_8_8_SNORM = 54,
VFMT_8_8_8_8_SNORM = 55,
- VFMT_10_10_10_2_UINT = 60,
- VFMT_10_10_10_2_UNORM = 61,
- VFMT_10_10_10_2_SINT = 62,
- VFMT_10_10_10_2_SNORM = 63,
+ VFMT_10_10_10_2_UINT = 56,
+ VFMT_10_10_10_2_UNORM = 57,
+ VFMT_10_10_10_2_SINT = 58,
+ VFMT_10_10_10_2_SNORM = 59,
+ VFMT_2_10_10_10_UINT = 60,
+ VFMT_2_10_10_10_UNORM = 61,
+ VFMT_2_10_10_10_SINT = 62,
+ VFMT_2_10_10_10_SNORM = 63,
};
enum a3xx_tex_fmt {
@@ -138,10 +142,12 @@ enum a3xx_tex_fmt {
TFMT_DXT1 = 36,
TFMT_DXT3 = 37,
TFMT_DXT5 = 38,
+ TFMT_2_10_10_10_UNORM = 40,
TFMT_10_10_10_2_UNORM = 41,
TFMT_9_9_9_E5_FLOAT = 42,
TFMT_11_11_10_FLOAT = 43,
TFMT_A8_UNORM = 44,
+ TFMT_L8_UNORM = 45,
TFMT_L8_A8_UNORM = 47,
TFMT_8_UNORM = 48,
TFMT_8_8_UNORM = 49,
@@ -183,6 +189,8 @@ enum a3xx_tex_fmt {
TFMT_32_SINT = 92,
TFMT_32_32_SINT = 93,
TFMT_32_32_32_32_SINT = 95,
+ TFMT_2_10_10_10_UINT = 96,
+ TFMT_10_10_10_2_UINT = 97,
TFMT_ETC2_RG11_SNORM = 112,
TFMT_ETC2_RG11_UNORM = 113,
TFMT_ETC2_R11_SNORM = 114,
@@ -215,6 +223,9 @@ enum a3xx_color_fmt {
RB_R8_UINT = 14,
RB_R8_SINT = 15,
RB_R10G10B10A2_UNORM = 16,
+ RB_A2R10G10B10_UNORM = 17,
+ RB_R10G10B10A2_UINT = 18,
+ RB_A2R10G10B10_UINT = 19,
RB_A8_UNORM = 20,
RB_R8_UNORM = 21,
RB_R16_FLOAT = 24,
@@ -251,25 +262,6 @@ enum a3xx_sp_perfcounter_select {
SP_ALU_ACTIVE_CYCLES = 29,
};
-enum a3xx_rop_code {
- ROP_CLEAR = 0,
- ROP_NOR = 1,
- ROP_AND_INVERTED = 2,
- ROP_COPY_INVERTED = 3,
- ROP_AND_REVERSE = 4,
- ROP_INVERT = 5,
- ROP_XOR = 6,
- ROP_NAND = 7,
- ROP_AND = 8,
- ROP_EQUIV = 9,
- ROP_NOOP = 10,
- ROP_OR_INVERTED = 11,
- ROP_COPY = 12,
- ROP_OR_REVERSE = 13,
- ROP_OR = 14,
- ROP_SET = 15,
-};
-
enum a3xx_rb_blend_opcode {
BLEND_DST_PLUS_SRC = 0,
BLEND_SRC_MINUS_DST = 1,
@@ -1620,12 +1612,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
}
#define REG_A3XX_VFD_CONTROL_1 0x00002241
-#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f
#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0
static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
}
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4
+static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val)
+{
+ return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK;
+}
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8
+static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val)
+{
+ return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK;
+}
#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000
#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16
static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 3906c9b996e..b8a31d84b3f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -81,7 +81,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
+ /* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
+ fd3_emit_get_vp(emit)->writes_psize &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 8f9c8b0623c..24afbc9e956 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -209,13 +209,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd3_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
struct fd_resource *rsc = fd_resource(view->base.texture);
- unsigned start = fd_sampler_first_level(&view->base);
- unsigned end = fd_sampler_last_level(&view->base);;
+ if (rsc && rsc->base.b.target == PIPE_BUFFER) {
+ OUT_RELOC(ring, rsc->bo, view->base.u.buf.first_element *
+ util_format_get_blocksize(view->base.format), 0, 0);
+ j = 1;
+ } else {
+ unsigned start = fd_sampler_first_level(&view->base);
+ unsigned end = fd_sampler_last_level(&view->base);;
- for (j = 0; j < (end - start + 1); j++) {
- struct fd_resource_slice *slice =
+ for (j = 0; j < (end - start + 1); j++) {
+ struct fd_resource_slice *slice =
fd_resource_slice(rsc, j + start);
- OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+ OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+ }
}
/* pad the remaining entries w/ null: */
@@ -350,7 +356,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
unsigned instance_regid = regid(63, 0);
unsigned vtxcnt_regid = regid(63, 0);
+ /* Note that sysvals come *after* normal inputs: */
for (i = 0; i < vp->inputs_count; i++) {
+ if (!vp->inputs[i].compmask)
+ continue;
if (vp->inputs[i].sysval) {
switch(vp->inputs[i].slot) {
case SYSTEM_VALUE_BASE_VERTEX:
@@ -369,18 +378,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
unreachable("invalid system value");
break;
}
- } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+ } else if (i < vtx->vtx->num_elements) {
last = i;
}
}
- /* hw doesn't like to be configured for zero vbo's, it seems: */
- if ((vtx->vtx->num_elements == 0) &&
- (vertex_regid == regid(63, 0)) &&
- (instance_regid == regid(63, 0)) &&
- (vtxcnt_regid == regid(63, 0)))
- return;
-
for (i = 0, j = 0; i <= last; i++) {
assert(!vp->inputs[i].sysval);
if (vp->inputs[i].compmask) {
@@ -424,6 +426,38 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
}
}
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (last < 0) {
+ /* just recycle the shader bo, we just need to point to *something*
+ * valid:
+ */
+ struct fd_bo *dummy_vbo = vp->bo;
+ bool switchnext = (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2);
+ OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+ COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) |
+ A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+ OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1);
+ OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+ A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+ A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) |
+ A3XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+ A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+ A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+ A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in = 1;
+ j = 1;
+ }
+
OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 857d156c869..52ea9444517 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -188,9 +188,13 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
_T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX),
+ V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ),
V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_UINT, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX),
+ V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ),
_T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX),
@@ -271,6 +275,16 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
_T(DXT3_SRGBA, DXT3, NONE, WZYX),
_T(DXT5_RGBA, DXT5, NONE, WZYX),
_T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+ /* faked */
+ _T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
};
enum a3xx_vtx_fmt
@@ -310,6 +324,8 @@ fd3_pipe2fetchsize(enum pipe_format format)
{
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
+ else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
case 8: return TFETCH_1_BYTE;
case 16: return TFETCH_2_BYTE;
@@ -324,6 +340,14 @@ fd3_pipe2fetchsize(enum pipe_format format)
}
}
+unsigned
+fd3_pipe2nblocksx(enum pipe_format format, unsigned width)
+{
+ if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ return util_format_get_nblocksx(format, width);
+}
+
/* we need to special case a bit the depth/stencil restore, because we are
* using the texture sampler to blit into the depth/stencil buffer, *not*
* into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing,
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index 05c5ea3d247..48c503e9a82 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
@@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format);
enum pipe_format fd3_gmem_restore_format(enum pipe_format format);
enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format);
enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
+unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 2d6ecb2c050..99ae99ea0c1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -211,8 +211,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = fd_sampler_first_level(cso);
- unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+ unsigned lvl;
uint32_t sz2 = 0;
if (!so)
@@ -227,20 +226,34 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->texconst0 =
A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
- A3XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
if (util_format_is_srgb(cso->format))
so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
- so->texconst1 =
+ if (prsc->target == PIPE_BUFFER) {
+ lvl = 0;
+ so->texconst1 =
+ A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
+ A3XX_TEX_CONST_1_WIDTH(cso->u.buf.last_element -
+ cso->u.buf.first_element + 1) |
+ A3XX_TEX_CONST_1_HEIGHT(1);
+ } else {
+ unsigned miplevels;
+
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+
+ so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 =
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ }
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
so->texconst2 =
- A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+ A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
switch (prsc->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 9f970365464..a450379e98d 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -47,11 +47,13 @@ enum a4xx_color_fmt {
RB4_R8_UNORM = 2,
RB4_R4G4B4A4_UNORM = 8,
RB4_R5G5B5A1_UNORM = 10,
- RB4_R5G6R5_UNORM = 14,
+ RB4_R5G6B5_UNORM = 14,
RB4_R8G8_UNORM = 15,
RB4_R8G8_SNORM = 16,
RB4_R8G8_UINT = 17,
RB4_R8G8_SINT = 18,
+ RB4_R16_UNORM = 19,
+ RB4_R16_SNORM = 20,
RB4_R16_FLOAT = 21,
RB4_R16_UINT = 22,
RB4_R16_SINT = 23,
@@ -63,12 +65,16 @@ enum a4xx_color_fmt {
RB4_R10G10B10A2_UNORM = 31,
RB4_R10G10B10A2_UINT = 34,
RB4_R11G11B10_FLOAT = 39,
+ RB4_R16G16_UNORM = 40,
+ RB4_R16G16_SNORM = 41,
RB4_R16G16_FLOAT = 42,
RB4_R16G16_UINT = 43,
RB4_R16G16_SINT = 44,
RB4_R32_FLOAT = 45,
RB4_R32_UINT = 46,
RB4_R32_SINT = 47,
+ RB4_R16G16B16A16_UNORM = 52,
+ RB4_R16G16B16A16_SNORM = 53,
RB4_R16G16B16A16_FLOAT = 54,
RB4_R16G16B16A16_UINT = 55,
RB4_R16G16B16A16_SINT = 56,
@@ -106,6 +112,7 @@ enum a4xx_vtx_fmt {
VFMT4_32_32_FIXED = 10,
VFMT4_32_32_32_FIXED = 11,
VFMT4_32_32_32_32_FIXED = 12,
+ VFMT4_11_11_10_FLOAT = 13,
VFMT4_16_SINT = 16,
VFMT4_16_16_SINT = 17,
VFMT4_16_16_16_SINT = 18,
@@ -146,18 +153,19 @@ enum a4xx_vtx_fmt {
VFMT4_8_8_SNORM = 53,
VFMT4_8_8_8_SNORM = 54,
VFMT4_8_8_8_8_SNORM = 55,
- VFMT4_10_10_10_2_UINT = 60,
- VFMT4_10_10_10_2_UNORM = 61,
- VFMT4_10_10_10_2_SINT = 62,
- VFMT4_10_10_10_2_SNORM = 63,
+ VFMT4_10_10_10_2_UINT = 56,
+ VFMT4_10_10_10_2_UNORM = 57,
+ VFMT4_10_10_10_2_SINT = 58,
+ VFMT4_10_10_10_2_SNORM = 59,
};
enum a4xx_tex_fmt {
TFMT4_5_6_5_UNORM = 11,
- TFMT4_5_5_5_1_UNORM = 10,
+ TFMT4_5_5_5_1_UNORM = 9,
TFMT4_4_4_4_4_UNORM = 8,
TFMT4_X8Z24_UNORM = 71,
TFMT4_10_10_10_2_UNORM = 33,
+ TFMT4_10_10_10_2_UINT = 34,
TFMT4_A8_UNORM = 3,
TFMT4_L8_A8_UNORM = 13,
TFMT4_8_UNORM = 4,
@@ -172,6 +180,12 @@ enum a4xx_tex_fmt {
TFMT4_8_SINT = 7,
TFMT4_8_8_SINT = 17,
TFMT4_8_8_8_8_SINT = 31,
+ TFMT4_16_UNORM = 18,
+ TFMT4_16_16_UNORM = 38,
+ TFMT4_16_16_16_16_UNORM = 51,
+ TFMT4_16_SNORM = 19,
+ TFMT4_16_16_SNORM = 39,
+ TFMT4_16_16_16_16_SNORM = 52,
TFMT4_16_UINT = 21,
TFMT4_16_16_UINT = 41,
TFMT4_16_16_16_16_UINT = 54,
@@ -190,8 +204,21 @@ enum a4xx_tex_fmt {
TFMT4_32_FLOAT = 43,
TFMT4_32_32_FLOAT = 56,
TFMT4_32_32_32_32_FLOAT = 63,
+ TFMT4_32_32_32_FLOAT = 59,
+ TFMT4_32_32_32_UINT = 60,
+ TFMT4_32_32_32_SINT = 61,
TFMT4_9_9_9_E5_FLOAT = 32,
TFMT4_11_11_10_FLOAT = 37,
+ TFMT4_DXT1 = 86,
+ TFMT4_DXT3 = 87,
+ TFMT4_DXT5 = 88,
+ TFMT4_RGTC1_UNORM = 90,
+ TFMT4_RGTC1_SNORM = 91,
+ TFMT4_RGTC2_UNORM = 94,
+ TFMT4_RGTC2_SNORM = 95,
+ TFMT4_BPTC_UFLOAT = 97,
+ TFMT4_BPTC_FLOAT = 98,
+ TFMT4_BPTC = 99,
TFMT4_ATC_RGB = 100,
TFMT4_ATC_RGBA_EXPLICIT = 101,
TFMT4_ATC_RGBA_INTERPOLATED = 102,
@@ -400,8 +427,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4
#define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008
#define A4XX_RB_MRT_CONTROL_BLEND 0x00000010
#define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020
-#define A4XX_RB_MRT_CONTROL_FASTCLEAR 0x00000400
-#define A4XX_RB_MRT_CONTROL_B11 0x00000800
+#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8
+static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val)
+{
+ return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK;
+}
#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000
#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24
static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
@@ -600,7 +632,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val)
{
return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK;
}
-#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100
+#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16
static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
@@ -2056,6 +2088,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
+#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003
#define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001
@@ -2596,7 +2630,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val)
#define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000
#define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000
-#define REG_A4XX_UNKNOWN_21C5 0x000021c5
+#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+ return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+ return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040
#define REG_A4XX_PC_RESTART_INDEX 0x000021c6
@@ -2738,6 +2785,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
{
return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
}
+#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000
+#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19
+static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val)
+{
+ return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK;
+}
#define REG_A4XX_TEX_SAMP_1 0x00000001
#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e
@@ -2746,6 +2799,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val
{
return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK;
}
+#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010
#define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020
#define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040
#define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00
@@ -2814,7 +2868,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val)
{
return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK;
}
-#define A4XX_TEX_CONST_1_WIDTH__MASK 0x1fff8000
+#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000
#define A4XX_TEX_CONST_1_WIDTH__SHIFT 15
static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val)
{
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
index d5e823ef69d..f19702280e0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -27,6 +27,7 @@
*/
#include "pipe/p_state.h"
+#include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
@@ -59,12 +60,12 @@ fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
struct fd4_blend_stateobj *so;
-// enum a3xx_rop_code rop = ROP_COPY;
+ enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
unsigned i, mrt_blend = 0;
if (cso->logicop_enable) {
-// rop = cso->logicop_func; /* maps 1:1 */
+ rop = cso->logicop_func; /* maps 1:1 */
switch (cso->logicop_func) {
case PIPE_LOGICOP_NOR:
@@ -98,16 +99,25 @@ fd4_blend_state_create(struct pipe_context *pctx,
else
rt = &cso->rt[0];
- so->rb_mrt[i].blend_control =
+ so->rb_mrt[i].blend_control_rgb =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
- A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor));
+
+ so->rb_mrt[i].blend_control_alpha =
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
+ so->rb_mrt[i].blend_control_no_alpha_rgb =
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor)));
+
+
so->rb_mrt[i].control =
- 0xc00 | /* XXX ROP_CODE ?? */
+ A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
+ COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable) {
@@ -118,14 +128,17 @@ fd4_blend_state_create(struct pipe_context *pctx,
mrt_blend |= (1 << i);
}
- if (reads_dest)
+ if (reads_dest) {
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+ mrt_blend |= (1 << i);
+ }
if (cso->dither)
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
- so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
+ so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
+ COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
return so;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
index 7620d00a625..6230fa7a50e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
@@ -39,7 +39,12 @@ struct fd4_blend_stateobj {
struct {
uint32_t control;
uint32_t buf_info;
- uint32_t blend_control;
+ /* Blend control bits for color if there is an alpha channel */
+ uint32_t blend_control_rgb;
+ /* Blend control bits for color if there is no alpha channel */
+ uint32_t blend_control_no_alpha_rgb;
+ /* Blend control bits for alpha channel */
+ uint32_t blend_control_alpha;
} rb_mrt[A4XX_MAX_RENDER_TARGETS];
uint32_t rb_fs_output;
};
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 7bd5163529a..8cbe68d5790 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -47,6 +47,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit)
{
const struct pipe_draw_info *info = emit->info;
+ enum pc_di_primtype primtype = ctx->primtypes[info->mode];
if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
return;
@@ -64,7 +65,14 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
+ /* points + psize -> spritelist: */
+ if (ctx->rasterizer->point_size_per_vertex &&
+ fd4_emit_get_vp(emit)->writes_psize &&
+ (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
+
fd4_draw_emit(ctx, ring,
+ primtype,
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info);
}
@@ -263,8 +271,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
- A4XX_RB_MRT_CONTROL_B11 |
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
index b89a30a7c4b..a6c56404a8a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
@@ -101,12 +101,12 @@ fd4_size2indextype(unsigned index_size)
}
static inline void
fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info)
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
- enum pc_di_primtype primtype = ctx->primtypes[info->mode];
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
@@ -127,11 +127,6 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
- /* points + psize -> spritelist: */
- if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
- (info->mode == PIPE_PRIM_POINTS))
- primtype = DI_PT_POINTLIST_PSIZE;
-
fd4_draw(ctx, ring, primtype, vismode, src_sel,
info->count, info->instance_count,
idx_type, idx_size, idx_offset, idx_bo);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 26b58718cd8..f220fc7ac1f 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -185,7 +185,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
fd4_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
- unsigned start = fd_sampler_first_level(&view->base);
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
@@ -193,8 +192,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, view->texconst3);
if (view->base.texture) {
struct fd_resource *rsc = fd_resource(view->base.texture);
- uint32_t offset = fd_resource_offset(rsc, start, 0);
- OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
} else {
OUT_RING(ring, 0x00000000);
}
@@ -286,7 +284,8 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
- OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format)));
OUT_RING(ring, 0x00000000);
OUT_RELOC(ring, rsc->bo, offset, 0, 0);
OUT_RING(ring, 0x00000000);
@@ -332,7 +331,10 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
unsigned instance_regid = regid(63, 0);
unsigned vtxcnt_regid = regid(63, 0);
+ /* Note that sysvals come *after* normal inputs: */
for (i = 0; i < vp->inputs_count; i++) {
+ if (!vp->inputs[i].compmask)
+ continue;
if (vp->inputs[i].sysval) {
switch(vp->inputs[i].slot) {
case SYSTEM_VALUE_BASE_VERTEX:
@@ -351,19 +353,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
unreachable("invalid system value");
break;
}
- } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+ } else if (i < vtx->vtx->num_elements) {
last = i;
}
}
-
- /* hw doesn't like to be configured for zero vbo's, it seems: */
- if ((vtx->vtx->num_elements == 0) &&
- (vertex_regid == regid(63, 0)) &&
- (instance_regid == regid(63, 0)) &&
- (vtxcnt_regid == regid(63, 0)))
- return;
-
for (i = 0, j = 0; i <= last; i++) {
assert(!vp->inputs[i].sysval);
if (vp->inputs[i].compmask) {
@@ -408,6 +402,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
}
}
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (last < 0) {
+ /* just recycle the shader bo, we just need to point to *something*
+ * valid:
+ */
+ struct fd_bo *dummy_vbo = vp->bo;
+ bool switchnext = (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+ A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+ COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+ OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
+ OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+ A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+ A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
+ A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+ A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+ A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+ A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in = 1;
+ j = 1;
+ }
+
OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
0xa0000 | /* XXX */
@@ -470,11 +496,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
}
- if (dirty & FD_DIRTY_ZSA) {
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ uint32_t rb_alpha_control = zsa->rb_alpha_control;
+
+ if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
+ rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
- OUT_RING(ring, zsa->rb_alpha_control);
+ OUT_RING(ring, rb_alpha_control);
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, zsa->rb_stencil_control);
@@ -535,8 +566,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
*/
if (emit->info) {
const struct pipe_draw_info *info = emit->info;
- uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
- ->pc_prim_vtx_cntl;
+ struct fd4_rasterizer_stateobj *rast =
+ fd4_rasterizer_stateobj(ctx->rasterizer);
+ uint32_t val = rast->pc_prim_vtx_cntl;
if (info->indexed && info->primitive_restart)
val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
@@ -552,7 +584,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
OUT_RING(ring, val);
- OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */
+ OUT_RING(ring, rast->pc_prim_vtx_cntl2);
}
if (dirty & FD_DIRTY_SCISSOR) {
@@ -581,7 +613,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
- if (dirty & FD_DIRTY_PROG) {
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
}
@@ -599,11 +631,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
uint32_t i;
for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ enum pipe_format format = pipe_surface_format(
+ ctx->framebuffer.cbufs[i]);
+ bool is_int = util_format_is_pure_integer(format);
+ bool has_alpha = util_format_has_alpha(format);
+ uint32_t control = blend->rb_mrt[i].control;
+ uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
+
+ if (is_int) {
+ control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+ control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
+ }
+
+ if (has_alpha) {
+ blend_control |= blend->rb_mrt[i].blend_control_rgb;
+ } else {
+ blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
+ control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
+ }
+
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].control);
+ OUT_RING(ring, control);
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].blend_control);
+ OUT_RING(ring, blend_control);
}
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
@@ -611,19 +662,48 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
}
- if (dirty & FD_DIRTY_BLEND_COLOR) {
+ if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_blend_color *bcolor = &ctx->blend_color;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ float factor = 65535.0;
+ int i;
+
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
+ const struct util_format_description *desc =
+ util_format_description(format);
+ int j;
+
+ if (desc->is_mixed)
+ continue;
+
+ j = util_format_get_first_non_void_channel(format);
+ if (j == -1)
+ continue;
+
+ if (desc->channel[j].size > 8 || !desc->channel[j].normalized ||
+ desc->channel[j].pure_integer)
+ continue;
+
+ /* Just use the first unorm8/snorm8 render buffer. Can't keep
+ * everyone happy.
+ */
+ if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED)
+ factor = 32767.0;
+ break;
+ }
+
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
- OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) |
A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
- OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) |
A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
- OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) |
A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
- OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) |
A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 847d4fb6d63..c240745cec1 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -99,20 +99,26 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(S8_UINT, 8_UINT, R8_UNORM, WZYX),
/* 16-bit */
- V_(R16_UNORM, 16_UNORM, NONE, WZYX),
- V_(R16_SNORM, 16_SNORM, NONE, WZYX),
- VT(R16_UINT, 16_UINT, R16_UINT, WZYX),
- VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
- V_(R16_USCALED, 16_UINT, NONE, WZYX),
- V_(R16_SSCALED, 16_UINT, NONE, WZYX),
- VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX),
-
- _T(A16_UINT, 16_UINT, NONE, WZYX),
- _T(A16_SINT, 16_SINT, NONE, WZYX),
- _T(L16_UINT, 16_UINT, NONE, WZYX),
- _T(L16_SINT, 16_SINT, NONE, WZYX),
- _T(I16_UINT, 16_UINT, NONE, WZYX),
- _T(I16_SINT, 16_SINT, NONE, WZYX),
+ VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX),
+ VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX),
+ VT(R16_UINT, 16_UINT, R16_UINT, WZYX),
+ VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
+ V_(R16_USCALED, 16_UINT, NONE, WZYX),
+ V_(R16_SSCALED, 16_UINT, NONE, WZYX),
+ VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX),
+
+ _T(A16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(A16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(A16_UINT, 16_UINT, NONE, WZYX),
+ _T(A16_SINT, 16_SINT, NONE, WZYX),
+ _T(L16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(L16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(L16_UINT, 16_UINT, NONE, WZYX),
+ _T(L16_SINT, 16_SINT, NONE, WZYX),
+ _T(I16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(I16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(I16_UINT, 16_UINT, NONE, WZYX),
+ _T(I16_SINT, 16_SINT, NONE, WZYX),
VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX),
VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX),
@@ -124,6 +130,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(L8A8_UINT, 8_8_UINT, NONE, WZYX),
_T(L8A8_SINT, 8_8_SINT, NONE, WZYX),
+ _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ),
_T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ),
@@ -151,16 +158,18 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(I32_UINT, 32_UINT, NONE, WZYX),
_T(I32_SINT, 32_SINT, NONE, WZYX),
- V_(R16G16_UNORM, 16_16_UNORM, NONE, WZYX),
- V_(R16G16_SNORM, 16_16_SNORM, NONE, WZYX),
- VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX),
- VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
- V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
- V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
- VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX),
+ VT(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX),
+ VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX),
+ VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX),
+ VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
+ V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
+ V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
+ VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX),
- _T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
- _T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
+ _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX),
+ _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX),
+ _T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
+ _T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
@@ -191,11 +200,15 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
_T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX),
- V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ),
+ VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX),
+ VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ),
V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX),
+ V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ),
- _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
+ VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX),
_T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
@@ -213,8 +226,10 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX),
/* 64-bit */
- V_(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX),
- V_(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX),
+ VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+ VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+ VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
+ VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
_T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX),
@@ -235,11 +250,11 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(L32A32_SINT, 32_32_SINT, NONE, WZYX),
/* 96-bit */
- V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX),
- V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX),
+ VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX),
+ VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX),
V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX),
V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX),
- V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX),
+ VT(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX),
V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX),
/* 128-bit */
@@ -252,6 +267,72 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
_T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX),
+
+ /* compressed */
+ _T(ETC1_RGB8, ETC1, NONE, WZYX),
+ _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX),
+ _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX),
+ _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX),
+ _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX),
+ _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX),
+ _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX),
+ _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX),
+ _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX),
+ _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX),
+ _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX),
+
+ _T(DXT1_RGB, DXT1, NONE, WZYX),
+ _T(DXT1_SRGB, DXT1, NONE, WZYX),
+ _T(DXT1_RGBA, DXT1, NONE, WZYX),
+ _T(DXT1_SRGBA, DXT1, NONE, WZYX),
+ _T(DXT3_RGBA, DXT3, NONE, WZYX),
+ _T(DXT3_SRGBA, DXT3, NONE, WZYX),
+ _T(DXT5_RGBA, DXT5, NONE, WZYX),
+ _T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+ _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX),
+ _T(BPTC_SRGBA, BPTC, NONE, WZYX),
+ _T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX),
+ _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX),
+
+ _T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+ _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+ _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+ _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+ _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+ _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+ _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+ _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+
+ _T(ASTC_4x4, ASTC_4x4, NONE, WZYX),
+ _T(ASTC_5x4, ASTC_5x4, NONE, WZYX),
+ _T(ASTC_5x5, ASTC_5x5, NONE, WZYX),
+ _T(ASTC_6x5, ASTC_6x5, NONE, WZYX),
+ _T(ASTC_6x6, ASTC_6x6, NONE, WZYX),
+ _T(ASTC_8x5, ASTC_8x5, NONE, WZYX),
+ _T(ASTC_8x6, ASTC_8x6, NONE, WZYX),
+ _T(ASTC_8x8, ASTC_8x8, NONE, WZYX),
+ _T(ASTC_10x5, ASTC_10x5, NONE, WZYX),
+ _T(ASTC_10x6, ASTC_10x6, NONE, WZYX),
+ _T(ASTC_10x8, ASTC_10x8, NONE, WZYX),
+ _T(ASTC_10x10, ASTC_10x10, NONE, WZYX),
+ _T(ASTC_12x10, ASTC_12x10, NONE, WZYX),
+ _T(ASTC_12x12, ASTC_12x12, NONE, WZYX),
+
+ _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX),
+ _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX),
+ _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX),
+ _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX),
+ _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX),
+ _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX),
+ _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX),
+ _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX),
+ _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX),
+ _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX),
+ _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX),
+ _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX),
+ _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX),
+ _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX),
};
/* convert pipe format to vertex buffer format: */
@@ -295,11 +376,15 @@ fd4_pipe2fetchsize(enum pipe_format format)
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
- switch (util_format_get_blocksizebits(format)) {
+ if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
+ return TFETCH4_16_BYTE;
+
+ switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
case 8: return TFETCH4_1_BYTE;
case 16: return TFETCH4_2_BYTE;
case 32: return TFETCH4_4_BYTE;
case 64: return TFETCH4_8_BYTE;
+ case 96: return TFETCH4_1_BYTE; /* Does this matter? */
case 128: return TFETCH4_16_BYTE;
default:
debug_printf("Unknown block size for format %s: %d\n",
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 3f8bbf3a124..221608127b4 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -347,8 +347,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
- A4XX_RB_MRT_CONTROL_B11 |
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index e3d5dabab4c..3df13543148 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -245,13 +245,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
}
- /* adjust regids for alpha output formats. there is no alpha render
- * format, so it's just treated like red
- */
- for (i = 0; i < nr; i++)
- if (util_format_is_alpha(pipe_surface_format(bufs[i])))
- color_regid[i] += 3;
-
/* TODO get these dynamically: */
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
index dc7e98b149d..7456c63febe 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -77,6 +77,13 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
so->gras_su_mode_control =
A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
+ so->pc_prim_vtx_cntl2 =
+ A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+ A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+
+ if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+ cso->fill_back != PIPE_POLYGON_MODE_FILL)
+ so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
@@ -90,5 +97,10 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
if (cso->offset_tri)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+ if (!cso->depth_clip)
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+ if (cso->clip_halfz)
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
+
return so;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
index 64e81a9983b..b56a04da6a8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
@@ -42,6 +42,7 @@ struct fd4_rasterizer_stateobj {
uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
+ uint32_t pc_prim_vtx_cntl2;
};
static inline struct fd4_rasterizer_stateobj *
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
index d8ea414f300..b2a69cca56c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
@@ -57,6 +57,8 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen,
}
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ (target == PIPE_BUFFER ||
+ util_format_get_blocksize(format) != 12) &&
(fd4_pipe2tex(format) != ~0)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index dbff5a738fd..0eba75577b0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -124,9 +124,11 @@ fd4_sampler_state_create(struct pipe_context *pctx,
so->texsamp1 =
// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
+ COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->texsamp1 |=
A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
@@ -210,8 +212,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = fd_sampler_first_level(cso);
- unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+ unsigned lvl, layers;
+ uint32_t sz2 = 0;
if (!so)
return NULL;
@@ -223,39 +225,65 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->base.context = pctx;
so->texconst0 =
- A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
+ A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
- A4XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
if (util_format_is_srgb(cso->format))
so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
- so->texconst1 =
- A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
- A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
- so->texconst2 =
- A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
- A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
+ if (cso->target == PIPE_BUFFER) {
+ unsigned elements = cso->u.buf.last_element -
+ cso->u.buf.first_element + 1;
+ lvl = 0;
+ so->texconst1 =
+ A4XX_TEX_CONST_1_WIDTH(elements) |
+ A4XX_TEX_CONST_1_HEIGHT(1);
+ so->texconst2 =
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
+ A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+ so->offset = cso->u.buf.first_element *
+ util_format_get_blocksize(cso->format);
+ } else {
+ unsigned miplevels;
- switch (prsc->target) {
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+ layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
+
+ so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 =
+ A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+ A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ so->texconst2 =
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
+ A4XX_TEX_CONST_2_PITCH(
+ util_format_get_nblocksx(
+ cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+ so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+ }
+
+ switch (cso->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(prsc->array_size) |
+ A4XX_TEX_CONST_3_DEPTH(layers) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) |
+ A4XX_TEX_CONST_3_DEPTH(layers / 6) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
- A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
+ A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[lvl].size0);
+ while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
+ sz2 = rsc->slices[++lvl].size0;
+ so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(sz2);
break;
default:
so->texconst3 = 0x00000000;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
index 31955770a85..6ca34ade60d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -51,7 +51,8 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp)
struct fd4_pipe_sampler_view {
struct pipe_sampler_view base;
- uint32_t texconst0, texconst1, texconst2, texconst3, textconst4;
+ uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
+ uint32_t offset;
};
static inline struct fd4_pipe_sampler_view *
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index ca3d2ac3fca..0e0f0e65e9b 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -119,6 +119,25 @@ enum adreno_rb_copy_control_mode {
RB_COPY_DEPTH_STENCIL = 5,
};
+enum a3xx_rop_code {
+ ROP_CLEAR = 0,
+ ROP_NOR = 1,
+ ROP_AND_INVERTED = 2,
+ ROP_COPY_INVERTED = 3,
+ ROP_AND_REVERSE = 4,
+ ROP_INVERT = 5,
+ ROP_XOR = 6,
+ ROP_NAND = 7,
+ ROP_AND = 8,
+ ROP_EQUIV = 9,
+ ROP_NOOP = 10,
+ ROP_OR_INVERTED = 11,
+ ROP_COPY = 12,
+ ROP_OR_REVERSE = 13,
+ ROP_OR = 14,
+ ROP_SET = 15,
+};
+
enum a3xx_render_mode {
RB_RENDERING_PASS = 0,
RB_TILING_PASS = 1,
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index f095e3061b2..4aabc086607 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 61c4c6d6e24..571c8142bf7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -359,6 +359,10 @@ struct fd_context {
struct fd_streamout_stateobj streamout;
struct pipe_clip_state ucp;
+ struct pipe_query *cond_query;
+ bool cond_cond; /* inverted rendering condition */
+ uint cond_mode;
+
/* GMEM/tile handling fxns: */
void (*emit_tile_init)(struct fd_context *ctx);
void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile);
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 7bf3343f43a..bf803cc77bc 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -88,6 +88,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
return;
}
+ /* TODO: push down the region versions into the tiles */
+ if (!fd_render_condition_check(pctx))
+ return;
+
/* emulate unsupported primitives: */
if (!fd_supported_prim(ctx, info->mode)) {
if (ctx->streamout.num_targets > 0)
@@ -220,6 +224,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
unsigned cleared_buffers;
int i;
+ /* TODO: push down the region versions into the tiles */
+ if (!fd_render_condition_check(pctx))
+ return;
+
/* for bookkeeping about which buffers have been cleared (and thus
* can fully or partially skip mem2gmem) we need to ignore buffers
* that have already had a draw, in case apps do silly things like
diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c
index db2683c9b6f..b87e8250719 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.c
+++ b/src/gallium/drivers/freedreno/freedreno_query.c
@@ -81,6 +81,16 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq,
return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
}
+static void
+fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq,
+ boolean condition, uint mode)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->cond_query = pq;
+ ctx->cond_cond = condition;
+ ctx->cond_mode = mode;
+}
+
static int
fd_get_driver_query_info(struct pipe_screen *pscreen,
unsigned index, struct pipe_driver_query_info *info)
@@ -118,4 +128,5 @@ fd_query_context_init(struct pipe_context *pctx)
pctx->begin_query = fd_begin_query;
pctx->end_query = fd_end_query;
pctx->get_query_result = fd_get_query_result;
+ pctx->render_condition = fd_render_condition;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 98de0969cab..63ca9e30620 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -27,6 +27,7 @@
*/
#include "util/u_format.h"
+#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
@@ -111,11 +112,19 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
util_range_set_empty(&rsc->valid_buffer_range);
}
-/* Currently this is only used for flushing Z32_S8 texture transfers, but
- * eventually it should handle everything.
- */
+static unsigned
+fd_resource_layer_offset(struct fd_resource *rsc,
+ struct fd_resource_slice *slice,
+ unsigned layer)
+{
+ if (rsc->layer_first)
+ return layer * rsc->layer_size;
+ else
+ return layer * slice->size0;
+}
+
static void
-fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
struct fd_resource *rsc = fd_resource(trans->base.resource);
struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
@@ -123,13 +132,12 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
enum pipe_format format = trans->base.resource->format;
float *depth = fd_bo_map(rsc->bo) + slice->offset +
+ fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
+ fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
- assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
- format == PIPE_FORMAT_X32_S8X24_UINT);
-
if (format != PIPE_FORMAT_X32_S8X24_UINT)
util_format_z32_float_s8x24_uint_unpack_z_float(
depth, slice->pitch * 4,
@@ -142,6 +150,73 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
box->width, box->height);
}
+static void
+fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
+{
+ struct fd_resource *rsc = fd_resource(trans->base.resource);
+ struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
+ enum pipe_format format = trans->base.resource->format;
+
+ uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
+ fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
+ ((trans->base.box.y + box->y) * slice->pitch +
+ trans->base.box.x + box->x) * rsc->cpp;
+
+ uint8_t *source = trans->staging +
+ util_format_get_nblocksy(format, box->y) * trans->base.stride +
+ util_format_get_stride(format, box->x);
+
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ util_format_rgtc1_unorm_unpack_rgba_8unorm(
+ data, slice->pitch * rsc->cpp,
+ source, trans->base.stride,
+ box->width, box->height);
+ break;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ util_format_rgtc2_unorm_unpack_rgba_8unorm(
+ data, slice->pitch * rsc->cpp,
+ source, trans->base.stride,
+ box->width, box->height);
+ break;
+ default:
+ assert(!"Unexpected format\n");
+ break;
+ }
+}
+
+static void
+fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+{
+ enum pipe_format format = trans->base.resource->format;
+
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_X32_S8X24_UINT:
+ fd_resource_flush_z32s8(trans, box);
+ break;
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ fd_resource_flush_rgtc(trans, box);
+ break;
+ default:
+ assert(!"Unexpected staging transfer type");
+ break;
+ }
+}
+
static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
struct pipe_transfer *ptrans,
const struct pipe_box *box)
@@ -267,20 +342,15 @@ fd_resource_transfer_map(struct pipe_context *pctx,
return NULL;
}
- if (rsc->layer_first) {
- offset = slice->offset +
- box->y / util_format_get_blockheight(format) * ptrans->stride +
- box->x / util_format_get_blockwidth(format) * rsc->cpp +
- box->z * rsc->layer_size;
- } else {
- offset = slice->offset +
- box->y / util_format_get_blockheight(format) * ptrans->stride +
- box->x / util_format_get_blockwidth(format) * rsc->cpp +
- box->z * slice->size0;
- }
+ offset = slice->offset +
+ box->y / util_format_get_blockheight(format) * ptrans->stride +
+ box->x / util_format_get_blockwidth(format) * rsc->cpp +
+ fd_resource_layer_offset(rsc, slice, box->z);
if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
+ assert(trans->base.box.depth == 1);
+
trans->base.stride = trans->base.box.width * rsc->cpp * 2;
trans->staging = malloc(trans->base.stride * trans->base.box.height);
if (!trans->staging)
@@ -298,8 +368,10 @@ fd_resource_transfer_map(struct pipe_context *pctx,
goto fail;
float *depth = (float *)(buf + slice->offset +
+ fd_resource_layer_offset(rsc, slice, box->z) +
box->y * slice->pitch * 4 + box->x * 4);
uint8_t *stencil = sbuf + sslice->offset +
+ fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
box->y * sslice->pitch + box->x;
if (format != PIPE_FORMAT_X32_S8X24_UINT)
@@ -316,6 +388,54 @@ fd_resource_transfer_map(struct pipe_context *pctx,
buf = trans->staging;
offset = 0;
+ } else if (rsc->internal_format != format &&
+ util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ assert(trans->base.box.depth == 1);
+
+ trans->base.stride = util_format_get_stride(
+ format, trans->base.box.width);
+ trans->staging = malloc(
+ util_format_get_2d_size(format, trans->base.stride,
+ trans->base.box.height));
+ if (!trans->staging)
+ goto fail;
+
+ /* if we're not discarding the whole range (or resource), we must copy
+ * the real data in.
+ */
+ if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+ PIPE_TRANSFER_DISCARD_RANGE))) {
+ uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
+ fd_resource_layer_offset(rsc, slice, box->z) +
+ box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;
+
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ util_format_rgtc1_unorm_pack_rgba_8unorm(
+ trans->staging, trans->base.stride,
+ rgba8, slice->pitch * rsc->cpp,
+ box->width, box->height);
+ break;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ util_format_rgtc2_unorm_pack_rgba_8unorm(
+ trans->staging, trans->base.stride,
+ rgba8, slice->pitch * rsc->cpp,
+ box->width, box->height);
+ break;
+ default:
+ assert(!"Unexpected format");
+ break;
+ }
+ }
+
+ buf = trans->staging;
+ offset = 0;
}
*pptrans = ptrans;
@@ -361,9 +481,10 @@ static const struct u_resource_vtbl fd_resource_vtbl = {
};
static uint32_t
-setup_slices(struct fd_resource *rsc, uint32_t alignment)
+setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
struct pipe_resource *prsc = &rsc->base.b;
+ enum util_format_layout layout = util_format_description(format)->layout;
uint32_t level, size = 0;
uint32_t width = prsc->width0;
uint32_t height = prsc->height0;
@@ -377,9 +498,13 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment)
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
uint32_t blocks;
- slice->pitch = width = align(width, 32);
+ if (layout == UTIL_FORMAT_LAYOUT_ASTC)
+ slice->pitch = width =
+ util_align_npot(width, 32 * util_format_get_blockwidth(format));
+ else
+ slice->pitch = width = align(width, 32);
slice->offset = size;
- blocks = util_format_get_nblocks(prsc->format, width, height);
+ blocks = util_format_get_nblocks(format, width, height);
/* 1d array and 2d array textures must all have the same layer size
* for each miplevel on a3xx. 3d textures can have different layer
* sizes for high levels, but the hw auto-sizer is buggy (or at least
@@ -430,11 +555,12 @@ fd_resource_create(struct pipe_screen *pscreen,
{
struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
struct pipe_resource *prsc = &rsc->base.b;
- uint32_t size;
+ enum pipe_format format = tmpl->format;
+ uint32_t size, alignment;
DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
"nr_samples=%u, usage=%u, bind=%x, flags=%x",
- tmpl->target, util_format_name(tmpl->format),
+ tmpl->target, util_format_name(format),
tmpl->width0, tmpl->height0, tmpl->depth0,
tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
tmpl->usage, tmpl->bind, tmpl->flags);
@@ -451,13 +577,18 @@ fd_resource_create(struct pipe_screen *pscreen,
util_range_init(&rsc->valid_buffer_range);
rsc->base.vtbl = &fd_resource_vtbl;
- if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
- rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT);
- else
- rsc->cpp = util_format_get_blocksize(tmpl->format);
+
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ format = PIPE_FORMAT_Z32_FLOAT;
+ else if (fd_screen(pscreen)->gpu_id < 400 &&
+ util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ rsc->internal_format = format;
+ rsc->cpp = util_format_get_blocksize(format);
assert(rsc->cpp);
+ alignment = slice_alignment(pscreen, tmpl);
if (is_a4xx(fd_screen(pscreen))) {
switch (tmpl->target) {
case PIPE_TEXTURE_3D:
@@ -465,11 +596,12 @@ fd_resource_create(struct pipe_screen *pscreen,
break;
default:
rsc->layer_first = true;
+ alignment = 1;
break;
}
}
- size = setup_slices(rsc, slice_alignment(pscreen, tmpl));
+ size = setup_slices(rsc, alignment, format);
if (rsc->layer_first) {
rsc->layer_size = align(size, 4096);
@@ -548,7 +680,7 @@ fail:
return NULL;
}
-static void fd_blitter_pipe_begin(struct fd_context *ctx);
+static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
static void fd_blitter_pipe_end(struct fd_context *ctx);
/**
@@ -570,7 +702,7 @@ fd_blitter_pipe_copy_region(struct fd_context *ctx,
if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
return false;
- fd_blitter_pipe_begin(ctx);
+ fd_blitter_pipe_begin(ctx, false);
util_blitter_copy_texture(ctx->blitter,
dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
@@ -612,6 +744,25 @@ fd_resource_copy_region(struct pipe_context *pctx,
src, src_level, src_box);
}
+bool
+fd_render_condition_check(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ if (!ctx->cond_query)
+ return true;
+
+ union pipe_query_result res = { 0 };
+ bool wait =
+ ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
+ ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
+
+ if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
+ return (bool)res.u64 != ctx->cond_cond;
+
+ return true;
+}
+
/**
* Optimal hardware path for blitting pixels.
* Scaling, format conversion, up- and downsampling (resolve) are allowed.
@@ -630,6 +781,9 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
return;
}
+ if (info.render_condition_enable && !fd_render_condition_check(pctx))
+ return;
+
if (util_try_blit_via_copy_region(pctx, &info)) {
return; /* done */
}
@@ -646,13 +800,13 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
return;
}
- fd_blitter_pipe_begin(ctx);
+ fd_blitter_pipe_begin(ctx, info.render_condition_enable);
util_blitter_blit(ctx->blitter, &info);
fd_blitter_pipe_end(ctx);
}
static void
-fd_blitter_pipe_begin(struct fd_context *ctx)
+fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond)
{
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
@@ -673,6 +827,9 @@ fd_blitter_pipe_begin(struct fd_context *ctx)
(void **)ctx->fragtex.samplers);
util_blitter_save_fragment_sampler_views(ctx->blitter,
ctx->fragtex.num_textures, ctx->fragtex.textures);
+ if (!render_cond)
+ util_blitter_save_render_condition(ctx->blitter,
+ ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 7549becaa1f..9a9b0d08244 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -73,6 +73,7 @@ struct fd_resource {
struct u_resource base;
struct fd_bo *bo;
uint32_t cpp;
+ enum pipe_format internal_format;
bool layer_first; /* see above description */
uint32_t layer_size;
struct fd_resource_slice slices[MAX_MIP_LEVELS];
@@ -135,4 +136,6 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
void fd_resource_screen_init(struct pipe_screen *pscreen);
void fd_resource_context_init(struct pipe_context *pctx);
+bool fd_render_condition_check(struct pipe_context *pctx);
+
#endif /* FREEDRENO_RESOURCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 56d1834ef9c..5bbe4016a2a 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -160,11 +160,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
- case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
return 0;
@@ -176,27 +174,31 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_FAKE_SW_MSAA:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_CLIP_HALFZ:
return is_a3xx(screen) || is_a4xx(screen);
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- /* ignoring first/last_element.. but I guess that should be
- * easy to add..
- */
+ if (is_a3xx(screen)) return 16;
+ if (is_a4xx(screen)) return 32;
return 0;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- /* I think 32k on a4xx.. and we could possibly emulate more
- * by pretending 2d/rect textures and splitting high bits
- * of index into 2nd dimension..
+ /* We could possibly emulate more by pretending 2d/rect textures and
+ * splitting high bits of index into 2nd dimension..
*/
- return 16383;
-
- case PIPE_CAP_DEPTH_CLIP_DISABLE:
- case PIPE_CAP_CLIP_HALFZ:
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
- return is_a3xx(screen);
+ if (is_a3xx(screen)) return 8192;
+ if (is_a4xx(screen)) return 16384;
+ return 0;
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_START_INSTANCE:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
return is_a4xx(screen);
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -205,7 +207,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (glsl120)
return 120;
- return is_ir3(screen) ? 130 : 120;
+ return is_ir3(screen) ? 140 : 120;
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -220,15 +222,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
- case PIPE_CAP_FAKE_SW_MSAA:
- case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
index 04e4643b4c9..f5611abaec8 100644
--- a/src/gallium/drivers/freedreno/freedreno_texture.c
+++ b/src/gallium/drivers/freedreno/freedreno_texture.c
@@ -197,33 +197,15 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
continue;
const struct util_format_channel_description *chan =
- &desc->channel[desc->swizzle[j]];
- int size = chan->size;
-
- /* The Z16 texture format we use seems to look in the
- * 32-bit border color slots
- */
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
- size = 32;
-
- /* Formats like R11G11B10 or RGB9_E5 don't specify
- * per-channel sizes properly.
- */
- if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
- size = 16;
-
- if (chan->pure_integer && size > 16)
- bcolor32[desc->swizzle[j] + 4] =
- sampler->border_color.i[j];
- else if (size > 16)
- bcolor32[desc->swizzle[j]] =
- fui(sampler->border_color.f[j]);
- else if (chan->pure_integer)
- bcolor[desc->swizzle[j] + 8] =
- sampler->border_color.i[j];
- else
+ &desc->channel[desc->swizzle[j]];
+ if (chan->pure_integer) {
+ bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j];
+ bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j];
+ } else {
+ bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]);
bcolor[desc->swizzle[j]] =
- util_float_to_half(sampler->border_color.f[j]);
+ util_float_to_half(sampler->border_color.f[j]);
+ }
}
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 157dc73a3c6..156bb0be247 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1177,6 +1177,33 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0);
break;
+ case nir_op_bit_count:
+ dst[0] = ir3_CBITS_B(b, src[0], 0);
+ break;
+ case nir_op_ifind_msb: {
+ struct ir3_instruction *cmp;
+ dst[0] = ir3_CLZ_S(b, src[0], 0);
+ cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0);
+ cmp->cat2.condition = IR3_COND_GE;
+ dst[0] = ir3_SEL_B32(b,
+ ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
+ cmp, 0, dst[0], 0);
+ break;
+ }
+ case nir_op_ufind_msb:
+ dst[0] = ir3_CLZ_B(b, src[0], 0);
+ dst[0] = ir3_SEL_B32(b,
+ ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
+ src[0], 0, dst[0], 0);
+ break;
+ case nir_op_find_lsb:
+ dst[0] = ir3_BFREV_B(b, src[0], 0);
+ dst[0] = ir3_CLZ_B(b, dst[0], 0);
+ break;
+ case nir_op_bitfield_reverse:
+ dst[0] = ir3_BFREV_B(b, src[0], 0);
+ break;
+
default:
compile_error(ctx, "Unhandled ALU op: %s\n",
nir_op_infos[alu->op].name);
@@ -1547,10 +1574,10 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
unreachable("bad sampler_dim");
}
- if (tex->is_shadow)
+ if (tex->is_shadow && tex->op != nir_texop_lod)
flags |= IR3_INSTR_S;
- if (tex->is_array)
+ if (tex->is_array && tex->op != nir_texop_lod)
flags |= IR3_INSTR_A;
*flagsp = flags;
@@ -1618,12 +1645,13 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_texop_txl: opc = OPC_SAML; break;
case nir_texop_txd: opc = OPC_SAMGQ; break;
case nir_texop_txf: opc = OPC_ISAML; break;
+ case nir_texop_lod: opc = OPC_GETLOD; break;
case nir_texop_txf_ms:
case nir_texop_txs:
- case nir_texop_lod:
case nir_texop_tg4:
case nir_texop_query_levels:
case nir_texop_texture_samples:
+ case nir_texop_samples_identical:
compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
return;
}
@@ -1665,10 +1693,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
src0[nsrc0++] = create_immed(b, fui(0.5));
}
- if (tex->is_shadow)
+ if (tex->is_shadow && tex->op != nir_texop_lod)
src0[nsrc0++] = compare;
- if (tex->is_array)
+ if (tex->is_array && tex->op != nir_texop_lod)
src0[nsrc0++] = coord[coords];
if (has_proj) {
@@ -1717,7 +1745,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_type_int:
type = TYPE_S32;
break;
- case nir_type_unsigned:
+ case nir_type_uint:
case nir_type_bool:
type = TYPE_U32;
break;
@@ -1725,12 +1753,26 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
unreachable("bad dest_type");
}
+ if (opc == OPC_GETLOD)
+ type = TYPE_U32;
+
sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW,
flags, tex->sampler_index, tex->sampler_index,
create_collect(b, src0, nsrc0),
create_collect(b, src1, nsrc1));
split_dest(b, dst, sam, 4);
+
+ /* GETLOD returns results in 4.8 fixed point */
+ if (opc == OPC_GETLOD) {
+ struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256));
+
+ compile_assert(ctx, tex->dest_type == nir_type_float);
+ for (i = 0; i < 2; i++) {
+ dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0,
+ factor, 0);
+ }
+ }
}
static void
@@ -1889,6 +1931,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
case nir_texop_query_levels:
emit_tex_query_levels(ctx, tex);
break;
+ case nir_texop_samples_identical:
+ unreachable("nir_texop_samples_identical");
default:
emit_tex(ctx, tex);
break;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 7e2c27d9765..5d1cccb0daa 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -166,7 +166,9 @@ struct ir3_shader_variant {
} outputs[16 + 2]; /* +POSITION +PSIZE */
bool writes_pos, writes_psize;
- /* vertices/inputs: */
+ /* attributes (VS) / varyings (FS):
+ * Note that sysval's should come *after* normal inputs.
+ */
unsigned inputs_count;
struct {
uint8_t slot;
@@ -229,7 +231,7 @@ struct ir3_shader {
struct ir3_compiler *compiler;
- struct pipe_context *pctx;
+ struct pipe_context *pctx; /* TODO replace w/ pipe_screen */
const struct tgsi_token *tokens;
struct pipe_stream_output_info stream_output;