summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Jason Ekstrand <[email protected]> 2015-11-23 14:03:47 -0800
committer: Jason Ekstrand <[email protected]> 2015-11-23 14:03:47 -0800
commit: 179fc4aae8f782453f0488e8dd508f9a01117376 (patch)
tree: 5f0cc77b30d86b581fb968a71ba83c5e4c2546d7 /src/gallium/drivers
parent: e14b2c76b40398a61f45f5d058079641661a66cb (diff)
parent: d9b8fde963a53d4e06570d8bece97f806714507a (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
This pulls in nir cloning and some much-needed upstream refactors.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h56
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c2
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c60
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_format.c24
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_format.h1
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_texture.c23
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h80
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_blend.c27
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_blend.h7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.c11
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.h7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c132
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_format.c147
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_gmem.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_program.c7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c12
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h1
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_screen.c2
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.c56
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.h3
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h23
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.c8
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.c11
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.c221
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.h3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c36
-rw-r--r--src/gallium/drivers/freedreno/freedreno_texture.c34
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c56
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h6
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources6
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp46
-rw-r--r--src/gallium/drivers/nouveau/nouveau_buffer.c8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.c320
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h444
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c45
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h24
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c27
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.h9
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_push.c42
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.c77
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.h6
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.c47
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.h16
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c207
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c417
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h45
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c65
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h19
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c99
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c18
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c15
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_winsys.h1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c17
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c12
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources2
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c59
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h18
-rw-r--r--src/gallium/drivers/radeon/r600_query.c1017
-rw-r--r--src/gallium/drivers/radeon/r600_query.h136
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.c24
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.h3
-rw-r--r--src/gallium/drivers/radeon/radeon_vce_52.c242
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c28
-rw-r--r--src/gallium/drivers/softpipe/Automake.inc5
-rw-r--r--src/gallium/drivers/svga/svga_context.h15
-rw-r--r--src/gallium/drivers/svga/svga_format.c148
-rw-r--r--src/gallium/drivers/svga/svga_format.h4
-rw-r--r--src/gallium/drivers/svga/svga_pipe_query.c9
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.c5
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c4
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c6
-rw-r--r--src/gallium/drivers/svga/svga_screen.c2
-rw-r--r--src/gallium/drivers/svga/svga_state_sampler.c3
-rw-r--r--src/gallium/drivers/trace/tr_screen.c3
-rw-r--r--src/gallium/drivers/vc4/Automake.inc4
-rw-r--r--src/gallium/drivers/vc4/Makefile.am1
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_blend.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_io.c7
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c6
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c8
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h8
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c56
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_reorder_uniforms.c26
93 files changed, 4122 insertions, 889 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index ef235734755..77f708f449c 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index b5e1ddadde0..a6940dfefea 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -111,10 +111,14 @@ enum a3xx_vtx_fmt {
VFMT_8_8_SNORM = 53,
VFMT_8_8_8_SNORM = 54,
VFMT_8_8_8_8_SNORM = 55,
- VFMT_10_10_10_2_UINT = 60,
- VFMT_10_10_10_2_UNORM = 61,
- VFMT_10_10_10_2_SINT = 62,
- VFMT_10_10_10_2_SNORM = 63,
+ VFMT_10_10_10_2_UINT = 56,
+ VFMT_10_10_10_2_UNORM = 57,
+ VFMT_10_10_10_2_SINT = 58,
+ VFMT_10_10_10_2_SNORM = 59,
+ VFMT_2_10_10_10_UINT = 60,
+ VFMT_2_10_10_10_UNORM = 61,
+ VFMT_2_10_10_10_SINT = 62,
+ VFMT_2_10_10_10_SNORM = 63,
};
enum a3xx_tex_fmt {
@@ -138,10 +142,12 @@ enum a3xx_tex_fmt {
TFMT_DXT1 = 36,
TFMT_DXT3 = 37,
TFMT_DXT5 = 38,
+ TFMT_2_10_10_10_UNORM = 40,
TFMT_10_10_10_2_UNORM = 41,
TFMT_9_9_9_E5_FLOAT = 42,
TFMT_11_11_10_FLOAT = 43,
TFMT_A8_UNORM = 44,
+ TFMT_L8_UNORM = 45,
TFMT_L8_A8_UNORM = 47,
TFMT_8_UNORM = 48,
TFMT_8_8_UNORM = 49,
@@ -183,6 +189,8 @@ enum a3xx_tex_fmt {
TFMT_32_SINT = 92,
TFMT_32_32_SINT = 93,
TFMT_32_32_32_32_SINT = 95,
+ TFMT_2_10_10_10_UINT = 96,
+ TFMT_10_10_10_2_UINT = 97,
TFMT_ETC2_RG11_SNORM = 112,
TFMT_ETC2_RG11_UNORM = 113,
TFMT_ETC2_R11_SNORM = 114,
@@ -215,6 +223,9 @@ enum a3xx_color_fmt {
RB_R8_UINT = 14,
RB_R8_SINT = 15,
RB_R10G10B10A2_UNORM = 16,
+ RB_A2R10G10B10_UNORM = 17,
+ RB_R10G10B10A2_UINT = 18,
+ RB_A2R10G10B10_UINT = 19,
RB_A8_UNORM = 20,
RB_R8_UNORM = 21,
RB_R16_FLOAT = 24,
@@ -251,25 +262,6 @@ enum a3xx_sp_perfcounter_select {
SP_ALU_ACTIVE_CYCLES = 29,
};
-enum a3xx_rop_code {
- ROP_CLEAR = 0,
- ROP_NOR = 1,
- ROP_AND_INVERTED = 2,
- ROP_COPY_INVERTED = 3,
- ROP_AND_REVERSE = 4,
- ROP_INVERT = 5,
- ROP_XOR = 6,
- ROP_NAND = 7,
- ROP_AND = 8,
- ROP_EQUIV = 9,
- ROP_NOOP = 10,
- ROP_OR_INVERTED = 11,
- ROP_COPY = 12,
- ROP_OR_REVERSE = 13,
- ROP_OR = 14,
- ROP_SET = 15,
-};
-
enum a3xx_rb_blend_opcode {
BLEND_DST_PLUS_SRC = 0,
BLEND_SRC_MINUS_DST = 1,
@@ -1620,12 +1612,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
}
#define REG_A3XX_VFD_CONTROL_1 0x00002241
-#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f
#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0
static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
}
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4
+static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val)
+{
+ return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK;
+}
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8
+static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val)
+{
+ return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK;
+}
#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000
#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16
static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 3906c9b996e..b8a31d84b3f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -81,7 +81,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
+ /* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
+ fd3_emit_get_vp(emit)->writes_psize &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 8f9c8b0623c..24afbc9e956 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -209,13 +209,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd3_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
struct fd_resource *rsc = fd_resource(view->base.texture);
- unsigned start = fd_sampler_first_level(&view->base);
- unsigned end = fd_sampler_last_level(&view->base);;
+ if (rsc && rsc->base.b.target == PIPE_BUFFER) {
+ OUT_RELOC(ring, rsc->bo, view->base.u.buf.first_element *
+ util_format_get_blocksize(view->base.format), 0, 0);
+ j = 1;
+ } else {
+ unsigned start = fd_sampler_first_level(&view->base);
+ unsigned end = fd_sampler_last_level(&view->base);;
- for (j = 0; j < (end - start + 1); j++) {
- struct fd_resource_slice *slice =
+ for (j = 0; j < (end - start + 1); j++) {
+ struct fd_resource_slice *slice =
fd_resource_slice(rsc, j + start);
- OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+ OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+ }
}
/* pad the remaining entries w/ null: */
@@ -350,7 +356,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
unsigned instance_regid = regid(63, 0);
unsigned vtxcnt_regid = regid(63, 0);
+ /* Note that sysvals come *after* normal inputs: */
for (i = 0; i < vp->inputs_count; i++) {
+ if (!vp->inputs[i].compmask)
+ continue;
if (vp->inputs[i].sysval) {
switch(vp->inputs[i].slot) {
case SYSTEM_VALUE_BASE_VERTEX:
@@ -369,18 +378,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
unreachable("invalid system value");
break;
}
- } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+ } else if (i < vtx->vtx->num_elements) {
last = i;
}
}
- /* hw doesn't like to be configured for zero vbo's, it seems: */
- if ((vtx->vtx->num_elements == 0) &&
- (vertex_regid == regid(63, 0)) &&
- (instance_regid == regid(63, 0)) &&
- (vtxcnt_regid == regid(63, 0)))
- return;
-
for (i = 0, j = 0; i <= last; i++) {
assert(!vp->inputs[i].sysval);
if (vp->inputs[i].compmask) {
@@ -424,6 +426,38 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
}
}
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (last < 0) {
+ /* just recycle the shader bo, we just need to point to *something*
+ * valid:
+ */
+ struct fd_bo *dummy_vbo = vp->bo;
+ bool switchnext = (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2);
+ OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+ COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) |
+ A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+ OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1);
+ OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+ A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+ A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) |
+ A3XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+ A3XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+ A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+ A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in = 1;
+ j = 1;
+ }
+
OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 857d156c869..52ea9444517 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -188,9 +188,13 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
_T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX),
+ V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ),
V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_UINT, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX),
+ V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ),
_T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX),
@@ -271,6 +275,16 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
_T(DXT3_SRGBA, DXT3, NONE, WZYX),
_T(DXT5_RGBA, DXT5, NONE, WZYX),
_T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+ /* faked */
+ _T(RGTC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(RGTC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(RGTC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(RGTC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(LATC1_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(LATC1_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
+ _T(LATC2_UNORM, 8_8_8_8_UNORM, NONE, WZYX),
+ _T(LATC2_SNORM, 8_8_8_8_SNORM, NONE, WZYX),
};
enum a3xx_vtx_fmt
@@ -310,6 +324,8 @@ fd3_pipe2fetchsize(enum pipe_format format)
{
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
+ else if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
case 8: return TFETCH_1_BYTE;
case 16: return TFETCH_2_BYTE;
@@ -324,6 +340,14 @@ fd3_pipe2fetchsize(enum pipe_format format)
}
}
+unsigned
+fd3_pipe2nblocksx(enum pipe_format format, unsigned width)
+{
+ if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ return util_format_get_nblocksx(format, width);
+}
+
/* we need to special case a bit the depth/stencil restore, because we are
* using the texture sampler to blit into the depth/stencil buffer, *not*
* into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing,
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index 05c5ea3d247..48c503e9a82 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
@@ -37,6 +37,7 @@ enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format);
enum pipe_format fd3_gmem_restore_format(enum pipe_format format);
enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format);
enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
+unsigned fd3_pipe2nblocksx(enum pipe_format format, unsigned width);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 2d6ecb2c050..99ae99ea0c1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -211,8 +211,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = fd_sampler_first_level(cso);
- unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+ unsigned lvl;
uint32_t sz2 = 0;
if (!so)
@@ -227,20 +226,34 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->texconst0 =
A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
- A3XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
if (util_format_is_srgb(cso->format))
so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
- so->texconst1 =
+ if (prsc->target == PIPE_BUFFER) {
+ lvl = 0;
+ so->texconst1 =
+ A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
+ A3XX_TEX_CONST_1_WIDTH(cso->u.buf.last_element -
+ cso->u.buf.first_element + 1) |
+ A3XX_TEX_CONST_1_HEIGHT(1);
+ } else {
+ unsigned miplevels;
+
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+
+ so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 =
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ }
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
so->texconst2 =
- A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+ A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
switch (prsc->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 9f970365464..a450379e98d 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -47,11 +47,13 @@ enum a4xx_color_fmt {
RB4_R8_UNORM = 2,
RB4_R4G4B4A4_UNORM = 8,
RB4_R5G5B5A1_UNORM = 10,
- RB4_R5G6R5_UNORM = 14,
+ RB4_R5G6B5_UNORM = 14,
RB4_R8G8_UNORM = 15,
RB4_R8G8_SNORM = 16,
RB4_R8G8_UINT = 17,
RB4_R8G8_SINT = 18,
+ RB4_R16_UNORM = 19,
+ RB4_R16_SNORM = 20,
RB4_R16_FLOAT = 21,
RB4_R16_UINT = 22,
RB4_R16_SINT = 23,
@@ -63,12 +65,16 @@ enum a4xx_color_fmt {
RB4_R10G10B10A2_UNORM = 31,
RB4_R10G10B10A2_UINT = 34,
RB4_R11G11B10_FLOAT = 39,
+ RB4_R16G16_UNORM = 40,
+ RB4_R16G16_SNORM = 41,
RB4_R16G16_FLOAT = 42,
RB4_R16G16_UINT = 43,
RB4_R16G16_SINT = 44,
RB4_R32_FLOAT = 45,
RB4_R32_UINT = 46,
RB4_R32_SINT = 47,
+ RB4_R16G16B16A16_UNORM = 52,
+ RB4_R16G16B16A16_SNORM = 53,
RB4_R16G16B16A16_FLOAT = 54,
RB4_R16G16B16A16_UINT = 55,
RB4_R16G16B16A16_SINT = 56,
@@ -106,6 +112,7 @@ enum a4xx_vtx_fmt {
VFMT4_32_32_FIXED = 10,
VFMT4_32_32_32_FIXED = 11,
VFMT4_32_32_32_32_FIXED = 12,
+ VFMT4_11_11_10_FLOAT = 13,
VFMT4_16_SINT = 16,
VFMT4_16_16_SINT = 17,
VFMT4_16_16_16_SINT = 18,
@@ -146,18 +153,19 @@ enum a4xx_vtx_fmt {
VFMT4_8_8_SNORM = 53,
VFMT4_8_8_8_SNORM = 54,
VFMT4_8_8_8_8_SNORM = 55,
- VFMT4_10_10_10_2_UINT = 60,
- VFMT4_10_10_10_2_UNORM = 61,
- VFMT4_10_10_10_2_SINT = 62,
- VFMT4_10_10_10_2_SNORM = 63,
+ VFMT4_10_10_10_2_UINT = 56,
+ VFMT4_10_10_10_2_UNORM = 57,
+ VFMT4_10_10_10_2_SINT = 58,
+ VFMT4_10_10_10_2_SNORM = 59,
};
enum a4xx_tex_fmt {
TFMT4_5_6_5_UNORM = 11,
- TFMT4_5_5_5_1_UNORM = 10,
+ TFMT4_5_5_5_1_UNORM = 9,
TFMT4_4_4_4_4_UNORM = 8,
TFMT4_X8Z24_UNORM = 71,
TFMT4_10_10_10_2_UNORM = 33,
+ TFMT4_10_10_10_2_UINT = 34,
TFMT4_A8_UNORM = 3,
TFMT4_L8_A8_UNORM = 13,
TFMT4_8_UNORM = 4,
@@ -172,6 +180,12 @@ enum a4xx_tex_fmt {
TFMT4_8_SINT = 7,
TFMT4_8_8_SINT = 17,
TFMT4_8_8_8_8_SINT = 31,
+ TFMT4_16_UNORM = 18,
+ TFMT4_16_16_UNORM = 38,
+ TFMT4_16_16_16_16_UNORM = 51,
+ TFMT4_16_SNORM = 19,
+ TFMT4_16_16_SNORM = 39,
+ TFMT4_16_16_16_16_SNORM = 52,
TFMT4_16_UINT = 21,
TFMT4_16_16_UINT = 41,
TFMT4_16_16_16_16_UINT = 54,
@@ -190,8 +204,21 @@ enum a4xx_tex_fmt {
TFMT4_32_FLOAT = 43,
TFMT4_32_32_FLOAT = 56,
TFMT4_32_32_32_32_FLOAT = 63,
+ TFMT4_32_32_32_FLOAT = 59,
+ TFMT4_32_32_32_UINT = 60,
+ TFMT4_32_32_32_SINT = 61,
TFMT4_9_9_9_E5_FLOAT = 32,
TFMT4_11_11_10_FLOAT = 37,
+ TFMT4_DXT1 = 86,
+ TFMT4_DXT3 = 87,
+ TFMT4_DXT5 = 88,
+ TFMT4_RGTC1_UNORM = 90,
+ TFMT4_RGTC1_SNORM = 91,
+ TFMT4_RGTC2_UNORM = 94,
+ TFMT4_RGTC2_SNORM = 95,
+ TFMT4_BPTC_UFLOAT = 97,
+ TFMT4_BPTC_FLOAT = 98,
+ TFMT4_BPTC = 99,
TFMT4_ATC_RGB = 100,
TFMT4_ATC_RGBA_EXPLICIT = 101,
TFMT4_ATC_RGBA_INTERPOLATED = 102,
@@ -400,8 +427,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4
#define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008
#define A4XX_RB_MRT_CONTROL_BLEND 0x00000010
#define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020
-#define A4XX_RB_MRT_CONTROL_FASTCLEAR 0x00000400
-#define A4XX_RB_MRT_CONTROL_B11 0x00000800
+#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8
+static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val)
+{
+ return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK;
+}
#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000
#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24
static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
@@ -600,7 +632,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val)
{
return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK;
}
-#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100
+#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16
static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
@@ -2056,6 +2088,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
+#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003
#define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001
@@ -2596,7 +2630,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val)
#define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000
#define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000
-#define REG_A4XX_UNKNOWN_21C5 0x000021c5
+#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+ return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+ return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040
#define REG_A4XX_PC_RESTART_INDEX 0x000021c6
@@ -2738,6 +2785,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
{
return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
}
+#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000
+#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19
+static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val)
+{
+ return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK;
+}
#define REG_A4XX_TEX_SAMP_1 0x00000001
#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e
@@ -2746,6 +2799,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val
{
return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK;
}
+#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010
#define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020
#define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040
#define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00
@@ -2814,7 +2868,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val)
{
return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK;
}
-#define A4XX_TEX_CONST_1_WIDTH__MASK 0x1fff8000
+#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000
#define A4XX_TEX_CONST_1_WIDTH__SHIFT 15
static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val)
{
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
index d5e823ef69d..f19702280e0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -27,6 +27,7 @@
*/
#include "pipe/p_state.h"
+#include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
@@ -59,12 +60,12 @@ fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
struct fd4_blend_stateobj *so;
-// enum a3xx_rop_code rop = ROP_COPY;
+ enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
unsigned i, mrt_blend = 0;
if (cso->logicop_enable) {
-// rop = cso->logicop_func; /* maps 1:1 */
+ rop = cso->logicop_func; /* maps 1:1 */
switch (cso->logicop_func) {
case PIPE_LOGICOP_NOR:
@@ -98,16 +99,25 @@ fd4_blend_state_create(struct pipe_context *pctx,
else
rt = &cso->rt[0];
- so->rb_mrt[i].blend_control =
+ so->rb_mrt[i].blend_control_rgb =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
- A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor));
+
+ so->rb_mrt[i].blend_control_alpha =
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
+ so->rb_mrt[i].blend_control_no_alpha_rgb =
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor)));
+
+
so->rb_mrt[i].control =
- 0xc00 | /* XXX ROP_CODE ?? */
+ A4XX_RB_MRT_CONTROL_ROP_CODE(rop) |
+ COND(cso->logicop_enable, A4XX_RB_MRT_CONTROL_ROP_ENABLE) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
if (rt->blend_enable) {
@@ -118,14 +128,17 @@ fd4_blend_state_create(struct pipe_context *pctx,
mrt_blend |= (1 << i);
}
- if (reads_dest)
+ if (reads_dest) {
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+ mrt_blend |= (1 << i);
+ }
if (cso->dither)
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
- so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
+ so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
+ COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
return so;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
index 7620d00a625..6230fa7a50e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
@@ -39,7 +39,12 @@ struct fd4_blend_stateobj {
struct {
uint32_t control;
uint32_t buf_info;
- uint32_t blend_control;
+ /* Blend control bits for color if there is an alpha channel */
+ uint32_t blend_control_rgb;
+ /* Blend control bits for color if there is no alpha channel */
+ uint32_t blend_control_no_alpha_rgb;
+ /* Blend control bits for alpha channel */
+ uint32_t blend_control_alpha;
} rb_mrt[A4XX_MAX_RENDER_TARGETS];
uint32_t rb_fs_output;
};
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 7bd5163529a..8cbe68d5790 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -47,6 +47,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit)
{
const struct pipe_draw_info *info = emit->info;
+ enum pc_di_primtype primtype = ctx->primtypes[info->mode];
if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
return;
@@ -64,7 +65,14 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
+ /* points + psize -> spritelist: */
+ if (ctx->rasterizer->point_size_per_vertex &&
+ fd4_emit_get_vp(emit)->writes_psize &&
+ (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
+
fd4_draw_emit(ctx, ring,
+ primtype,
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info);
}
@@ -263,8 +271,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
- A4XX_RB_MRT_CONTROL_B11 |
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
index b89a30a7c4b..a6c56404a8a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
@@ -101,12 +101,12 @@ fd4_size2indextype(unsigned index_size)
}
static inline void
fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info)
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
- enum pc_di_primtype primtype = ctx->primtypes[info->mode];
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
@@ -127,11 +127,6 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
- /* points + psize -> spritelist: */
- if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
- (info->mode == PIPE_PRIM_POINTS))
- primtype = DI_PT_POINTLIST_PSIZE;
-
fd4_draw(ctx, ring, primtype, vismode, src_sel,
info->count, info->instance_count,
idx_type, idx_size, idx_offset, idx_bo);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 26b58718cd8..f220fc7ac1f 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -185,7 +185,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
fd4_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
- unsigned start = fd_sampler_first_level(&view->base);
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
@@ -193,8 +192,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, view->texconst3);
if (view->base.texture) {
struct fd_resource *rsc = fd_resource(view->base.texture);
- uint32_t offset = fd_resource_offset(rsc, start, 0);
- OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
} else {
OUT_RING(ring, 0x00000000);
}
@@ -286,7 +284,8 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
- OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format)));
OUT_RING(ring, 0x00000000);
OUT_RELOC(ring, rsc->bo, offset, 0, 0);
OUT_RING(ring, 0x00000000);
@@ -332,7 +331,10 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
unsigned instance_regid = regid(63, 0);
unsigned vtxcnt_regid = regid(63, 0);
+ /* Note that sysvals come *after* normal inputs: */
for (i = 0; i < vp->inputs_count; i++) {
+ if (!vp->inputs[i].compmask)
+ continue;
if (vp->inputs[i].sysval) {
switch(vp->inputs[i].slot) {
case SYSTEM_VALUE_BASE_VERTEX:
@@ -351,19 +353,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
unreachable("invalid system value");
break;
}
- } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
+ } else if (i < vtx->vtx->num_elements) {
last = i;
}
}
-
- /* hw doesn't like to be configured for zero vbo's, it seems: */
- if ((vtx->vtx->num_elements == 0) &&
- (vertex_regid == regid(63, 0)) &&
- (instance_regid == regid(63, 0)) &&
- (vtxcnt_regid == regid(63, 0)))
- return;
-
for (i = 0, j = 0; i <= last; i++) {
assert(!vp->inputs[i].sysval);
if (vp->inputs[i].compmask) {
@@ -408,6 +402,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
}
}
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (last < 0) {
+ /* just recycle the shader bo, we just need to point to *something*
+ * valid:
+ */
+ struct fd_bo *dummy_vbo = vp->bo;
+ bool switchnext = (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
+ A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
+ COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+ OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
+ OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+ A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
+ A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
+ A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
+ A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
+ A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
+ A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in = 1;
+ j = 1;
+ }
+
OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
0xa0000 | /* XXX */
@@ -470,11 +496,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
}
- if (dirty & FD_DIRTY_ZSA) {
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ uint32_t rb_alpha_control = zsa->rb_alpha_control;
+
+ if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
+ rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
- OUT_RING(ring, zsa->rb_alpha_control);
+ OUT_RING(ring, rb_alpha_control);
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, zsa->rb_stencil_control);
@@ -535,8 +566,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
*/
if (emit->info) {
const struct pipe_draw_info *info = emit->info;
- uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
- ->pc_prim_vtx_cntl;
+ struct fd4_rasterizer_stateobj *rast =
+ fd4_rasterizer_stateobj(ctx->rasterizer);
+ uint32_t val = rast->pc_prim_vtx_cntl;
if (info->indexed && info->primitive_restart)
val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
@@ -552,7 +584,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
OUT_RING(ring, val);
- OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */
+ OUT_RING(ring, rast->pc_prim_vtx_cntl2);
}
if (dirty & FD_DIRTY_SCISSOR) {
@@ -581,7 +613,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
- if (dirty & FD_DIRTY_PROG) {
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
}
@@ -599,11 +631,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
uint32_t i;
for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ enum pipe_format format = pipe_surface_format(
+ ctx->framebuffer.cbufs[i]);
+ bool is_int = util_format_is_pure_integer(format);
+ bool has_alpha = util_format_has_alpha(format);
+ uint32_t control = blend->rb_mrt[i].control;
+ uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
+
+ if (is_int) {
+ control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+ control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
+ }
+
+ if (has_alpha) {
+ blend_control |= blend->rb_mrt[i].blend_control_rgb;
+ } else {
+ blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
+ control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
+ }
+
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].control);
+ OUT_RING(ring, control);
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
- OUT_RING(ring, blend->rb_mrt[i].blend_control);
+ OUT_RING(ring, blend_control);
}
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
@@ -611,19 +662,48 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
}
- if (dirty & FD_DIRTY_BLEND_COLOR) {
+ if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_blend_color *bcolor = &ctx->blend_color;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ float factor = 65535.0;
+ int i;
+
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
+ const struct util_format_description *desc =
+ util_format_description(format);
+ int j;
+
+ if (desc->is_mixed)
+ continue;
+
+ j = util_format_get_first_non_void_channel(format);
+ if (j == -1)
+ continue;
+
+ if (desc->channel[j].size > 8 || !desc->channel[j].normalized ||
+ desc->channel[j].pure_integer)
+ continue;
+
+ /* Just use the first unorm8/snorm8 render buffer. Can't keep
+ * everyone happy.
+ */
+ if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED)
+ factor = 32767.0;
+ break;
+ }
+
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
- OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) |
A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
- OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) |
A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
- OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) |
A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
- OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) |
+ OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) |
A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 847d4fb6d63..c240745cec1 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -99,20 +99,26 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(S8_UINT, 8_UINT, R8_UNORM, WZYX),
/* 16-bit */
- V_(R16_UNORM, 16_UNORM, NONE, WZYX),
- V_(R16_SNORM, 16_SNORM, NONE, WZYX),
- VT(R16_UINT, 16_UINT, R16_UINT, WZYX),
- VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
- V_(R16_USCALED, 16_UINT, NONE, WZYX),
- V_(R16_SSCALED, 16_UINT, NONE, WZYX),
- VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX),
-
- _T(A16_UINT, 16_UINT, NONE, WZYX),
- _T(A16_SINT, 16_SINT, NONE, WZYX),
- _T(L16_UINT, 16_UINT, NONE, WZYX),
- _T(L16_SINT, 16_SINT, NONE, WZYX),
- _T(I16_UINT, 16_UINT, NONE, WZYX),
- _T(I16_SINT, 16_SINT, NONE, WZYX),
+ VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX),
+ VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX),
+ VT(R16_UINT, 16_UINT, R16_UINT, WZYX),
+ VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
+ V_(R16_USCALED, 16_UINT, NONE, WZYX),
+ V_(R16_SSCALED, 16_UINT, NONE, WZYX),
+ VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX),
+
+ _T(A16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(A16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(A16_UINT, 16_UINT, NONE, WZYX),
+ _T(A16_SINT, 16_SINT, NONE, WZYX),
+ _T(L16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(L16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(L16_UINT, 16_UINT, NONE, WZYX),
+ _T(L16_SINT, 16_SINT, NONE, WZYX),
+ _T(I16_UNORM, 16_UNORM, NONE, WZYX),
+ _T(I16_SNORM, 16_SNORM, NONE, WZYX),
+ _T(I16_UINT, 16_UINT, NONE, WZYX),
+ _T(I16_SINT, 16_SINT, NONE, WZYX),
VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX),
VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX),
@@ -124,6 +130,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(L8A8_UINT, 8_8_UINT, NONE, WZYX),
_T(L8A8_SINT, 8_8_SINT, NONE, WZYX),
+ _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ),
_T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ),
@@ -151,16 +158,18 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(I32_UINT, 32_UINT, NONE, WZYX),
_T(I32_SINT, 32_SINT, NONE, WZYX),
- V_(R16G16_UNORM, 16_16_UNORM, NONE, WZYX),
- V_(R16G16_SNORM, 16_16_SNORM, NONE, WZYX),
- VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX),
- VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
- V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
- V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
- VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX),
+ VT(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX),
+ VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX),
+ VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX),
+ VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
+ V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
+ V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
+ VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX),
- _T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
- _T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
+ _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX),
+ _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX),
+ _T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
+ _T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
@@ -191,11 +200,15 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
_T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX),
- V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ),
+ VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX),
+ VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ),
V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX),
+ V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ),
V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX),
+ V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ),
- _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
+ VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX),
_T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
@@ -213,8 +226,10 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX),
/* 64-bit */
- V_(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX),
- V_(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX),
+ VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+ VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX),
+ VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
+ VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX),
VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
_T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX),
@@ -235,11 +250,11 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(L32A32_SINT, 32_32_SINT, NONE, WZYX),
/* 96-bit */
- V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX),
- V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX),
+ VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX),
+ VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX),
V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX),
V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX),
- V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX),
+ VT(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX),
V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX),
/* 128-bit */
@@ -252,6 +267,72 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
_T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX),
+
+ /* compressed */
+ _T(ETC1_RGB8, ETC1, NONE, WZYX),
+ _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX),
+ _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX),
+ _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX),
+ _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX),
+ _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX),
+ _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX),
+ _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX),
+ _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX),
+ _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX),
+ _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX),
+
+ _T(DXT1_RGB, DXT1, NONE, WZYX),
+ _T(DXT1_SRGB, DXT1, NONE, WZYX),
+ _T(DXT1_RGBA, DXT1, NONE, WZYX),
+ _T(DXT1_SRGBA, DXT1, NONE, WZYX),
+ _T(DXT3_RGBA, DXT3, NONE, WZYX),
+ _T(DXT3_SRGBA, DXT3, NONE, WZYX),
+ _T(DXT5_RGBA, DXT5, NONE, WZYX),
+ _T(DXT5_SRGBA, DXT5, NONE, WZYX),
+
+ _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX),
+ _T(BPTC_SRGBA, BPTC, NONE, WZYX),
+ _T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX),
+ _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX),
+
+ _T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+ _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+ _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+ _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+ _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX),
+ _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX),
+ _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX),
+ _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX),
+
+ _T(ASTC_4x4, ASTC_4x4, NONE, WZYX),
+ _T(ASTC_5x4, ASTC_5x4, NONE, WZYX),
+ _T(ASTC_5x5, ASTC_5x5, NONE, WZYX),
+ _T(ASTC_6x5, ASTC_6x5, NONE, WZYX),
+ _T(ASTC_6x6, ASTC_6x6, NONE, WZYX),
+ _T(ASTC_8x5, ASTC_8x5, NONE, WZYX),
+ _T(ASTC_8x6, ASTC_8x6, NONE, WZYX),
+ _T(ASTC_8x8, ASTC_8x8, NONE, WZYX),
+ _T(ASTC_10x5, ASTC_10x5, NONE, WZYX),
+ _T(ASTC_10x6, ASTC_10x6, NONE, WZYX),
+ _T(ASTC_10x8, ASTC_10x8, NONE, WZYX),
+ _T(ASTC_10x10, ASTC_10x10, NONE, WZYX),
+ _T(ASTC_12x10, ASTC_12x10, NONE, WZYX),
+ _T(ASTC_12x12, ASTC_12x12, NONE, WZYX),
+
+ _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX),
+ _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX),
+ _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX),
+ _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX),
+ _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX),
+ _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX),
+ _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX),
+ _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX),
+ _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX),
+ _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX),
+ _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX),
+ _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX),
+ _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX),
+ _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX),
};
/* convert pipe format to vertex buffer format: */
@@ -295,11 +376,15 @@ fd4_pipe2fetchsize(enum pipe_format format)
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
- switch (util_format_get_blocksizebits(format)) {
+ if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
+ return TFETCH4_16_BYTE;
+
+ switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
case 8: return TFETCH4_1_BYTE;
case 16: return TFETCH4_2_BYTE;
case 32: return TFETCH4_4_BYTE;
case 64: return TFETCH4_8_BYTE;
+ case 96: return TFETCH4_1_BYTE; /* Does this matter? */
case 128: return TFETCH4_16_BYTE;
default:
debug_printf("Unknown block size for format %s: %d\n",
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 3f8bbf3a124..221608127b4 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -347,8 +347,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
- OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
- A4XX_RB_MRT_CONTROL_B11 |
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index e3d5dabab4c..3df13543148 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -245,13 +245,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
}
- /* adjust regids for alpha output formats. there is no alpha render
- * format, so it's just treated like red
- */
- for (i = 0; i < nr; i++)
- if (util_format_is_alpha(pipe_surface_format(bufs[i])))
- color_regid[i] += 3;
-
/* TODO get these dynamically: */
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
index dc7e98b149d..7456c63febe 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -77,6 +77,13 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
so->gras_su_mode_control =
A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
+ so->pc_prim_vtx_cntl2 =
+ A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+ A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+
+ if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+ cso->fill_back != PIPE_POLYGON_MODE_FILL)
+ so->pc_prim_vtx_cntl2 |= A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
@@ -90,5 +97,10 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
if (cso->offset_tri)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+ if (!cso->depth_clip)
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+ if (cso->clip_halfz)
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
+
return so;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
index 64e81a9983b..b56a04da6a8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
@@ -42,6 +42,7 @@ struct fd4_rasterizer_stateobj {
uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
+ uint32_t pc_prim_vtx_cntl2;
};
static inline struct fd4_rasterizer_stateobj *
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
index d8ea414f300..b2a69cca56c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
@@ -57,6 +57,8 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen,
}
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ (target == PIPE_BUFFER ||
+ util_format_get_blocksize(format) != 12) &&
(fd4_pipe2tex(format) != ~0)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index dbff5a738fd..0eba75577b0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -124,9 +124,11 @@ fd4_sampler_state_create(struct pipe_context *pctx,
so->texsamp1 =
// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
+ COND(!cso->seamless_cube_map, A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->texsamp1 |=
A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
@@ -210,8 +212,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = fd_sampler_first_level(cso);
- unsigned miplevels = fd_sampler_last_level(cso) - lvl;
+ unsigned lvl, layers;
+ uint32_t sz2 = 0;
if (!so)
return NULL;
@@ -223,39 +225,65 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->base.context = pctx;
so->texconst0 =
- A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
+ A4XX_TEX_CONST_0_TYPE(tex_type(cso->target)) |
A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
- A4XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
if (util_format_is_srgb(cso->format))
so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
- so->texconst1 =
- A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
- A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
- so->texconst2 =
- A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
- A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
+ if (cso->target == PIPE_BUFFER) {
+ unsigned elements = cso->u.buf.last_element -
+ cso->u.buf.first_element + 1;
+ lvl = 0;
+ so->texconst1 =
+ A4XX_TEX_CONST_1_WIDTH(elements) |
+ A4XX_TEX_CONST_1_HEIGHT(1);
+ so->texconst2 =
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
+ A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+ so->offset = cso->u.buf.first_element *
+ util_format_get_blocksize(cso->format);
+ } else {
+ unsigned miplevels;
- switch (prsc->target) {
+ lvl = fd_sampler_first_level(cso);
+ miplevels = fd_sampler_last_level(cso) - lvl;
+ layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;
+
+ so->texconst0 |= A4XX_TEX_CONST_0_MIPLVLS(miplevels);
+ so->texconst1 =
+ A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+ A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+ so->texconst2 =
+ A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
+ A4XX_TEX_CONST_2_PITCH(
+ util_format_get_nblocksx(
+ cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
+ so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
+ }
+
+ switch (cso->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(prsc->array_size) |
+ A4XX_TEX_CONST_3_DEPTH(layers) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) |
+ A4XX_TEX_CONST_3_DEPTH(layers / 6) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
- A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
+ A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[lvl].size0);
+ while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
+ sz2 = rsc->slices[++lvl].size0;
+ so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(sz2);
break;
default:
so->texconst3 = 0x00000000;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
index 31955770a85..6ca34ade60d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -51,7 +51,8 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp)
struct fd4_pipe_sampler_view {
struct pipe_sampler_view base;
- uint32_t texconst0, texconst1, texconst2, texconst3, textconst4;
+ uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
+ uint32_t offset;
};
static inline struct fd4_pipe_sampler_view *
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index ca3d2ac3fca..0e0f0e65e9b 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -119,6 +119,25 @@ enum adreno_rb_copy_control_mode {
RB_COPY_DEPTH_STENCIL = 5,
};
+enum a3xx_rop_code {
+ ROP_CLEAR = 0,
+ ROP_NOR = 1,
+ ROP_AND_INVERTED = 2,
+ ROP_COPY_INVERTED = 3,
+ ROP_AND_REVERSE = 4,
+ ROP_INVERT = 5,
+ ROP_XOR = 6,
+ ROP_NAND = 7,
+ ROP_AND = 8,
+ ROP_EQUIV = 9,
+ ROP_NOOP = 10,
+ ROP_OR_INVERTED = 11,
+ ROP_COPY = 12,
+ ROP_OR_REVERSE = 13,
+ ROP_OR = 14,
+ ROP_SET = 15,
+};
+
enum a3xx_render_mode {
RB_RENDERING_PASS = 0,
RB_TILING_PASS = 1,
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index f095e3061b2..4aabc086607 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 61c4c6d6e24..571c8142bf7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -359,6 +359,10 @@ struct fd_context {
struct fd_streamout_stateobj streamout;
struct pipe_clip_state ucp;
+ struct pipe_query *cond_query;
+ bool cond_cond; /* inverted rendering condition */
+ uint cond_mode;
+
/* GMEM/tile handling fxns: */
void (*emit_tile_init)(struct fd_context *ctx);
void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile);
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 7bf3343f43a..bf803cc77bc 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -88,6 +88,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
return;
}
+ /* TODO: push down the region versions into the tiles */
+ if (!fd_render_condition_check(pctx))
+ return;
+
/* emulate unsupported primitives: */
if (!fd_supported_prim(ctx, info->mode)) {
if (ctx->streamout.num_targets > 0)
@@ -220,6 +224,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
unsigned cleared_buffers;
int i;
+ /* TODO: push down the region versions into the tiles */
+ if (!fd_render_condition_check(pctx))
+ return;
+
/* for bookkeeping about which buffers have been cleared (and thus
* can fully or partially skip mem2gmem) we need to ignore buffers
* that have already had a draw, in case apps do silly things like
diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c
index db2683c9b6f..b87e8250719 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.c
+++ b/src/gallium/drivers/freedreno/freedreno_query.c
@@ -81,6 +81,16 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq,
return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
}
+static void /* installed as pctx->render_condition: only records the condition in the context, nothing is evaluated here */
+fd_render_condition(struct pipe_context *pctx, struct pipe_query *pq,
+ boolean condition, uint mode)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->cond_query = pq; /* a NULL query makes fd_render_condition_check() always return true */
+ ctx->cond_cond = condition; /* compared against the query result's truth value at check time */
+ ctx->cond_mode = mode; /* the *_NO_WAIT modes make the check non-blocking */
+}
+
static int
fd_get_driver_query_info(struct pipe_screen *pscreen,
unsigned index, struct pipe_driver_query_info *info)
@@ -118,4 +128,5 @@ fd_query_context_init(struct pipe_context *pctx)
pctx->begin_query = fd_begin_query;
pctx->end_query = fd_end_query;
pctx->get_query_result = fd_get_query_result;
+ pctx->render_condition = fd_render_condition;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 98de0969cab..63ca9e30620 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -27,6 +27,7 @@
*/
#include "util/u_format.h"
+#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
@@ -111,11 +112,19 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
util_range_set_empty(&rsc->valid_buffer_range);
}
-/* Currently this is only used for flushing Z32_S8 texture transfers, but
- * eventually it should handle everything.
- */
+static unsigned /* byte offset of `layer` relative to slice->offset; callers add both to the mapped bo pointer */
+fd_resource_layer_offset(struct fd_resource *rsc,
+ struct fd_resource_slice *slice,
+ unsigned layer)
+{
+ if (rsc->layer_first) /* layer-first layout: consecutive layers are rsc->layer_size apart */
+ return layer * rsc->layer_size;
+ else /* otherwise consecutive layers are this slice's size0 apart */
+ return layer * slice->size0;
+}
+
static void
-fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
struct fd_resource *rsc = fd_resource(trans->base.resource);
struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
@@ -123,13 +132,12 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
enum pipe_format format = trans->base.resource->format;
float *depth = fd_bo_map(rsc->bo) + slice->offset +
+ fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
+ fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
- assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
- format == PIPE_FORMAT_X32_S8X24_UINT);
-
if (format != PIPE_FORMAT_X32_S8X24_UINT)
util_format_z32_float_s8x24_uint_unpack_z_float(
depth, slice->pitch * 4,
@@ -142,6 +150,73 @@ fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
box->width, box->height);
}
+static void /* write `box` of an RGTC/LATC staging transfer back into the resource's own storage via the util_format rgtc unpack helpers */
+fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
+{
+ struct fd_resource *rsc = fd_resource(trans->base.resource);
+ struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
+ enum pipe_format format = trans->base.resource->format;
+
+ uint8_t *data = fd_bo_map(rsc->bo) + slice->offset + /* destination: origin of the flushed box inside the mapped bo */
+ fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
+ ((trans->base.box.y + box->y) * slice->pitch + /* box is relative to the transfer's own box, hence the sums */
+ trans->base.box.x + box->x) * rsc->cpp;
+
+ uint8_t *source = trans->staging + /* source: the same box inside the staging buffer */
+ util_format_get_nblocksy(format, box->y) * trans->base.stride + /* rows are counted in blocks of `format` */
+ util_format_get_stride(format, box->x);
+
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM: /* NOTE(review): SNORM and LATC1 share the rgtc1 UNORM helper -- confirm this is intended */
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ util_format_rgtc1_unorm_unpack_rgba_8unorm( /* staging -> resource storage */
+ data, slice->pitch * rsc->cpp,
+ source, trans->base.stride,
+ box->width, box->height);
+ break;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM: /* NOTE(review): same sharing as above, with the rgtc2 UNORM helper */
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ util_format_rgtc2_unorm_unpack_rgba_8unorm( /* staging -> resource storage */
+ data, slice->pitch * rsc->cpp,
+ source, trans->base.stride,
+ box->width, box->height);
+ break;
+ default: /* any format other than the eight listed above */
+ assert(!"Unexpected format\n");
+ break;
+ }
+}
+
+static void /* flush `box` of a staging transfer: choose the write-back routine from the resource's format */
+fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+{
+ enum pipe_format format = trans->base.resource->format;
+
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: /* depth/stencil formats handled by the z32s8 routine */
+ case PIPE_FORMAT_X32_S8X24_UINT:
+ fd_resource_flush_z32s8(trans, box);
+ break;
+ case PIPE_FORMAT_RGTC1_UNORM: /* all eight RGTC/LATC variants go through the rgtc routine */
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ fd_resource_flush_rgtc(trans, box);
+ break;
+ default: /* no write-back routine exists for any other format */
+ assert(!"Unexpected staging transfer type");
+ break;
+ }
+}
+
static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
struct pipe_transfer *ptrans,
const struct pipe_box *box)
@@ -267,20 +342,15 @@ fd_resource_transfer_map(struct pipe_context *pctx,
return NULL;
}
- if (rsc->layer_first) {
- offset = slice->offset +
- box->y / util_format_get_blockheight(format) * ptrans->stride +
- box->x / util_format_get_blockwidth(format) * rsc->cpp +
- box->z * rsc->layer_size;
- } else {
- offset = slice->offset +
- box->y / util_format_get_blockheight(format) * ptrans->stride +
- box->x / util_format_get_blockwidth(format) * rsc->cpp +
- box->z * slice->size0;
- }
+ offset = slice->offset +
+ box->y / util_format_get_blockheight(format) * ptrans->stride +
+ box->x / util_format_get_blockwidth(format) * rsc->cpp +
+ fd_resource_layer_offset(rsc, slice, box->z);
if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
+ assert(trans->base.box.depth == 1);
+
trans->base.stride = trans->base.box.width * rsc->cpp * 2;
trans->staging = malloc(trans->base.stride * trans->base.box.height);
if (!trans->staging)
@@ -298,8 +368,10 @@ fd_resource_transfer_map(struct pipe_context *pctx,
goto fail;
float *depth = (float *)(buf + slice->offset +
+ fd_resource_layer_offset(rsc, slice, box->z) +
box->y * slice->pitch * 4 + box->x * 4);
uint8_t *stencil = sbuf + sslice->offset +
+ fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
box->y * sslice->pitch + box->x;
if (format != PIPE_FORMAT_X32_S8X24_UINT)
@@ -316,6 +388,54 @@ fd_resource_transfer_map(struct pipe_context *pctx,
buf = trans->staging;
offset = 0;
+ } else if (rsc->internal_format != format &&
+ util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ assert(trans->base.box.depth == 1);
+
+ trans->base.stride = util_format_get_stride(
+ format, trans->base.box.width);
+ trans->staging = malloc(
+ util_format_get_2d_size(format, trans->base.stride,
+ trans->base.box.height));
+ if (!trans->staging)
+ goto fail;
+
+ /* if we're not discarding the whole range (or resource), we must copy
+ * the real data in.
+ */
+ if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+ PIPE_TRANSFER_DISCARD_RANGE))) {
+ uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
+ fd_resource_layer_offset(rsc, slice, box->z) +
+ box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;
+
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_LATC1_UNORM:
+ case PIPE_FORMAT_LATC1_SNORM:
+ util_format_rgtc1_unorm_pack_rgba_8unorm(
+ trans->staging, trans->base.stride,
+ rgba8, slice->pitch * rsc->cpp,
+ box->width, box->height);
+ break;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ case PIPE_FORMAT_LATC2_UNORM:
+ case PIPE_FORMAT_LATC2_SNORM:
+ util_format_rgtc2_unorm_pack_rgba_8unorm(
+ trans->staging, trans->base.stride,
+ rgba8, slice->pitch * rsc->cpp,
+ box->width, box->height);
+ break;
+ default:
+ assert(!"Unexpected format");
+ break;
+ }
+ }
+
+ buf = trans->staging;
+ offset = 0;
}
*pptrans = ptrans;
@@ -361,9 +481,10 @@ static const struct u_resource_vtbl fd_resource_vtbl = {
};
static uint32_t
-setup_slices(struct fd_resource *rsc, uint32_t alignment)
+setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
struct pipe_resource *prsc = &rsc->base.b;
+ enum util_format_layout layout = util_format_description(format)->layout;
uint32_t level, size = 0;
uint32_t width = prsc->width0;
uint32_t height = prsc->height0;
@@ -377,9 +498,13 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment)
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
uint32_t blocks;
- slice->pitch = width = align(width, 32);
+ if (layout == UTIL_FORMAT_LAYOUT_ASTC)
+ slice->pitch = width =
+ util_align_npot(width, 32 * util_format_get_blockwidth(format));
+ else
+ slice->pitch = width = align(width, 32);
slice->offset = size;
- blocks = util_format_get_nblocks(prsc->format, width, height);
+ blocks = util_format_get_nblocks(format, width, height);
/* 1d array and 2d array textures must all have the same layer size
* for each miplevel on a3xx. 3d textures can have different layer
* sizes for high levels, but the hw auto-sizer is buggy (or at least
@@ -430,11 +555,12 @@ fd_resource_create(struct pipe_screen *pscreen,
{
struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
struct pipe_resource *prsc = &rsc->base.b;
- uint32_t size;
+ enum pipe_format format = tmpl->format;
+ uint32_t size, alignment;
DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
"nr_samples=%u, usage=%u, bind=%x, flags=%x",
- tmpl->target, util_format_name(tmpl->format),
+ tmpl->target, util_format_name(format),
tmpl->width0, tmpl->height0, tmpl->depth0,
tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
tmpl->usage, tmpl->bind, tmpl->flags);
@@ -451,13 +577,18 @@ fd_resource_create(struct pipe_screen *pscreen,
util_range_init(&rsc->valid_buffer_range);
rsc->base.vtbl = &fd_resource_vtbl;
- if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
- rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT);
- else
- rsc->cpp = util_format_get_blocksize(tmpl->format);
+
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ format = PIPE_FORMAT_Z32_FLOAT;
+ else if (fd_screen(pscreen)->gpu_id < 400 &&
+ util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
+ format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ rsc->internal_format = format;
+ rsc->cpp = util_format_get_blocksize(format);
assert(rsc->cpp);
+ alignment = slice_alignment(pscreen, tmpl);
if (is_a4xx(fd_screen(pscreen))) {
switch (tmpl->target) {
case PIPE_TEXTURE_3D:
@@ -465,11 +596,12 @@ fd_resource_create(struct pipe_screen *pscreen,
break;
default:
rsc->layer_first = true;
+ alignment = 1;
break;
}
}
- size = setup_slices(rsc, slice_alignment(pscreen, tmpl));
+ size = setup_slices(rsc, alignment, format);
if (rsc->layer_first) {
rsc->layer_size = align(size, 4096);
@@ -548,7 +680,7 @@ fail:
return NULL;
}
-static void fd_blitter_pipe_begin(struct fd_context *ctx);
+static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
static void fd_blitter_pipe_end(struct fd_context *ctx);
/**
@@ -570,7 +702,7 @@ fd_blitter_pipe_copy_region(struct fd_context *ctx,
if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
return false;
- fd_blitter_pipe_begin(ctx);
+ fd_blitter_pipe_begin(ctx, false);
util_blitter_copy_texture(ctx->blitter,
dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
@@ -612,6 +744,25 @@ fd_resource_copy_region(struct pipe_context *pctx,
src, src_level, src_box);
}
+bool /* returns true when rendering should proceed, false when the recorded render condition says to skip it */
+fd_render_condition_check(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ if (!ctx->cond_query) /* no condition recorded: always render */
+ return true;
+
+ union pipe_query_result res = { 0 };
+ bool wait = /* wait for the query result unless one of the two *_NO_WAIT modes was requested */
+ ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
+ ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
+
+ if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
+ return (bool)res.u64 != ctx->cond_cond; /* render iff the result's truth value differs from cond_cond */
+
+ return true; /* get_query_result returned false (no result obtained): render anyway */
+}
+
/**
* Optimal hardware path for blitting pixels.
* Scaling, format conversion, up- and downsampling (resolve) are allowed.
@@ -630,6 +781,9 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
return;
}
+ if (info.render_condition_enable && !fd_render_condition_check(pctx))
+ return;
+
if (util_try_blit_via_copy_region(pctx, &info)) {
return; /* done */
}
@@ -646,13 +800,13 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
return;
}
- fd_blitter_pipe_begin(ctx);
+ fd_blitter_pipe_begin(ctx, info.render_condition_enable);
util_blitter_blit(ctx->blitter, &info);
fd_blitter_pipe_end(ctx);
}
static void
-fd_blitter_pipe_begin(struct fd_context *ctx)
+fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond)
{
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
@@ -673,6 +827,9 @@ fd_blitter_pipe_begin(struct fd_context *ctx)
(void **)ctx->fragtex.samplers);
util_blitter_save_fragment_sampler_views(ctx->blitter,
ctx->fragtex.num_textures, ctx->fragtex.textures);
+ if (!render_cond)
+ util_blitter_save_render_condition(ctx->blitter,
+ ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 7549becaa1f..9a9b0d08244 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -73,6 +73,7 @@ struct fd_resource {
struct u_resource base;
struct fd_bo *bo;
uint32_t cpp;
+ enum pipe_format internal_format;
bool layer_first; /* see above description */
uint32_t layer_size;
struct fd_resource_slice slices[MAX_MIP_LEVELS];
@@ -135,4 +136,6 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
void fd_resource_screen_init(struct pipe_screen *pscreen);
void fd_resource_context_init(struct pipe_context *pctx);
+bool fd_render_condition_check(struct pipe_context *pctx);
+
#endif /* FREEDRENO_RESOURCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 56d1834ef9c..5bbe4016a2a 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -160,11 +160,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
- case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
return 0;
@@ -176,27 +174,31 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_FAKE_SW_MSAA:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_CLIP_HALFZ:
return is_a3xx(screen) || is_a4xx(screen);
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- /* ignoring first/last_element.. but I guess that should be
- * easy to add..
- */
+ if (is_a3xx(screen)) return 16;
+ if (is_a4xx(screen)) return 32;
return 0;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- /* I think 32k on a4xx.. and we could possibly emulate more
- * by pretending 2d/rect textures and splitting high bits
- * of index into 2nd dimension..
+ /* We could possibly emulate more by pretending 2d/rect textures and
+ * splitting high bits of index into 2nd dimension..
*/
- return 16383;
-
- case PIPE_CAP_DEPTH_CLIP_DISABLE:
- case PIPE_CAP_CLIP_HALFZ:
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
- return is_a3xx(screen);
+ if (is_a3xx(screen)) return 8192;
+ if (is_a4xx(screen)) return 16384;
+ return 0;
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_START_INSTANCE:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
return is_a4xx(screen);
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -205,7 +207,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (glsl120)
return 120;
- return is_ir3(screen) ? 130 : 120;
+ return is_ir3(screen) ? 140 : 120;
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -220,15 +222,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
- case PIPE_CAP_FAKE_SW_MSAA:
- case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
index 04e4643b4c9..f5611abaec8 100644
--- a/src/gallium/drivers/freedreno/freedreno_texture.c
+++ b/src/gallium/drivers/freedreno/freedreno_texture.c
@@ -197,33 +197,15 @@ fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
continue;
const struct util_format_channel_description *chan =
- &desc->channel[desc->swizzle[j]];
- int size = chan->size;
-
- /* The Z16 texture format we use seems to look in the
- * 32-bit border color slots
- */
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
- size = 32;
-
- /* Formats like R11G11B10 or RGB9_E5 don't specify
- * per-channel sizes properly.
- */
- if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
- size = 16;
-
- if (chan->pure_integer && size > 16)
- bcolor32[desc->swizzle[j] + 4] =
- sampler->border_color.i[j];
- else if (size > 16)
- bcolor32[desc->swizzle[j]] =
- fui(sampler->border_color.f[j]);
- else if (chan->pure_integer)
- bcolor[desc->swizzle[j] + 8] =
- sampler->border_color.i[j];
- else
+ &desc->channel[desc->swizzle[j]];
+ if (chan->pure_integer) {
+ bcolor32[desc->swizzle[j] + 4] = sampler->border_color.i[j];
+ bcolor[desc->swizzle[j] + 8] = sampler->border_color.i[j];
+ } else {
+ bcolor32[desc->swizzle[j]] = fui(sampler->border_color.f[j]);
bcolor[desc->swizzle[j]] =
- util_float_to_half(sampler->border_color.f[j]);
+ util_float_to_half(sampler->border_color.f[j]);
+ }
}
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 157dc73a3c6..156bb0be247 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1177,6 +1177,33 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0);
break;
+ case nir_op_bit_count:
+ dst[0] = ir3_CBITS_B(b, src[0], 0);
+ break;
+ case nir_op_ifind_msb: {
+ struct ir3_instruction *cmp;
+ dst[0] = ir3_CLZ_S(b, src[0], 0);
+ cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0);
+ cmp->cat2.condition = IR3_COND_GE;
+ dst[0] = ir3_SEL_B32(b,
+ ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
+ cmp, 0, dst[0], 0);
+ break;
+ }
+ case nir_op_ufind_msb:
+ dst[0] = ir3_CLZ_B(b, src[0], 0);
+ dst[0] = ir3_SEL_B32(b,
+ ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0,
+ src[0], 0, dst[0], 0);
+ break;
+ case nir_op_find_lsb:
+ dst[0] = ir3_BFREV_B(b, src[0], 0);
+ dst[0] = ir3_CLZ_B(b, dst[0], 0);
+ break;
+ case nir_op_bitfield_reverse:
+ dst[0] = ir3_BFREV_B(b, src[0], 0);
+ break;
+
default:
compile_error(ctx, "Unhandled ALU op: %s\n",
nir_op_infos[alu->op].name);
@@ -1547,10 +1574,10 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
unreachable("bad sampler_dim");
}
- if (tex->is_shadow)
+ if (tex->is_shadow && tex->op != nir_texop_lod)
flags |= IR3_INSTR_S;
- if (tex->is_array)
+ if (tex->is_array && tex->op != nir_texop_lod)
flags |= IR3_INSTR_A;
*flagsp = flags;
@@ -1618,12 +1645,13 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_texop_txl: opc = OPC_SAML; break;
case nir_texop_txd: opc = OPC_SAMGQ; break;
case nir_texop_txf: opc = OPC_ISAML; break;
+ case nir_texop_lod: opc = OPC_GETLOD; break;
case nir_texop_txf_ms:
case nir_texop_txs:
- case nir_texop_lod:
case nir_texop_tg4:
case nir_texop_query_levels:
case nir_texop_texture_samples:
+ case nir_texop_samples_identical:
compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
return;
}
@@ -1665,10 +1693,10 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
src0[nsrc0++] = create_immed(b, fui(0.5));
}
- if (tex->is_shadow)
+ if (tex->is_shadow && tex->op != nir_texop_lod)
src0[nsrc0++] = compare;
- if (tex->is_array)
+ if (tex->is_array && tex->op != nir_texop_lod)
src0[nsrc0++] = coord[coords];
if (has_proj) {
@@ -1717,7 +1745,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_type_int:
type = TYPE_S32;
break;
- case nir_type_unsigned:
+ case nir_type_uint:
case nir_type_bool:
type = TYPE_U32;
break;
@@ -1725,12 +1753,26 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
unreachable("bad dest_type");
}
+ if (opc == OPC_GETLOD)
+ type = TYPE_U32;
+
sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW,
flags, tex->sampler_index, tex->sampler_index,
create_collect(b, src0, nsrc0),
create_collect(b, src1, nsrc1));
split_dest(b, dst, sam, 4);
+
+ /* GETLOD returns results in 4.8 fixed point */
+ if (opc == OPC_GETLOD) {
+ struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256));
+
+ compile_assert(ctx, tex->dest_type == nir_type_float);
+ for (i = 0; i < 2; i++) {
+ dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0,
+ factor, 0);
+ }
+ }
}
static void
@@ -1889,6 +1931,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
case nir_texop_query_levels:
emit_tex_query_levels(ctx, tex);
break;
+ case nir_texop_samples_identical:
+ unreachable("nir_texop_samples_identical");
default:
emit_tex(ctx, tex);
break;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 7e2c27d9765..5d1cccb0daa 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -166,7 +166,9 @@ struct ir3_shader_variant {
} outputs[16 + 2]; /* +POSITION +PSIZE */
bool writes_pos, writes_psize;
- /* vertices/inputs: */
+ /* attributes (VS) / varyings (FS):
+ * Note that sysval's should come *after* normal inputs.
+ */
unsigned inputs_count;
struct {
uint8_t slot;
@@ -229,7 +231,7 @@ struct ir3_shader {
struct ir3_compiler *compiler;
- struct pipe_context *pctx;
+ struct pipe_context *pctx; /* TODO replace w/ pipe_screen */
const struct tgsi_token *tokens;
struct pipe_stream_output_info stream_output;
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 83f81135590..31a93659647 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -64,6 +64,8 @@ NV50_C_SOURCES := \
nv50/nv50_3ddefs.xml.h \
nv50/nv50_3d.xml.h \
nv50/nv50_blit.h \
+ nv50/nv50_compute.c \
+ nv50/nv50_compute.xml.h \
nv50/nv50_context.c \
nv50/nv50_context.h \
nv50/nv50_defs.xml.h \
@@ -76,6 +78,10 @@ NV50_C_SOURCES := \
nv50/nv50_query.h \
nv50/nv50_query_hw.c \
nv50/nv50_query_hw.h \
+ nv50/nv50_query_hw_metric.c \
+ nv50/nv50_query_hw_metric.h \
+ nv50/nv50_query_hw_sm.c \
+ nv50/nv50_query_hw_sm.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 2a13e1086a0..9f84de03a4a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2357,6 +2357,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_PFETCH:
emitPFETCH(insn);
break;
+ case OP_AFETCH:
+ emitAFETCH(insn);
+ break;
case OP_EMIT:
case OP_RESTART:
emitOUT(insn);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 7859c8e79bd..41d2cc9167c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
Instruction *st;
if (slot->reg.file == FILE_MEMORY_LOCAL) {
- st = new_Instruction(func, OP_STORE, ty);
- st->setSrc(0, slot);
- st->setSrc(1, lval);
lval->noSpill = 1;
+ if (ty != TYPE_B96) {
+ st = new_Instruction(func, OP_STORE, ty);
+ st->setSrc(0, slot);
+ st->setSrc(1, lval);
+ } else {
+ st = new_Instruction(func, OP_SPLIT, ty);
+ st->setSrc(0, lval);
+ for (int d = 0; d < lval->reg.size / 4; ++d)
+ st->setDef(d, new_LValue(func, FILE_GPR));
+
+ for (int d = lval->reg.size / 4 - 1; d >= 0; --d) {
+ Value *tmp = cloneShallow(func, slot);
+ tmp->reg.size = 4;
+ tmp->reg.data.offset += 4 * d;
+
+ Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32);
+ s->setSrc(0, tmp);
+ s->setSrc(1, st->getDef(d));
+ defi->bb->insertAfter(defi, s);
+ }
+ }
} else {
st = new_Instruction(func, OP_CVT, ty);
st->setDef(0, slot);
@@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
Instruction *ld;
if (slot->reg.file == FILE_MEMORY_LOCAL) {
lval->noSpill = 1;
- ld = new_Instruction(func, OP_LOAD, ty);
+ if (ty != TYPE_B96) {
+ ld = new_Instruction(func, OP_LOAD, ty);
+ } else {
+ ld = new_Instruction(func, OP_MERGE, ty);
+ for (int d = 0; d < lval->reg.size / 4; ++d) {
+ Value *tmp = cloneShallow(func, slot);
+ LValue *val;
+ tmp->reg.size = 4;
+ tmp->reg.data.offset += 4 * d;
+
+ Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32);
+ l->setDef(0, (val = new_LValue(func, FILE_GPR)));
+ l->setSrc(0, tmp);
+ usei->bb->insertBefore(usei, l);
+ ld->setSrc(d, val);
+ val->noSpill = 1;
+ }
+ ld->setDef(0, lval);
+ usei->bb->insertBefore(usei, ld);
+ return lval;
+ }
} else {
ld = new_Instruction(func, OP_CVT, ty);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 68e69beb08f..1695553d793 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -657,8 +657,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
buffer->domain = NOUVEAU_BO_GART;
- } else if (buffer->base.bind &
- (screen->vidmem_bindings & screen->sysmem_bindings)) {
+ } else if (buffer->base.bind == 0 || (buffer->base.bind &
+ (screen->vidmem_bindings & screen->sysmem_bindings))) {
switch (buffer->base.usage) {
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
@@ -685,6 +685,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
if (buffer->base.bind & screen->sysmem_bindings)
buffer->domain = NOUVEAU_BO_GART;
}
+ /* There can be very special situations where we want non-gpu-mapped
+ * buffers, but never through this interface.
+ */
+ assert(buffer->domain);
ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
if (ret == false)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
new file mode 100644
index 00000000000..6d23fd66945
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2012 Francisco Jerez
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_compute.xml.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+/**
+ * Create the compute engine object on the screen's channel and emit its
+ * one-time initial state into \p push.
+ *
+ * \param screen  screen providing the device, channel and the stack, TLS and
+ *                TIC/TSC buffer objects referenced by the setup methods
+ * \param push    push buffer that receives the setup methods
+ * \return 0 on success, -1 if the chipset family is not handled here, or the
+ *         non-zero value returned by nouveau_object_new()
+ */
+int
+nv50_screen_compute_setup(struct nv50_screen *screen,
+                          struct nouveau_pushbuf *push)
+{
+   struct nouveau_device *dev = screen->base.device;
+   struct nouveau_object *chan = screen->base.channel;
+   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
+   const unsigned family = dev->chipset & 0xf0;
+   unsigned obj_class;
+   int i, ret;
+
+   /* Pick the compute class: only NVA3, NVA5 and NVA8 get the NVA3 class,
+    * every other supported chipset uses the NV50 one.
+    */
+   if (family == 0x50 || family == 0x80 || family == 0x90) {
+      obj_class = NV50_COMPUTE_CLASS;
+   } else if (family == 0xa0) {
+      const bool is_nva3 = dev->chipset == 0xa3 ||
+                           dev->chipset == 0xa5 ||
+                           dev->chipset == 0xa8;
+
+      obj_class = is_nva3 ? NVA3_COMPUTE_CLASS : NV50_COMPUTE_CLASS;
+   } else {
+      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+      return -1;
+   }
+
+   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
+                            &screen->compute);
+   if (ret)
+      return ret;
+
+   /* Bind the new object to the compute subchannel. */
+   BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->compute->handle);
+
+   BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
+   PUSH_DATA (push, 1);
+
+   /* Call stack buffer. */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->stack_bo->offset);
+   PUSH_DATA (push, screen->stack_bo->offset);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
+   PUSH_DATA (push, 4);
+
+   BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
+   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
+   BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
+   PUSH_DATA (push, 0x100);
+
+   /* Global memory segments: all 16 are linear with a zero base address;
+    * segments 0..14 get a limit of 0 and segment 15 a limit of ~0.
+    */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
+   PUSH_DATA (push, fifo->vram);
+
+   for (i = 0; i < 16; i++) {
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 0);
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
+      PUSH_DATA (push, (i == 15) ? ~0 : 0);
+      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
+      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
+   }
+
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
+   PUSH_DATA (push, 7);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
+   PUSH_DATA (push, 7);
+   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+   PUSH_DATA (push, 0);
+
+   /* Texturing state. */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
+   PUSH_DATA (push, 0x54);
+   BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
+   PUSH_DATA (push, 0);
+
+   /* TIC entries start at the beginning of the txc buffer ... */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->txc->offset);
+   PUSH_DATA (push, screen->txc->offset);
+   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
+
+   /* ... and TSC entries 64 KiB into the same buffer. */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->txc->offset + 65536);
+   PUSH_DATA (push, screen->txc->offset + 65536);
+   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
+
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
+   PUSH_DATA (push, fifo->vram);
+
+   /* Thread-local storage, placed 64 KiB into the TLS buffer. */
+   BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
+   PUSH_DATA (push, fifo->vram);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
+   PUSH_DATA (push, screen->tls_bo->offset + 65536);
+   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
+   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
+
+   return 0;
+}
+
+/**
+ * Ensure the currently bound compute program is translated and uploaded.
+ *
+ * Translation happens lazily on first use. After a successful upload a
+ * CODE_CB_FLUSH method is emitted on the context's push buffer.
+ *
+ * \return true if the program already has memory assigned or was uploaded
+ *         now; false if translation fails, the program has no code, or the
+ *         upload fails
+ */
+static bool
+nv50_compute_validate_program(struct nv50_context *nv50)
+{
+   struct nv50_program *prog = nv50->compprog;
+   struct nouveau_pushbuf *push;
+
+   /* Code memory already assigned: nothing left to do. */
+   if (prog->mem)
+      return true;
+
+   if (!prog->translated) {
+      prog->translated = nv50_program_translate(
+         prog, nv50->screen->base.device->chipset, &nv50->base.debug);
+      if (!prog->translated)
+         return false;
+   }
+
+   /* A translated program without any code cannot be launched. */
+   if (unlikely(!prog->code_size))
+      return false;
+
+   if (!nv50_program_upload_code(nv50, prog))
+      return false;
+
+   push = nv50->base.pushbuf;
+   BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
+   PUSH_DATA (push, 0);
+   return true;
+}
+
+/**
+ * Add every non-NULL resource stored in nv50->global_residents to the
+ * compute buffer context (NV50_BIND_CP_GLOBAL bin) with read/write access.
+ */
+static void
+nv50_compute_validate_globals(struct nv50_context *nv50)
+{
+   /* The dynarray stores raw pipe_resource pointers. */
+   const unsigned count =
+      nv50->global_residents.size / sizeof(struct pipe_resource *);
+   unsigned idx;
+
+   for (idx = 0; idx < count; ++idx) {
+      struct pipe_resource *res = *util_dynarray_element(
+         &nv50->global_residents, struct pipe_resource *, idx);
+
+      /* Empty slots are skipped. */
+      if (!res)
+         continue;
+
+      nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
+                               nv04_resource(res), NOUVEAU_BO_RDWR);
+   }
+}
+
+/**
+ * Validate all compute state needed before a grid launch.
+ *
+ * Validates the program, re-adds global residents when the
+ * NV50_NEW_CP_GLOBALS dirty bit is set, then attaches the compute buffer
+ * context to the push buffer and validates it.
+ *
+ * \return true on success; false if program validation or push buffer
+ *         validation fails
+ */
+static bool
+nv50_compute_state_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push;
+
+   if (!nv50_compute_validate_program(nv50))
+      return false;
+
+   if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
+      nv50_compute_validate_globals(nv50);
+
+   /* TODO: validate textures, samplers, surfaces */
+
+   nv50_bufctx_fence(nv50->bufctx_cp, false);
+
+   push = nv50->base.pushbuf;
+   nouveau_pushbuf_bufctx(push, nv50->bufctx_cp);
+   if (unlikely(nouveau_pushbuf_validate(push)))
+      return false;
+
+   /* Fence the buffers again if state.flushed is set at this point. */
+   if (unlikely(nv50->state.flushed))
+      nv50_bufctx_fence(nv50->bufctx_cp, true);
+
+   return true;
+}
+
+/**
+ * Upload the kernel input (user parameters) for the next launch.
+ *
+ * The parameter size of the bound compute program is rounded up to a
+ * multiple of 4 bytes and programmed as a dword count in USER_PARAM_COUNT.
+ * If it is non-zero, \p input is copied into a temporary GART allocation,
+ * which is then submitted as the data of the USER_PARAM methods.
+ *
+ * \param nv50   context whose compute program defines the parameter size
+ * \param input  source data; the aligned size in bytes is read from it
+ */
+static void
+nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+   const unsigned size = align(nv50->compprog->parm_size, 0x4);
+   struct nouveau_mm_allocation *alloc;
+   struct nouveau_bo *bo = NULL;
+   unsigned bo_offset;
+
+   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+   PUSH_DATA (push, (size / 4) << 8);
+
+   /* No parameters: the count written above is all that is needed. */
+   if (!size)
+      return;
+
+   /* NOTE(review): allocation failure is only caught by the assert, and the
+    * result of nouveau_bo_map() is not checked - confirm this is acceptable
+    * for builds with assertions disabled.
+    */
+   alloc = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &bo_offset);
+   assert(alloc);
+
+   nouveau_bo_map(bo, 0, screen->base.client);
+   memcpy(bo->map + bo_offset, input, size);
+
+   /* Reference the staging bo through the generic bufctx (bin 0) so it is
+    * part of the validated buffer list for this submission.
+    */
+   nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+   nouveau_pushbuf_bufctx(push, nv50->bufctx);
+   nouveau_pushbuf_validate(push);
+
+   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
+   nouveau_pushbuf_data(push, bo, bo_offset, size);
+
+   /* Release the suballocation via fence work attached to the current
+    * fence, then drop the local bo reference and the bufctx entry.
+    */
+   nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, alloc);
+   nouveau_bo_ref(NULL, &bo);
+   nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
+/**
+ * Look up the code address of the kernel entry point named by \p label.
+ *
+ * \return code_base of the bound compute program plus the offset of the
+ *         first symbol whose label matches; plain code_base if the program
+ *         has no symbols or none matches
+ */
+static uint32_t
+nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
+{
+   struct nv50_program *prog = nv50->compprog;
+   const struct nv50_ir_prog_symbol *table =
+      (const struct nv50_ir_prog_symbol *)prog->cp.syms;
+   uint32_t entry_offset = 0; /* used when no symbol matches */
+   unsigned n;
+
+   for (n = 0; n < prog->cp.num_syms; ++n) {
+      if (table[n].label != label)
+         continue;
+      entry_offset = table[n].offset;
+      break;
+   }
+
+   return prog->code_base + entry_offset;
+}
+
+/**
+ * Launch a compute grid with the currently bound compute program.
+ *
+ * \param pipe          context to launch on
+ * \param block_layout  block dimensions; elements [0], [1] and [2] are read
+ * \param grid_layout   grid dimensions; only elements [0] and [1] are read
+ * \param label         symbol label of the kernel entry point
+ * \param input         kernel parameter data, uploaded before the launch
+ *
+ * If compute state validation fails, an error is logged and nothing is
+ * launched. On success the fragment program is marked dirty.
+ */
+void
+nv50_launch_grid(struct pipe_context *pipe,
+                 const uint *block_layout, const uint *grid_layout,
+                 uint32_t label, const void *input)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_program *cp = nv50->compprog;
+   const unsigned threads_per_block =
+      block_layout[0] * block_layout[1] * block_layout[2];
+
+   if (!nv50_compute_state_validate(nv50)) {
+      NOUVEAU_ERR("Failed to launch grid !\n");
+      return;
+   }
+
+   nv50_compute_upload_input(nv50, input);
+
+   BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
+   PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));
+
+   /* Shared size covers the program's shared memory, its parameters and an
+    * extra 0x10 bytes, rounded up to a multiple of 0x40.
+    */
+   BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
+   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
+   BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
+   PUSH_DATA (push, cp->max_gpr);
+
+   /* block setup: X in the low half, Y in the high half, Z separately */
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
+   PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]);
+   PUSH_DATA (push, block_layout[2]);
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
+   PUSH_DATA (push, 1 << 16 | threads_per_block);
+   BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
+   PUSH_DATA (push, 1);
+
+   /* grid setup: X in the low half, Y in the high half */
+   BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
+   PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]);
+   BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
+   PUSH_DATA (push, 1);
+
+   /* kernel launch, followed by a serialize */
+   BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+   PUSH_DATA (push, 0);
+
+   /* binding a compute shader clobbers fragment shader state */
+   nv50->dirty |= NV50_NEW_FRAGPROG;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
new file mode 100644
index 00000000000..268d11253b6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.xml.h
@@ -0,0 +1,444 @@
+#ifndef NV50_COMPUTE_XML
+#define NV50_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/graph/g80_compute.xml ( 14027 bytes, from 2015-02-14 02:01:36)
+- rnndb/copyright.xml ( 6456 bytes, from 2015-02-14 02:01:36)
+- rnndb/nvchipsets.xml ( 2833 bytes, from 2015-04-28 16:28:33)
+- rnndb/fifo/nv_object.xml ( 15390 bytes, from 2015-04-22 20:36:09)
+- rnndb/g80_defs.xml ( 18210 bytes, from 2015-10-19 20:49:59)
+
+Copyright (C) 2006-2015 by the following authors:
+- Artur Huillet <[email protected]> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <[email protected]> (koala_br)
+- Carlos Martin <[email protected]> (carlosmn)
+- Christoph Bumiller <[email protected]> (calim, chrisbmr)
+- Dawid Gajownik <[email protected]> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <[email protected]> (lumag)
+- EdB <[email protected]> (edb_)
+- Erik Waling <[email protected]> (erikwaling)
+- Francisco Jerez <[email protected]> (curro)
+- Ilia Mirkin <[email protected]> (imirkin)
+- jb17bsome <[email protected]> (jb17bsome)
+- Jeremy Kolb <[email protected]> (kjeremy)
+- Laurent Carlier <[email protected]> (lordheavy)
+- Luca Barbieri <[email protected]> (lb, lb1)
+- Maarten Maathuis <[email protected]> (stillunknown)
+- Marcin Kościelnicki <[email protected]> (mwk, koriakin)
+- Mark Carey <[email protected]> (careym)
+- Matthieu Castet <[email protected]> (mat-c)
+- nvidiaman <[email protected]> (nvidiaman)
+- Patrice Mandin <[email protected]> (pmandin, pmdata)
+- Pekka Paalanen <[email protected]> (pq, ppaalanen)
+- Peter Popov <[email protected]> (ironpeter)
+- Richard Hughes <[email protected]> (hughsient)
+- Rudi Cilibrasi <[email protected]> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <[email protected]> (leroutier)
+- Stephane Marchesin <[email protected]> (marcheu)
+- sturmflut <[email protected]> (sturmflut)
+- Sylvain Munaut <[email protected]>
+- Victor Stinner <[email protected]> (haypo)
+- Wladmir van der Laan <[email protected]> (miathan6)
+- Younes Manton <[email protected]> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NV50_COMPUTE_DMA_NOTIFY 0x00000180
+
+#define NV50_COMPUTE_DMA_GLOBAL 0x000001a0
+
+#define NV50_COMPUTE_DMA_QUERY 0x000001a4
+
+#define NV50_COMPUTE_DMA_LOCAL 0x000001b8
+
+#define NV50_COMPUTE_DMA_STACK 0x000001bc
+
+#define NV50_COMPUTE_DMA_CODE_CB 0x000001c0
+
+#define NV50_COMPUTE_DMA_TSC 0x000001c4
+
+#define NV50_COMPUTE_DMA_TIC 0x000001c8
+
+#define NV50_COMPUTE_DMA_TEXTURE 0x000001cc
+
+#define NV50_COMPUTE_UNK0200 0x00000200
+#define NV50_COMPUTE_UNK0200_UNK1__MASK 0x0000ffff
+#define NV50_COMPUTE_UNK0200_UNK1__SHIFT 0
+#define NV50_COMPUTE_UNK0200_UNK2__MASK 0x00ff0000
+#define NV50_COMPUTE_UNK0200_UNK2__SHIFT 16
+
+#define NV50_COMPUTE_UNK0204 0x00000204
+
+#define NV50_COMPUTE_UNK0208 0x00000208
+
+#define NV50_COMPUTE_UNK020C 0x0000020c
+
+#define NV50_COMPUTE_CP_ADDRESS_HIGH 0x00000210
+
+#define NV50_COMPUTE_CP_ADDRESS_LOW 0x00000214
+
+#define NV50_COMPUTE_STACK_ADDRESS_HIGH 0x00000218
+
+#define NV50_COMPUTE_STACK_ADDRESS_LOW 0x0000021c
+
+#define NV50_COMPUTE_STACK_SIZE_LOG 0x00000220
+
+#define NV50_COMPUTE_CALL_LIMIT_LOG 0x00000224
+
+#define NV50_COMPUTE_UNK0228 0x00000228
+#define NV50_COMPUTE_UNK0228_UNK0 0x00000001
+#define NV50_COMPUTE_UNK0228_UNK4__MASK 0x00000ff0
+#define NV50_COMPUTE_UNK0228_UNK4__SHIFT 4
+#define NV50_COMPUTE_UNK0228_UNK12__MASK 0x000ff000
+#define NV50_COMPUTE_UNK0228_UNK12__SHIFT 12
+
+#define NV50_COMPUTE_TSC_ADDRESS_HIGH 0x0000022c
+
+#define NV50_COMPUTE_TSC_ADDRESS_LOW 0x00000230
+#define NV50_COMPUTE_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NV50_COMPUTE_TSC_LIMIT 0x00000234
+#define NV50_COMPUTE_TSC_LIMIT__MAX 0x00001fff
+
+#define NV50_COMPUTE_CB_ADDR 0x00000238
+#define NV50_COMPUTE_CB_ADDR_ID__MASK 0x003fff00
+#define NV50_COMPUTE_CB_ADDR_ID__SHIFT 8
+#define NV50_COMPUTE_CB_ADDR_BUFFER__MASK 0x0000007f
+#define NV50_COMPUTE_CB_ADDR_BUFFER__SHIFT 0
+
+#define NV50_COMPUTE_CB_DATA(i0) (0x0000023c + 0x4*(i0))
+#define NV50_COMPUTE_CB_DATA__ESIZE 0x00000004
+#define NV50_COMPUTE_CB_DATA__LEN 0x00000010
+
+#define NV50_COMPUTE_TSC_FLUSH 0x0000027c
+#define NV50_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_COMPUTE_TIC_FLUSH 0x00000280
+#define NV50_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_COMPUTE_DELAY1 0x00000284
+
+#define NV50_COMPUTE_WATCHDOG_TIMER 0x00000288
+
+#define NV50_COMPUTE_DELAY2 0x0000028c
+
+#define NV50_COMPUTE_UNK0290 0x00000290
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_HIGH 0x00000294
+
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW 0x00000298
+#define NV50_COMPUTE_LOCAL_ADDRESS_LOW__ALIGN 0x00000100
+
+#define NV50_COMPUTE_LOCAL_SIZE_LOG 0x0000029c
+
+#define NV50_COMPUTE_UNK02A0 0x000002a0
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_HIGH 0x000002a4
+
+#define NV50_COMPUTE_CB_DEF_ADDRESS_LOW 0x000002a8
+
+#define NV50_COMPUTE_CB_DEF_SET 0x000002ac
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__MASK 0x0000ffff
+#define NV50_COMPUTE_CB_DEF_SET_SIZE__SHIFT 0
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__MASK 0x007f0000
+#define NV50_COMPUTE_CB_DEF_SET_BUFFER__SHIFT 16
+
+#define NV50_COMPUTE_UNK02B0 0x000002b0
+
+#define NV50_COMPUTE_BLOCK_ALLOC 0x000002b4
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__MASK 0x0000ffff
+#define NV50_COMPUTE_BLOCK_ALLOC_THREADS__SHIFT 0
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__MASK 0x00ff0000
+#define NV50_COMPUTE_BLOCK_ALLOC_BARRIERS__SHIFT 16
+
+#define NV50_COMPUTE_LANES32_ENABLE 0x000002b8
+
+#define NV50_COMPUTE_UNK02BC 0x000002bc
+#define NV50_COMPUTE_UNK02BC_UNK1__MASK 0x00000007
+#define NV50_COMPUTE_UNK02BC_UNK1__SHIFT 0
+#define NV50_COMPUTE_UNK02BC_UNK2__MASK 0x00000070
+#define NV50_COMPUTE_UNK02BC_UNK2__SHIFT 4
+
+#define NV50_COMPUTE_CP_REG_ALLOC_TEMP 0x000002c0
+
+#define NV50_COMPUTE_TIC_ADDRESS_HIGH 0x000002c4
+
+#define NV50_COMPUTE_TIC_ADDRESS_LOW 0x000002c8
+
+#define NV50_COMPUTE_TIC_LIMIT 0x000002cc
+
+#define NV50_COMPUTE_MP_PM_SET(i0) (0x000002d0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NV50_COMPUTE_MP_PM_SET__LEN 0x00000004
+
+#define NV50_COMPUTE_MP_PM_CONTROL(i0) (0x000002e0 + 0x4*(i0))
+#define NV50_COMPUTE_MP_PM_CONTROL__ESIZE 0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL__LEN 0x00000004
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__MASK 0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE__SHIFT 0
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP 0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_MODE_LOGOP_PULSE 0x00000001
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__MASK 0x00000070
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT__SHIFT 4
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK0 0x00000000
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK1 0x00000010
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK2 0x00000020
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK3 0x00000030
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK4 0x00000040
+#define NV50_COMPUTE_MP_PM_CONTROL_UNIT_UNK5 0x00000050
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__MASK 0x00ffff00
+#define NV50_COMPUTE_MP_PM_CONTROL_FUNC__SHIFT 8
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__MASK 0xff000000
+#define NV50_COMPUTE_MP_PM_CONTROL_SIG__SHIFT 24
+
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE 0x000002f0
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_0 0x00000001
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_1 0x00000002
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_2 0x00000004
+#define NV50_COMPUTE_MP_PM_OVERFLOW_TRAP_ENABLE_3 0x00000008
+
+#define NV50_COMPUTE_UNK02F4 0x000002f4
+
+#define NV50_COMPUTE_BLOCKDIM_LATCH 0x000002f8
+
+#define NV50_COMPUTE_LOCAL_WARPS_LOG_ALLOC 0x000002fc
+
+#define NV50_COMPUTE_LOCAL_WARPS_NO_CLAMP 0x00000300
+
+#define NV50_COMPUTE_STACK_WARPS_LOG_ALLOC 0x00000304
+
+#define NV50_COMPUTE_STACK_WARPS_NO_CLAMP 0x00000308
+
+#define NV50_COMPUTE_UNK030C 0x0000030c
+
+#define NV50_COMPUTE_QUERY_ADDRESS_HIGH 0x00000310
+
+#define NV50_COMPUTE_QUERY_ADDRESS_LOW 0x00000314
+
+#define NV50_COMPUTE_QUERY_SEQUENCE 0x00000318
+
+#define NV50_COMPUTE_QUERY_GET 0x0000031c
+#define NV50_COMPUTE_QUERY_GET_INTR 0x00000200
+#define NV50_COMPUTE_QUERY_GET_SHORT 0x00008000
+
+#define NV50_COMPUTE_COND_ADDRESS_HIGH 0x00000320
+
+#define NV50_COMPUTE_COND_ADDRESS_LOW 0x00000324
+
+#define NV50_COMPUTE_COND_MODE 0x00000328
+#define NV50_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NV50_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NV50_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NV50_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_COMPUTE_UNK032C 0x0000032c
+
+#define NV50_COMPUTE_UNK0330 0x00000330
+
+#define NV50_COMPUTE_UNK0334(i0) (0x00000334 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0334__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0334__LEN 0x00000003
+
+#define NV50_COMPUTE_UNK0340(i0) (0x00000340 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0340__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0340__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0348(i0) (0x00000348 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0348__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0348__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0350(i0) (0x00000350 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0350__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0350__LEN 0x00000002
+
+#define NV50_COMPUTE_UNK0358 0x00000358
+
+#define NV50_COMPUTE_UNK035C 0x0000035c
+
+#define NV50_COMPUTE_UNK0360 0x00000360
+#define NV50_COMPUTE_UNK0360_UNK0__MASK 0x000000f0
+#define NV50_COMPUTE_UNK0360_UNK0__SHIFT 4
+#define NV50_COMPUTE_UNK0360_UNK1__MASK 0x00000f00
+#define NV50_COMPUTE_UNK0360_UNK1__SHIFT 8
+
+#define NV50_COMPUTE_UNK0364 0x00000364
+
+#define NV50_COMPUTE_LAUNCH 0x00000368
+
+#define NV50_COMPUTE_UNK036C 0x0000036c
+
+#define NV50_COMPUTE_UNK0370 0x00000370
+
+#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__MASK 0x000000ff
+#define NV50_COMPUTE_USER_PARAM_COUNT_UNK0__SHIFT 0
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MASK 0x0000ff00
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__SHIFT 8
+#define NV50_COMPUTE_USER_PARAM_COUNT_COUNT__MAX 0x00000040
+
+#define NV50_COMPUTE_LINKED_TSC 0x00000378
+
+#define NV50_COMPUTE_UNK037C 0x0000037c
+#define NV50_COMPUTE_UNK037C_ALWAYS_DERIV 0x00000001
+#define NV50_COMPUTE_UNK037C_UNK16 0x00010000
+
+#define NV50_COMPUTE_CODE_CB_FLUSH 0x00000380
+
+#define NV50_COMPUTE_UNK0384 0x00000384
+
+#define NV50_COMPUTE_GRIDID 0x00000388
+
+#define NV50_COMPUTE_UNK038C(i0) (0x0000038c + 0x4*(i0))
+#define NV50_COMPUTE_UNK038C__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK038C__LEN 0x00000003
+
+#define NV50_COMPUTE_WRCACHE_FLUSH 0x00000398
+
+#define NV50_COMPUTE_UNK039C(i0) (0x0000039c + 0x4*(i0))
+#define NV50_COMPUTE_UNK039C__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK039C__LEN 0x00000002
+
+#define NV50_COMPUTE_GRIDDIM 0x000003a4
+#define NV50_COMPUTE_GRIDDIM_X__MASK 0x0000ffff
+#define NV50_COMPUTE_GRIDDIM_X__SHIFT 0
+#define NV50_COMPUTE_GRIDDIM_Y__MASK 0xffff0000
+#define NV50_COMPUTE_GRIDDIM_Y__SHIFT 16
+
+#define NV50_COMPUTE_SHARED_SIZE 0x000003a8
+#define NV50_COMPUTE_SHARED_SIZE__MAX 0x00004000
+#define NV50_COMPUTE_SHARED_SIZE__ALIGN 0x00000040
+
+#define NV50_COMPUTE_BLOCKDIM_XY 0x000003ac
+#define NV50_COMPUTE_BLOCKDIM_XY_X__MASK 0x0000ffff
+#define NV50_COMPUTE_BLOCKDIM_XY_X__SHIFT 0
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__MASK 0xffff0000
+#define NV50_COMPUTE_BLOCKDIM_XY_Y__SHIFT 16
+
+#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0
+#define NV50_COMPUTE_BLOCKDIM_Z__MIN 0x00000001
+#define NV50_COMPUTE_BLOCKDIM_Z__MAX 0x00000040
+
+#define NV50_COMPUTE_CP_START_ID 0x000003b4
+
+#define NV50_COMPUTE_REG_MODE 0x000003b8
+#define NV50_COMPUTE_REG_MODE_PACKED 0x00000001
+#define NV50_COMPUTE_REG_MODE_STRIPED 0x00000002
+
+#define NV50_COMPUTE_TEX_LIMITS 0x000003bc
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NV50_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NV50_COMPUTE_BIND_TSC 0x000003c0
+#define NV50_COMPUTE_BIND_TSC_VALID 0x00000001
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__MASK 0x000000f0
+#define NV50_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4
+#define NV50_COMPUTE_BIND_TSC_TSC__MASK 0x001ff000
+#define NV50_COMPUTE_BIND_TSC_TSC__SHIFT 12
+
+#define NV50_COMPUTE_BIND_TIC 0x000003c4
+#define NV50_COMPUTE_BIND_TIC_VALID 0x00000001
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NV50_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1
+#define NV50_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NV50_COMPUTE_BIND_TIC_TIC__SHIFT 9
+
+#define NV50_COMPUTE_SET_PROGRAM_CB 0x000003c8
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__MASK 0x00000f00
+#define NV50_COMPUTE_SET_PROGRAM_CB_INDEX__SHIFT 8
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000
+#define NV50_COMPUTE_SET_PROGRAM_CB_BUFFER__SHIFT 12
+#define NV50_COMPUTE_SET_PROGRAM_CB_VALID 0x000000ff
+
+#define NV50_COMPUTE_UNK03CC 0x000003cc
+
+#define NV50_COMPUTE_TEX_CACHE_CTL 0x000003d0
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NV50_COMPUTE_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NV50_COMPUTE_UNK03D4 0x000003d4
+
+#define NV50_COMPUTE_UNK03D8 0x000003d8
+
+#define NV50_COMPUTE_UNK03DC 0x000003dc
+
+#define NV50_COMPUTE_UNK03E0 0x000003e0
+
+#define NV50_COMPUTE_UNK03E4 0x000003e4
+
+#define NVA3_COMPUTE_TEX_MISC 0x000003e8
+#define NVA3_COMPUTE_TEX_MISC_UNK1 0x00000001
+#define NVA3_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002
+
+#define NV50_COMPUTE_GLOBAL(i0) (0x00000400 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL__ESIZE 0x00000020
+#define NV50_COMPUTE_GLOBAL__LEN 0x00000010
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_HIGH(i0) (0x00000400 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_ADDRESS_LOW(i0) (0x00000404 + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_PITCH(i0) (0x00000408 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_PITCH__MAX 0x00800000
+#define NV50_COMPUTE_GLOBAL_PITCH__ALIGN 0x00000100
+
+#define NV50_COMPUTE_GLOBAL_LIMIT(i0) (0x0000040c + 0x20*(i0))
+
+#define NV50_COMPUTE_GLOBAL_MODE(i0) (0x00000410 + 0x20*(i0))
+#define NV50_COMPUTE_GLOBAL_MODE_LINEAR 0x00000001
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__MASK 0x000000f0
+#define NV50_COMPUTE_GLOBAL_MODE_UNK1__SHIFT 4
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__MASK 0x00000f00
+#define NV50_COMPUTE_GLOBAL_MODE_TILE_MODE__SHIFT 8
+
+#define NV50_COMPUTE_USER_PARAM(i0) (0x00000600 + 0x4*(i0))
+#define NV50_COMPUTE_USER_PARAM__ESIZE 0x00000004
+#define NV50_COMPUTE_USER_PARAM__LEN 0x00000040
+
+#define NV50_COMPUTE_UNK0700(i0) (0x00000700 + 0x4*(i0))
+#define NV50_COMPUTE_UNK0700__ESIZE 0x00000004
+#define NV50_COMPUTE_UNK0700__LEN 0x00000010
+
+
+#endif /* NV50_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 7867c2df7f3..4874b77b1e1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -113,6 +113,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
nouveau_bufctx_del(&nv50->bufctx_3d);
nouveau_bufctx_del(&nv50->bufctx);
+ nouveau_bufctx_del(&nv50->bufctx_cp);
util_unreference_framebuffer_state(&nv50->framebuffer);
@@ -131,6 +132,14 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
if (!nv50->constbuf[s][i].user)
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
}
+
+ for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource **res = util_dynarray_element(
+ &nv50->global_residents, struct pipe_resource *, i);
+ pipe_resource_reference(res, NULL);
+ }
+ util_dynarray_fini(&nv50->global_residents);
}
static void
@@ -159,9 +168,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nv50_context *nv50 = nv50_context(&ctx->pipe);
+ unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
if (nv50->framebuffer.cbufs[i] &&
@@ -173,7 +183,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nv50->framebuffer.zsbuf &&
nv50->framebuffer.zsbuf->texture == res) {
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
@@ -183,11 +193,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_SAMPLER_VIEW)) {
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
for (i = 0; i < nv50->num_vtxbufs; ++i) {
@@ -263,10 +273,13 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
nv50->base.pushbuf = screen->base.pushbuf;
nv50->base.client = screen->base.client;
- ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT,
- &nv50->bufctx_3d);
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_3D_COUNT,
+ &nv50->bufctx_3d);
if (!ret)
- ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_CP_COUNT,
+ &nv50->bufctx_cp);
if (ret)
goto out_err;
@@ -290,6 +303,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->draw_vbo = nv50_draw_vbo;
pipe->clear = nv50_clear;
+ pipe->launch_grid = nv50_launch_grid;
pipe->flush = nv50_flush;
pipe->texture_barrier = nv50_texture_barrier;
@@ -335,19 +349,30 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+ if (screen->compute) {
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->stack_bo);
+ }
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
+ if (screen->compute)
+ BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
nv50->base.scratch.bo_size = 2 << 20;
+ util_dynarray_init(&nv50->global_residents);
+
return pipe;
out_err:
if (nv50->bufctx_3d)
nouveau_bufctx_del(&nv50->bufctx_3d);
+ if (nv50->bufctx_cp)
+ nouveau_bufctx_del(&nv50->bufctx_cp);
if (nv50->bufctx)
nouveau_bufctx_del(&nv50->bufctx);
FREE(nv50->blit);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index fb74a9748a3..2cebcd99423 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -49,6 +49,10 @@
#define NV50_NEW_MIN_SAMPLES (1 << 22)
#define NV50_NEW_CONTEXT (1 << 31)
+#define NV50_NEW_CP_PROGRAM (1 << 0)
+#define NV50_NEW_CP_GLOBALS (1 << 1)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
#define NV50_BIND_FB 0
#define NV50_BIND_VERTEX 1
#define NV50_BIND_VERTEX_TMP 2
@@ -58,7 +62,15 @@
#define NV50_BIND_SO 53
#define NV50_BIND_SCREEN 54
#define NV50_BIND_TLS 55
-#define NV50_BIND_COUNT 56
+#define NV50_BIND_3D_COUNT 56
+
+/* compute bufctx (during launch_grid) */
+#define NV50_BIND_CP_GLOBAL 0
+#define NV50_BIND_CP_SCREEN 1
+#define NV50_BIND_CP_QUERY 2
+#define NV50_BIND_CP_COUNT 3
+
+/* bufctx for other operations */
#define NV50_BIND_2D 0
#define NV50_BIND_M2MF 0
#define NV50_BIND_FENCE 1
@@ -101,8 +113,10 @@ struct nv50_context {
struct nouveau_bufctx *bufctx_3d;
struct nouveau_bufctx *bufctx;
+ struct nouveau_bufctx *bufctx_cp;
uint32_t dirty;
+ uint32_t dirty_cp; /* dirty flags for compute state */
bool cb_dirty;
struct nv50_graph_state state;
@@ -115,6 +129,7 @@ struct nv50_context {
struct nv50_program *vertprog;
struct nv50_program *gmtyprog;
struct nv50_program *fragprog;
+ struct nv50_program *compprog;
struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[3];
@@ -163,6 +178,8 @@ struct nv50_context {
uint32_t cond_condmode; /* the calculated condition */
struct nv50_blitctx *blit;
+
+ struct util_dynarray global_residents;
};
static inline struct nv50_context *
@@ -302,4 +319,9 @@ struct pipe_video_buffer *
nv98_video_buffer_create(struct pipe_context *pipe,
const struct pipe_video_buffer *template);
+/* nv50_compute.c */
+void
+nv50_launch_grid(struct pipe_context *, const uint *, const uint *,
+ uint32_t, const void *);
+
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 89e7a338283..a4b8ddfda95 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -66,7 +66,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
case TGSI_SEMANTIC_VERTEXID:
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
- prog->vp.vertexid = 1;
continue;
default:
break;
@@ -259,6 +258,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
return nv50_vertprog_assign_slots(info);
case PIPE_SHADER_FRAGMENT:
return nv50_fragprog_assign_slots(info);
+ case PIPE_SHADER_COMPUTE:
+ return 0;
default:
return -1;
}
@@ -355,6 +356,9 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->gp.has_layer = 0;
prog->gp.has_viewport = 0;
+ if (prog->type == PIPE_SHADER_COMPUTE)
+ info->prop.cp.inputOffset = 0x10;
+
info->driverPriv = prog;
#ifdef DEBUG
@@ -378,6 +382,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
prog->tls_space = info->bin.tlsSpace;
+ prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+
if (prog->type == PIPE_SHADER_FRAGMENT) {
if (info->prop.fp.writesDepth) {
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
@@ -401,6 +407,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
break;
}
prog->gp.vert_count = info->prop.gp.maxVertices;
+ } else
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ prog->cp.syms = info->bin.syms;
+ prog->cp.num_syms = info->bin.numSyms;
}
if (prog->pipe.stream_output.num_outputs)
@@ -423,11 +433,13 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
struct nouveau_heap *heap;
int ret;
uint32_t size = align(prog->code_size, 0x40);
+ uint8_t prog_type;
switch (prog->type) {
case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
+ case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break;
default:
assert(!"invalid program type");
return false;
@@ -450,7 +462,14 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
return false;
}
}
- prog->code_base = prog->mem->start;
+
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ /* CP code must be uploaded in FP code segment. */
+ prog_type = 1;
+ } else {
+ prog->code_base = prog->mem->start;
+ prog_type = prog->type;
+ }
ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
if (ret < 0) {
@@ -468,7 +487,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
false /* flatshade */);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
- (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+ (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
@@ -489,7 +508,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
FREE(p->code);
FREE(p->fixups);
-
+ FREE(p->interps);
FREE(p->so);
memset(p, 0, sizeof(*p));
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 7a33eb11d6d..1de5122a56e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -76,9 +76,9 @@ struct nv50_program {
ubyte psiz; /* output slot of point size */
ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
ubyte edgeflag;
- ubyte vertexid;
ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
ubyte clpd_nr;
+ bool need_vertex_id;
} vp;
struct {
@@ -98,6 +98,13 @@ struct nv50_program {
ubyte viewportid; /* hw value of viewport index output */
} gp;
+ struct {
+ uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
+ uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
+ void *syms;
+ unsigned num_syms;
+ } cp;
+
void *fixups; /* relocation records */
void *interps; /* interpolation records */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
index f31eaa0e314..cbef95d07f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_push.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -24,6 +24,10 @@ struct push_context {
struct translate *translate;
bool primitive_restart;
+
+ bool need_vertex_id;
+ int32_t index_bias;
+
uint32_t prim;
uint32_t restart_index;
uint32_t instance_id;
@@ -74,6 +78,11 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -107,6 +116,11 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -140,6 +154,11 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
size = ctx->vertex_words * nr;
+ if (unlikely(ctx->need_vertex_id)) {
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, *elts + ctx->index_bias);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
@@ -161,10 +180,18 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
static void
emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
{
+ uint32_t elts = 0;
+
while (count) {
unsigned push = MIN2(count, ctx->packet_vertex_limit);
unsigned size = ctx->vertex_words * push;
+ if (unlikely(ctx->need_vertex_id)) {
+ /* For non-indexed draws, gl_VertexID goes up after each vertex. */
+ BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx->push, elts++);
+ }
+
BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
@@ -216,7 +243,14 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
ctx.push = nv50->base.pushbuf;
ctx.translate = nv50->vertex->translate;
- ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
+
+ ctx.need_vertex_id = nv50->screen->base.class_3d >= NV84_3D_CLASS &&
+ nv50->vertprog->vp.need_vertex_id && (nv50->vertex->num_elements < 32);
+ ctx.index_bias = info->index_bias;
+
+ /* For indexed draws, gl_VertexID must be emitted for every vertex. */
+ ctx.packet_vertex_limit =
+ ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit;
ctx.vertex_words = nv50->vertex->vertex_size;
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
@@ -307,4 +341,10 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
ctx.instance_id++;
ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
+
+ if (unlikely(ctx.need_vertex_id)) {
+ /* Reset gl_VertexID to prevent future indexed draws from being confused. */
+ BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (ctx.push, nv50->state.index_bias);
+ }
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index dd9b85b7208..4cd3b615606 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_query.h"
#include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
static struct pipe_query *
nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
@@ -152,4 +154,79 @@ nv50_init_query_functions(struct nv50_context *nv50)
pipe->end_query = nv50_end_query;
pipe->get_query_result = nv50_get_query_result;
pipe->render_condition = nv50_render_condition;
+ nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
+}
+
+/**
+ * Report the driver-specific queries of this screen.
+ *
+ * With a NULL info, returns the number of hardware queries reported by
+ * nv50_hw_get_driver_query_info(). Otherwise info is first filled with
+ * placeholder values and then handed to nv50_hw_get_driver_query_info()
+ * together with id; the result of that call is returned.
+ */
+int
+nv50_screen_get_driver_query_info(struct pipe_screen *pscreen,
+                                  unsigned id,
+                                  struct pipe_driver_query_info *info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+
+   /* A NULL info is a request for the number of available queries. */
+   if (info == NULL)
+      return nv50_hw_get_driver_query_info(screen, 0, NULL);
+
+   /* Placeholder values; the lookup below overwrites what it knows about. */
+   info->flags = 0;
+   info->group_id = -1;
+   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+   info->max_value.u64 = 0;
+   info->query_type = 0xdeadd01d;
+   info->name = "this_is_not_the_query_you_are_looking_for";
+
+   return nv50_hw_get_driver_query_info(screen, id, info);
+}
+
+/**
+ * Report the driver-specific query groups of this screen.
+ *
+ * Two groups (MP counters and performance metrics) are exposed, and only
+ * when the screen has a compute object and a 3D class of at least NV84.
+ *
+ * With a NULL info, returns the number of groups. Otherwise fills info for
+ * group id and returns 1, or fills it with placeholder values and returns 0
+ * when the group does not exist.
+ */
+int
+nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
+                                        unsigned id,
+                                        struct pipe_driver_query_group_info *info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   const bool has_groups =
+      screen->compute && screen->base.class_3d >= NV84_3D_CLASS;
+
+   if (!info)
+      return has_groups ? 2 : 0;
+
+   if (has_groups) {
+      switch (id) {
+      case NV50_HW_SM_QUERY_GROUP:
+         info->name = "MP counters";
+
+         /* Because we can't expose the number of hardware counters needed
+          * for each different query, we don't want to allow more than one
+          * active query simultaneously to avoid failure when the maximum
+          * number of counters is reached. Note that these groups of GPU
+          * counters are currently only used by AMD_performance_monitor.
+          */
+         info->max_active_queries = 1;
+         info->num_queries = NV50_HW_SM_QUERY_COUNT;
+         return 1;
+      case NV50_HW_METRIC_QUERY_GROUP:
+         info->name = "Performance metrics";
+         info->max_active_queries = 1;
+         info->num_queries = NV50_HW_METRIC_QUERY_COUNT;
+         return 1;
+      default:
+         break;
+      }
+   }
+
+   /* user asked for info about non-existing query group */
+   info->name = "this_is_not_the_query_group_you_are_looking_for";
+   info->max_active_queries = 0;
+   info->num_queries = 0;
+   return 0;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h
index d990285c857..bd4c0a386f6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h
@@ -28,6 +28,12 @@ nv50_query(struct pipe_query *pipe)
return (struct nv50_query *)pipe;
}
+/*
+ * Driver query groups:
+ */
+#define NV50_HW_SM_QUERY_GROUP 0
+#define NV50_HW_METRIC_QUERY_GROUP 1
+
void nv50_init_query_functions(struct nv50_context *);
#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 945ce7abe50..b6ebbbf1010 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -25,6 +25,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
#include "nv_object.xml.h"
#define NV50_HW_QUERY_STATE_READY 0
@@ -41,7 +43,7 @@
#define NV50_HW_QUERY_ALLOC_SPACE 256
-static bool
+bool
nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
int size)
{
@@ -122,6 +124,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_query *hq = nv50_hw_query(q);
+ if (hq->funcs && hq->funcs->begin_query)
+ return hq->funcs->begin_query(nv50, hq);
+
/* For occlusion queries we have to change the storage, because a previous
* query might set the initial render condition to false even *after* we re-
* initialized it to true.
@@ -193,6 +198,11 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_query *hq = nv50_hw_query(q);
+ if (hq->funcs && hq->funcs->end_query) {
+ hq->funcs->end_query(nv50, hq);
+ return;
+ }
+
hq->state = NV50_HW_QUERY_STATE_ENDED;
switch (q->type) {
@@ -261,6 +271,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
uint64_t *data64 = (uint64_t *)hq->data;
int i;
+ if (hq->funcs && hq->funcs->get_query_result)
+ return hq->funcs->get_query_result(nv50, hq, wait, result);
+
if (hq->state != NV50_HW_QUERY_STATE_READY)
nv50_hw_query_update(q);
@@ -331,6 +344,18 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
struct nv50_hw_query *hq;
struct nv50_query *q;
+ hq = nv50_hw_sm_create_query(nv50, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nv50_query *)hq;
+ }
+
+ hq = nv50_hw_metric_create_query(nv50, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nv50_query *)hq;
+ }
+
hq = CALLOC_STRUCT(nv50_hw_query);
if (!hq)
return NULL;
@@ -375,6 +400,26 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
return q;
}
+/**
+ * Enumerate the hardware driver queries: SM counters first, then metrics.
+ *
+ * With a NULL info, returns the total number of SM counter and metric
+ * queries. Otherwise id selects a query in that combined range and the
+ * matching per-kind lookup fills info; its return value is passed through.
+ */
+int
+nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                              struct pipe_driver_query_info *info)
+{
+   const int sm_count = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
+   const int metric_count =
+      nv50_hw_metric_get_driver_query_info(screen, 0, NULL);
+
+   if (!info)
+      return sm_count + metric_count;
+
+   /* Ids past the SM counters are rebased into the metric range. */
+   if (id >= (unsigned)sm_count)
+      return nv50_hw_metric_get_driver_query_info(screen, id - sm_count,
+                                                  info);
+
+   return nv50_hw_sm_get_driver_query_info(screen, id, info);
+}
+
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
struct nv50_query *q, unsigned result_offset)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
index 294c67de9a4..82ec6bd2d96 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -8,8 +8,19 @@
#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+struct nv50_hw_query;
+
+/*
+ * Optional per-kind hooks of a hardware query.
+ *
+ * nv50_hw_begin_query(), nv50_hw_end_query() and nv50_hw_get_query_result()
+ * forward to the matching hook when both nv50_hw_query::funcs and the hook
+ * are non-NULL; otherwise they run their generic implementation.
+ */
+struct nv50_hw_query_funcs {
+ void (*destroy_query)(struct nv50_context *, struct nv50_hw_query *);
+ boolean (*begin_query)(struct nv50_context *, struct nv50_hw_query *);
+ void (*end_query)(struct nv50_context *, struct nv50_hw_query *);
+ /* The boolean argument is the caller's "wait for the result" flag. */
+ boolean (*get_query_result)(struct nv50_context *, struct nv50_hw_query *,
+ boolean, union pipe_query_result *);
+};
+
struct nv50_hw_query {
struct nv50_query base;
+ const struct nv50_hw_query_funcs *funcs;
uint32_t *data;
uint32_t sequence;
struct nouveau_bo *bo;
@@ -31,6 +42,11 @@ nv50_hw_query(struct nv50_query *q)
struct nv50_query *
nv50_hw_create_query(struct nv50_context *, unsigned, unsigned);
+int
+nv50_hw_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+bool
+nv50_hw_query_allocate(struct nv50_context *, struct nv50_query *, int);
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
struct nv50_query *, unsigned);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
new file mode 100644
index 00000000000..d1bccb94193
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw_metric.h"
+#include "nv50/nv50_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NV84+ === */
+static const char *nv50_hw_metric_names[] =
+{
+ "metric-branch_efficiency",
+};
+
+/*
+ * Describes the SM counter queries a metric is computed from.
+ *
+ * queries[] holds the query types handed to nv50_hw_sm_create_query();
+ * num_queries is the number of entries in use.
+ */
+struct nv50_hw_metric_query_cfg {
+ uint32_t queries[4];
+ uint32_t num_queries;
+};
+
+/* _SM(n): query type of SM counter n.
+ * _M(n, c): designated initializer storing c at the index of metric n. */
+#define _SM(n) NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_ ##n)
+#define _M(n, c) [NV50_HW_METRIC_QUERY_##n] = c
+
+/* ==== Compute capability 1.1 (G84+) ==== */
+/* Branch efficiency uses the BRANCH and DIVERGENT_BRANCH counters. */
+static const struct nv50_hw_metric_query_cfg
+sm11_branch_efficiency =
+{
+ .queries[0] = _SM(BRANCH),
+ .queries[1] = _SM(DIVERGENT_BRANCH),
+ .num_queries = 2,
+};
+
+/* Metric configurations, indexed by enum nv50_hw_metric_queries. */
+static const struct nv50_hw_metric_query_cfg *sm11_hw_metric_queries[] =
+{
+ _M(BRANCH_EFFICIENCY, &sm11_branch_efficiency),
+};
+
+#undef _SM
+#undef _M
+
+/**
+ * Look up the configuration of a metric query.
+ *
+ * The query type is rebased to NV50_HW_METRIC_QUERY(0) and used as an index
+ * into sm11_hw_metric_queries; no range check is performed here.
+ */
+static const struct nv50_hw_metric_query_cfg *
+nv50_hw_metric_query_get_cfg(struct nv50_context *nv50,
+                             struct nv50_hw_query *hq)
+{
+   return sm11_hw_metric_queries[hq->base.type - NV50_HW_METRIC_QUERY(0)];
+}
+
+/**
+ * Destroy a metric query: every sub-query is destroyed through its own
+ * destroy_query hook, then the metric query itself is freed.
+ */
+static void
+nv50_hw_metric_destroy_query(struct nv50_context *nv50,
+                             struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   unsigned idx = 0;
+
+   while (idx < hmq->num_queries) {
+      struct nv50_hw_query *sub = hmq->queries[idx];
+
+      sub->funcs->destroy_query(nv50, sub);
+      idx++;
+   }
+
+   FREE(hmq);
+}
+
+/**
+ * Begin every sub-query of a metric query, in order.
+ *
+ * Stops at the first sub-query that fails to begin and returns its result.
+ * Returns false when the metric has no sub-queries.
+ */
+static boolean
+nv50_hw_metric_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   boolean started = false;
+   unsigned idx;
+
+   for (idx = 0; idx < hmq->num_queries; idx++) {
+      struct nv50_hw_query *sub = hmq->queries[idx];
+
+      started = sub->funcs->begin_query(nv50, sub);
+      if (!started)
+         break;
+   }
+
+   return started;
+}
+
+/**
+ * End every sub-query of a metric query, in order.
+ */
+static void
+nv50_hw_metric_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   unsigned idx = 0;
+
+   while (idx < hmq->num_queries) {
+      struct nv50_hw_query *sub = hmq->queries[idx];
+
+      sub->funcs->end_query(nv50, sub);
+      idx++;
+   }
+}
+
+/**
+ * Compute the value of a metric from the results of its sub-queries.
+ *
+ * res64[] holds one value per sub-query, in configuration order. Returns 0
+ * for an unknown metric type (after printing a debug message) and when the
+ * denominator of the metric is zero.
+ */
+static uint64_t
+sm11_hw_metric_calc_result(struct nv50_hw_query *hq, uint64_t res64[8])
+{
+   if (hq->base.type - NV50_HW_METRIC_QUERY(0) !=
+       NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY) {
+      debug_printf("invalid metric type: %d\n",
+                   hq->base.type - NV50_HW_METRIC_QUERY(0));
+      return 0;
+   }
+
+   /* (branch / (branch + divergent_branch)) * 100 */
+   if (!(res64[0] + res64[1]))
+      return 0;
+
+   return (res64[0] / (double)(res64[0] + res64[1])) * 100;
+}
+
+/**
+ * Fetch the result of a metric query.
+ *
+ * Collects the result of each sub-query, returning early with that
+ * sub-query's (false) result if one is not available, then stores the
+ * computed metric in *result as a 64-bit value.
+ */
+static boolean
+nv50_hw_metric_get_query_result(struct nv50_context *nv50,
+                                struct nv50_hw_query *hq, boolean wait,
+                                union pipe_query_result *result)
+{
+   struct nv50_hw_metric_query *hmq = nv50_hw_metric_query(hq);
+   union pipe_query_result results[4] = {};
+   uint64_t res64[4] = {};
+   boolean ok = false;
+   unsigned idx = 0;
+
+   while (idx < hmq->num_queries) {
+      struct nv50_hw_query *sub = hmq->queries[idx];
+
+      ok = sub->funcs->get_query_result(nv50, sub, wait, &results[idx]);
+      if (!ok)
+         return ok;
+
+      /* Sub-query results are read back as raw 64-bit values. */
+      res64[idx] = *(uint64_t *)&results[idx];
+      idx++;
+   }
+
+   *(uint64_t *)result = sm11_hw_metric_calc_result(hq, res64);
+   return ok;
+}
+
+/* Hook table installed on every query made by nv50_hw_metric_create_query(). */
+static const struct nv50_hw_query_funcs hw_metric_query_funcs = {
+ .destroy_query = nv50_hw_metric_destroy_query,
+ .begin_query = nv50_hw_metric_begin_query,
+ .end_query = nv50_hw_metric_end_query,
+ .get_query_result = nv50_hw_metric_get_query_result,
+};
+
+/**
+ * Create a metric query of the given type.
+ *
+ * Returns NULL when type is outside the metric query range, when memory
+ * cannot be allocated, or when one of the SM counter sub-queries cannot be
+ * created (the partially built metric is destroyed in that case).
+ */
+struct nv50_hw_query *
+nv50_hw_metric_create_query(struct nv50_context *nv50, unsigned type)
+{
+   struct nv50_hw_metric_query *hmq;
+   const struct nv50_hw_metric_query_cfg *cfg;
+   unsigned idx;
+
+   if (type < NV50_HW_METRIC_QUERY(0) || type > NV50_HW_METRIC_QUERY_LAST)
+      return NULL;
+
+   hmq = CALLOC_STRUCT(nv50_hw_metric_query);
+   if (!hmq)
+      return NULL;
+
+   hmq->base.funcs = &hw_metric_query_funcs;
+   hmq->base.base.type = type;
+
+   cfg = nv50_hw_metric_query_get_cfg(nv50, &hmq->base);
+
+   for (idx = 0; idx < cfg->num_queries; idx++) {
+      struct nv50_hw_query *sub =
+         nv50_hw_sm_create_query(nv50, cfg->queries[idx]);
+
+      hmq->queries[idx] = sub;
+      if (!sub) {
+         /* Only the sub-queries counted so far are torn down. */
+         nv50_hw_metric_destroy_query(nv50, &hmq->base);
+         return NULL;
+      }
+      hmq->num_queries++;
+   }
+
+   return &hmq->base;
+}
+
+/**
+ * Enumerate the metric driver queries.
+ *
+ * Metrics are only exposed when the screen has a compute object and a 3D
+ * class of at least NV84. With a NULL info, returns the number of metrics.
+ * Otherwise fills name, query_type and group_id of info for metric id and
+ * returns 1, or returns 0 (leaving info untouched) when id is out of range.
+ */
+int
+nv50_hw_metric_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                                     struct pipe_driver_query_info *info)
+{
+   const bool supported =
+      screen->compute && screen->base.class_3d >= NV84_3D_CLASS;
+   const int count = supported ? NV50_HW_METRIC_QUERY_COUNT : 0;
+
+   if (!info)
+      return count;
+
+   if (id >= (unsigned)count)
+      return 0;
+
+   info->name = nv50_hw_metric_names[id];
+   info->query_type = NV50_HW_METRIC_QUERY(id);
+   info->group_id = NV50_HW_METRIC_QUERY_GROUP;
+   return 1;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h
new file mode 100644
index 00000000000..f8cfc04084f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.h
@@ -0,0 +1,34 @@
+#ifndef __NV50_QUERY_HW_METRIC_H__
+#define __NV50_QUERY_HW_METRIC_H__
+
+#include "nv50_query_hw.h"
+
+/*
+ * A metric query: a hardware query whose value is computed from up to four
+ * sub-queries. base must stay the first member because nv50_hw_metric_query()
+ * casts a struct nv50_hw_query pointer to this type.
+ */
+struct nv50_hw_metric_query {
+ struct nv50_hw_query base;
+ struct nv50_hw_query *queries[4];
+ unsigned num_queries;
+};
+
+/* Downcast a hardware query to the metric query that embeds it as its
+ * first member. */
+static inline struct nv50_hw_metric_query *
+nv50_hw_metric_query(struct nv50_hw_query *hq)
+{
+   struct nv50_hw_metric_query *hmq = (struct nv50_hw_metric_query *)hq;
+
+   return hmq;
+}
+
+/*
+ * Driver metric queries:
+ *
+ * NV50_HW_METRIC_QUERY(i) maps metric index i to its query type, placed
+ * 1024 entries above PIPE_QUERY_DRIVER_SPECIFIC. NV50_HW_METRIC_QUERY_LAST
+ * is the query type of the last metric in the enum below.
+ */
+#define NV50_HW_METRIC_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NV50_HW_METRIC_QUERY_LAST NV50_HW_METRIC_QUERY(NV50_HW_METRIC_QUERY_COUNT - 1)
+enum nv50_hw_metric_queries
+{
+ NV50_HW_METRIC_QUERY_BRANCH_EFFICIENCY = 0,
+ NV50_HW_METRIC_QUERY_COUNT
+};
+
+struct nv50_hw_query *
+nv50_hw_metric_create_query(struct nv50_context *, unsigned);
+int
+nv50_hw_metric_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
new file mode 100644
index 00000000000..8453ce76095
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw_sm.h"
+
+#include "nv_object.xml.h"
+#include "nv50/nv50_compute.xml.h"
+
+/* === PERFORMANCE MONITORING COUNTERS for NV84+ === */
+
+/* NOTE: intentionally using the same names as NV */
+/* Reported as the query name by nv50_hw_sm_get_driver_query_info(), which
+ * indexes this table with the query id. */
+static const char *nv50_hw_sm_query_names[] =
+{
+ "branch",
+ "divergent_branch",
+ "instructions",
+ "prof_trigger_00",
+ "prof_trigger_01",
+ "prof_trigger_02",
+ "prof_trigger_03",
+ "prof_trigger_04",
+ "prof_trigger_05",
+ "prof_trigger_06",
+ "prof_trigger_07",
+ "sm_cta_launched",
+ "warp_serialize",
+};
+
+/*
+ * Code of the compute program that nv50_hw_sm_end_query() installs as
+ * screen->pm.prog and launches when a query ends. The listing in the comment
+ * below shows it reading $pm0..$pm3 and storing them, followed by a value
+ * loaded from s[0x14], to g15[].
+ * NOTE(review): presumably each 64-bit word below encodes one instruction of
+ * that listing -- confirm against the assembler output.
+ */
+static const uint64_t nv50_read_hw_sm_counters_code[] =
+{
+ /* and b32 $r0 $r0 0x0000ffff
+ * add b32 $c0 $r0 $r0 $r0
+ * (lg $c0) ret
+ * mov $r0 $pm0
+ * mov $r1 $pm1
+ * mov $r2 $pm2
+ * mov $r3 $pm3
+ * mov $r4 $physid
+ * ld $r5 b32 s[0x10]
+ * ld $r6 b32 s[0x14]
+ * and b32 $r4 $r4 0x000f0000
+ * shr u32 $r4 $r4 0x10
+ * mul $r4 u24 $r4 0x14
+ * add b32 $r5 $r5 $r4
+ * st b32 g15[$r5] $r0
+ * add b32 $r5 $r5 0x04
+ * st b32 g15[$r5] $r1
+ * add b32 $r5 $r5 0x04
+ * st b32 g15[$r5] $r2
+ * add b32 $r5 $r5 0x04
+ * st b32 g15[$r5] $r3
+ * add b32 $r5 $r5 0x04
+ * exit st b32 g15[$r5] $r6 */
+ 0x00000fffd03f0001ULL,
+ 0x040007c020000001ULL,
+ 0x0000028030000003ULL,
+ 0x6001078000000001ULL,
+ 0x6001478000000005ULL,
+ 0x6001878000000009ULL,
+ 0x6001c7800000000dULL,
+ 0x6000078000000011ULL,
+ 0x4400c78010000815ULL,
+ 0x4400c78010000a19ULL,
+ 0x0000f003d0000811ULL,
+ 0xe410078030100811ULL,
+ 0x0000000340540811ULL,
+ 0x0401078020000a15ULL,
+ 0xa0c00780d00f0a01ULL,
+ 0x0000000320048a15ULL,
+ 0xa0c00780d00f0a05ULL,
+ 0x0000000320048a15ULL,
+ 0xa0c00780d00f0a09ULL,
+ 0x0000000320048a15ULL,
+ 0xa0c00780d00f0a0dULL,
+ 0x0000000320048a15ULL,
+ 0xa0c00781d00f0a19ULL,
+};
+
+/*
+ * Configuration of one MP counter. The three fields are combined into the
+ * value written to MP_PM_CONTROL when the counter is set up.
+ */
+struct nv50_hw_sm_counter_cfg
+{
+ uint32_t mode : 4; /* LOGOP, LOGOP_PULSE */
+ uint32_t unit : 8; /* UNK[0-5] */
+ uint32_t sig : 8; /* signal selection */
+};
+
+/*
+ * Configuration of one SM query: the counters it needs (at most four) and
+ * how many entries of ctr[] are in use.
+ */
+struct nv50_hw_sm_query_cfg
+{
+ struct nv50_hw_sm_counter_cfg ctr[4];
+ uint8_t num_counters;
+};
+
+/* _Q(n, m, u, s): designated initializer for query n that uses a single
+ * counter with control mode m, unit u and signal s. */
+#define _Q(n, m, u, s) [NV50_HW_SM_QUERY_##n] = { { { NV50_COMPUTE_MP_PM_CONTROL_MODE_##m, NV50_COMPUTE_MP_PM_CONTROL_UNIT_##u, s, }, {}, {}, {} }, 1 }
+
+/* ==== Compute capability 1.1 (G84+) ==== */
+/* Query configurations, indexed by enum nv50_hw_sm_queries. */
+static const struct nv50_hw_sm_query_cfg sm11_hw_sm_queries[] =
+{
+ _Q(BRANCH, LOGOP, UNK4, 0x02),
+ _Q(DIVERGENT_BRANCH, LOGOP, UNK4, 0x09),
+ _Q(INSTRUCTIONS, LOGOP, UNK4, 0x04),
+ _Q(PROF_TRIGGER_0, LOGOP, UNK1, 0x26),
+ _Q(PROF_TRIGGER_1, LOGOP, UNK1, 0x27),
+ _Q(PROF_TRIGGER_2, LOGOP, UNK1, 0x28),
+ _Q(PROF_TRIGGER_3, LOGOP, UNK1, 0x29),
+ _Q(PROF_TRIGGER_4, LOGOP, UNK1, 0x2a),
+ _Q(PROF_TRIGGER_5, LOGOP, UNK1, 0x2b),
+ _Q(PROF_TRIGGER_6, LOGOP, UNK1, 0x2c),
+ _Q(PROF_TRIGGER_7, LOGOP, UNK1, 0x2d),
+ _Q(SM_CTA_LAUNCHED, LOGOP, UNK1, 0x33),
+ _Q(WARP_SERIALIZE, LOGOP, UNK0, 0x0b),
+};
+
+/* Return the 16-bit function value programmed into MP_PM_CONTROL for the
+ * given counter slot (0..3); any other slot yields 0. */
+static inline uint16_t nv50_hw_sm_get_func(uint8_t slot)
+{
+   static const uint16_t slot_funcs[4] = { 0xaaaa, 0xcccc, 0xf0f0, 0xff00 };
+
+   if (slot >= 4)
+      return 0;
+
+   return slot_funcs[slot];
+}
+
+/**
+ * Look up the counter configuration of an SM query.
+ *
+ * The query type is rebased to NV50_HW_SM_QUERY(0) and used as an index into
+ * sm11_hw_sm_queries; no range check is performed here.
+ */
+static const struct nv50_hw_sm_query_cfg *
+nv50_hw_sm_query_get_cfg(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   return sm11_hw_sm_queries + (hq->base.type - NV50_HW_SM_QUERY(0));
+}
+
+/**
+ * Destroy an SM counter query and release its result storage.
+ *
+ * This hook is reached through hq->funcs, for instance from
+ * nv50_hw_metric_destroy_query() for the sub-queries of a metric. Those
+ * sub-queries come straight from nv50_hw_sm_create_query(), which never
+ * sets hq->base.funcs, so dispatching through q->funcs here would
+ * dereference a NULL pointer. Release the query directly instead.
+ */
+static void
+nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_query *q = &hq->base;
+
+   /* NOTE(review): relies on nv50_hw_query_allocate() dropping the current
+    * buffer and allocating nothing when asked for a size of 0 -- confirm
+    * against its definition. */
+   nv50_hw_query_allocate(nv50, q, 0);
+
+   /* hq is the first member of the nv50_hw_sm_query allocated in
+    * nv50_hw_sm_create_query(), so this frees the whole object. */
+   FREE(hq);
+}
+
+/**
+ * Begin an SM counter query.
+ *
+ * Reserves one free MP counter slot per counter of the query configuration,
+ * then programs and resets those counters. Returns false without touching
+ * any state when fewer free slots remain than the query needs, true
+ * otherwise.
+ */
+static boolean
+nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   const struct nv50_hw_sm_query_cfg *cfg;
+   uint16_t func;
+   int i, c;
+
+   cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
+
+   /* check if we have enough free counter slots */
+   if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) {
+      NOUVEAU_ERR("Not enough free MP counter slots !\n");
+      return false;
+   }
+
+   assert(cfg->num_counters <= 4);
+   PUSH_SPACE(push, 4 * 4);
+
+   /* set sequence field to 0 (used to check if result is available)
+    *
+    * Each MP owns 0x14 bytes (5 dwords) of the result buffer and its
+    * sequence field lives at byte offset 0x10, i.e. dword 4. This is the
+    * dword nv50_hw_sm_query_read_data() compares against hq->sequence.
+    */
+   for (i = 0; i < screen->MPsInTP; ++i) {
+      const unsigned b = (0x14 / 4) * i;
+      hq->data[b + 4] = 0;
+   }
+   hq->sequence++;
+
+   for (i = 0; i < cfg->num_counters; i++) {
+      screen->pm.num_hw_sm_active++;
+
+      /* find free counter slots */
+      for (c = 0; c < 4; ++c) {
+         if (!screen->pm.mp_counter[c]) {
+            hsq->ctr[i] = c;
+            screen->pm.mp_counter[c] = hsq;
+            break;
+         }
+      }
+
+      /* select func to aggregate counters */
+      func = nv50_hw_sm_get_func(c);
+
+      /* configure and reset the counter(s) */
+      BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+      PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
+                       | cfg->ctr[i].unit | cfg->ctr[i].mode);
+      BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1);
+      PUSH_DATA (push, 0);
+   }
+   return true;
+}
+
+/**
+ * End an SM counter query.
+ *
+ * Disables all active MP counters, releases the slots owned by this query,
+ * launches the read-back compute program so the counter values and the
+ * query's sequence number land in the query buffer, and finally re-enables
+ * the counters that still belong to other queries.
+ *
+ * The read-back program is created on first use and cached in
+ * screen->pm.prog. If it cannot be allocated, the launch is skipped (no
+ * result is written for this query) while the counter bookkeeping is still
+ * carried out.
+ */
+static void
+nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
+{
+   struct nv50_screen *screen = nv50->screen;
+   struct pipe_context *pipe = &nv50->base.pipe;
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   uint32_t mask;
+   uint32_t input[3];
+   const uint block[3] = { 32, 1, 1 };
+   const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
+   int c;
+
+   if (unlikely(!screen->pm.prog)) {
+      struct nv50_program *prog = CALLOC_STRUCT(nv50_program);
+
+      if (prog) {
+         prog->type = PIPE_SHADER_COMPUTE;
+         prog->translated = true;
+         prog->max_gpr = 7;
+         prog->parm_size = 8;
+         prog->code = (uint32_t *)nv50_read_hw_sm_counters_code;
+         prog->code_size = sizeof(nv50_read_hw_sm_counters_code);
+         screen->pm.prog = prog;
+      } else {
+         NOUVEAU_ERR("Failed to allocate MP counter read-back program !\n");
+      }
+   }
+
+   /* disable all counting */
+   PUSH_SPACE(push, 8);
+   for (c = 0; c < 4; c++) {
+      if (screen->pm.mp_counter[c]) {
+         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+         PUSH_DATA (push, 0);
+      }
+   }
+
+   /* release counters for this query */
+   for (c = 0; c < 4; c++) {
+      if (screen->pm.mp_counter[c] == hsq) {
+         screen->pm.num_hw_sm_active--;
+         screen->pm.mp_counter[c] = NULL;
+      }
+   }
+
+   /* Without a program there is nothing to bind or launch. */
+   if (screen->pm.prog) {
+      BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
+                   hq->bo);
+
+      PUSH_SPACE(push, 2);
+      BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+      PUSH_DATA (push, 0);
+
+      pipe->bind_compute_state(pipe, screen->pm.prog);
+      /* program input: destination address and sequence number to store */
+      input[0] = hq->bo->offset + hq->base_offset;
+      input[1] = hq->sequence;
+      pipe->launch_grid(pipe, block, grid, 0, input);
+
+      nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
+   }
+
+   /* re-activate other counters */
+   PUSH_SPACE(push, 8);
+   mask = 0;
+   for (c = 0; c < 4; c++) {
+      const struct nv50_hw_sm_query_cfg *cfg;
+      unsigned i;
+
+      hsq = screen->pm.mp_counter[c];
+      if (!hsq)
+         continue;
+
+      cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base);
+      for (i = 0; i < cfg->num_counters; i++) {
+         uint16_t func;
+
+         /* each slot is re-enabled at most once */
+         if (mask & (1 << hsq->ctr[i]))
+            break;
+
+         mask |= 1 << hsq->ctr[i];
+         func = nv50_hw_sm_get_func(hsq->ctr[i]);
+
+         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1);
+         PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
+                          | cfg->ctr[i].unit | cfg->ctr[i].mode);
+      }
+   }
+}
+
+/**
+ * Copy the counter values of a query out of its result buffer.
+ *
+ * For each of the first mp_count MPs and each counter of cfg, the value
+ * found at the slot recorded in hsq->ctr[] is stored in count[mp][counter].
+ * When an MP's sequence dword does not match hq->sequence, the function
+ * returns false if wait is not set or if waiting on the buffer fails.
+ * Returns true once every value has been copied.
+ */
+static inline bool
+nv50_hw_sm_query_read_data(uint32_t count[32][4],
+                           struct nv50_context *nv50, bool wait,
+                           struct nv50_hw_query *hq,
+                           const struct nv50_hw_sm_query_cfg *cfg,
+                           unsigned mp_count)
+{
+   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
+   unsigned mp, ctr;
+
+   for (mp = 0; mp < mp_count; ++mp) {
+      /* 0x14 bytes (5 dwords) per MP; dword 4 is the sequence number */
+      const unsigned base = (0x14 / 4) * mp;
+
+      for (ctr = 0; ctr < cfg->num_counters; ++ctr) {
+         if (hq->data[base + 4] != hq->sequence &&
+             (!wait ||
+              nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client)))
+            return false;
+
+         count[mp][ctr] = hq->data[base + hsq->ctr[ctr]];
+      }
+   }
+   return true;
+}
+
+/**
+ * Fetch the result of an SM counter query.
+ *
+ * Sums the counter values of up to 32 MPs, scales the sum by the number of
+ * TPs and stores it in *result as a 64-bit value. Returns false when the
+ * counter data could not be read, true otherwise.
+ */
+static boolean
+nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq,
+                            boolean wait, union pipe_query_result *result)
+{
+   const struct nv50_hw_sm_query_cfg *cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
+   const unsigned mp_count = MIN2(nv50->screen->MPsInTP, 32);
+   uint32_t count[32][4];
+   uint64_t total = 0;
+   unsigned mp, ctr;
+
+   if (!nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count))
+      return false;
+
+   for (mp = 0; mp < mp_count; ++mp)
+      for (ctr = 0; ctr < cfg->num_counters; ++ctr)
+         total += count[mp][ctr];
+
+   /* We only count a single TP, and simply multiply by the total number of
+    * TPs to compute result over all TPs. This is inaccurate, but enough! */
+   total *= nv50->screen->TPs;
+
+   *(uint64_t *)result = total;
+   return true;
+}
+
+/* Hook table installed on every query made by nv50_hw_sm_create_query(). */
+static const struct nv50_hw_query_funcs hw_sm_query_funcs = {
+ .destroy_query = nv50_hw_sm_destroy_query,
+ .begin_query = nv50_hw_sm_begin_query,
+ .end_query = nv50_hw_sm_end_query,
+ .get_query_result = nv50_hw_sm_get_query_result,
+};
+
+/**
+ * Create an SM counter query of the given type.
+ *
+ * Returns NULL when type is outside the SM query range, when memory cannot
+ * be allocated, or when the result storage cannot be allocated.
+ */
+struct nv50_hw_query *
+nv50_hw_sm_create_query(struct nv50_context *nv50, unsigned type)
+{
+   struct nv50_hw_sm_query *hsq;
+   unsigned space;
+
+   if (type < NV50_HW_SM_QUERY(0) || type > NV50_HW_SM_QUERY_LAST)
+      return NULL;
+
+   hsq = CALLOC_STRUCT(nv50_hw_sm_query);
+   if (!hsq)
+      return NULL;
+
+   hsq->base.funcs = &hw_sm_query_funcs;
+   hsq->base.base.type = type;
+
+   /*
+    * for each MP:
+    * [00] = MP.C0
+    * [04] = MP.C1
+    * [08] = MP.C2
+    * [0c] = MP.C3
+    * [10] = MP.sequence
+    */
+   space = (4 + 1) * nv50->screen->MPsInTP * sizeof(uint32_t);
+
+   if (nv50_hw_query_allocate(nv50, &hsq->base.base, space))
+      return &hsq->base;
+
+   FREE(hsq);
+   return NULL;
+}
+
+/**
+ * Enumerate the SM counter driver queries.
+ *
+ * Counters are only exposed when the screen has a compute object and a 3D
+ * class of at least NV84. With a NULL info, returns the number of counters.
+ * Otherwise fills name, query_type and group_id of info for counter id and
+ * returns 1, or returns 0 (leaving info untouched) when id is out of range.
+ */
+int
+nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id,
+                                 struct pipe_driver_query_info *info)
+{
+   const bool supported =
+      screen->compute && screen->base.class_3d >= NV84_3D_CLASS;
+   const int count = supported ? NV50_HW_SM_QUERY_COUNT : 0;
+
+   if (!info)
+      return count;
+
+   if (id >= (unsigned)count)
+      return 0;
+
+   info->name = nv50_hw_sm_query_names[id];
+   info->query_type = NV50_HW_SM_QUERY(id);
+   info->group_id = NV50_HW_SM_QUERY_GROUP;
+   return 1;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h
new file mode 100644
index 00000000000..c1a1cd175e3
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h
@@ -0,0 +1,45 @@
+#ifndef __NV50_QUERY_HW_SM_H__
+#define __NV50_QUERY_HW_SM_H__
+
+#include "nv50_query_hw.h"
+
+/* SM performance-counter query: a generic hardware query extended with four
+ * bytes of per-counter state. */
+struct nv50_hw_sm_query {
+ /* Must remain the first member: nv50_hw_sm_query() casts a pointer to this
+  * field back to the containing struct. */
+ struct nv50_hw_query base;
+ /* NOTE(review): presumably one entry per hardware counter slot (C0..C3);
+  * confirm against the users in nv50_query_hw_sm.c. */
+ uint8_t ctr[4];
+};
+
+/* Downcast a generic hardware query to the SM query that embeds it. This is
+ * a plain pointer cast, so it is only meaningful for queries that were
+ * allocated as struct nv50_hw_sm_query (whose first member is the base). */
+static inline struct nv50_hw_sm_query *
+nv50_hw_sm_query(struct nv50_hw_query *hq)
+{
+ return (struct nv50_hw_sm_query *)hq;
+}
+
+/*
+ * Performance counter queries:
+ *
+ * NV50_HW_SM_QUERY(i) converts a zero-based index from the enum below into a
+ * pipe query type by offsetting it from PIPE_QUERY_DRIVER_SPECIFIC.
+ * NV50_HW_SM_QUERY_LAST is the query type of the last enum entry before
+ * NV50_HW_SM_QUERY_COUNT.
+ */
+#define NV50_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NV50_HW_SM_QUERY_LAST NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_COUNT - 1)
+/* Zero-based SM query indices. nv50_hw_sm_get_driver_query_info() uses the
+ * same index for nv50_hw_sm_query_names[] and NV50_HW_SM_QUERY(), so the
+ * order here has to match that name table. */
+enum nv50_hw_sm_queries
+{
+ NV50_HW_SM_QUERY_BRANCH = 0,
+ NV50_HW_SM_QUERY_DIVERGENT_BRANCH,
+ NV50_HW_SM_QUERY_INSTRUCTIONS,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_0,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_1,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_2,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_3,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_4,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_5,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_6,
+ NV50_HW_SM_QUERY_PROF_TRIGGER_7,
+ NV50_HW_SM_QUERY_SM_CTA_LAUNCHED,
+ NV50_HW_SM_QUERY_WARP_SERIALIZE,
+ /* Number of SM queries; keep this entry last. */
+ NV50_HW_SM_QUERY_COUNT,
+};
+
+struct nv50_hw_query *
+nv50_hw_sm_create_query(struct nv50_context *, unsigned);
+int
+nv50_hw_sm_get_driver_query_info(struct nv50_screen *, unsigned,
+ struct pipe_driver_query_info *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index f47e998ab1e..1e4b75f18e0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -41,8 +41,6 @@
#define THREADS_IN_WARP 32
-#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
-
static boolean
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@@ -183,6 +181,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_COMPUTE:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -212,7 +211,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
@@ -251,6 +249,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_COMPUTE:
break;
default:
return 0;
@@ -336,6 +335,52 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
return 0.0f;
}
+/*
+ * Report a compute capability of the screen.
+ *
+ * For a recognised cap the value is copied into 'data' when 'data' is
+ * non-NULL, and the size of that value in bytes is returned either way, so a
+ * caller may pass NULL to learn the required buffer size. Unrecognised caps
+ * return 0 and leave 'data' untouched.
+ */
+static int
+nv50_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_compute_cap param, void *data)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
+/* RET(x) takes an array compound literal: it copies the array to 'data' (if
+ * given) and returns from the function with sizeof the array. Literals with
+ * more than one element need an extra pair of parentheses so the commas in
+ * the initializer are not treated as macro argument separators. */
+#define RET(x) do { \
+ if (data) \
+ memcpy(data, x, sizeof(x)); \
+ return sizeof(x); \
+} while (0)
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ RET((uint64_t []) { 2 });
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ RET(((uint64_t []) { 65535, 65535 }));
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ RET(((uint64_t []) { 512, 512, 64 }));
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ RET((uint64_t []) { 512 });
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
+ RET((uint64_t []) { 1ULL << 32 });
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ RET((uint64_t []) { 4096 });
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ RET((uint32_t []) { 32 });
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ RET((uint64_t []) { 1ULL << 40 });
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ RET((uint32_t []) { 0 });
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint32_t []) { screen->mp_count });
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
+ default:
+ return 0;
+ }
+
+#undef RET
+}
+
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
@@ -377,6 +422,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->tesla);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
nouveau_object_del(&screen->sync);
nouveau_screen_fini(&screen->base);
@@ -640,7 +686,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, 0);
if (screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, 0);
}
@@ -742,6 +788,9 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
+ pscreen->get_compute_param = nv50_screen_get_compute_param;
+ pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+ pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;
nv50_screen_init_resource_functions(pscreen);
@@ -851,6 +900,8 @@ nv50_screen_create(struct nouveau_device *dev)
screen->TPs = util_bitcount(value & 0xffff);
screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+ screen->mp_count = screen->TPs * screen->MPsInTP;
+
stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
STACK_WARPS_ALLOC * 64 * 8;
@@ -902,6 +953,12 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_hwctx(screen);
+ ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
+ if (ret) {
+ NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
+ goto fail;
+ }
+
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index ce51f0fc254..2a4983d1020 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -23,6 +23,10 @@ struct nv50_context;
#define NV50_MAX_VIEWPORTS 16
+#define NV50_MAX_GLOBALS 16
+
+#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
+
struct nv50_blitter;
struct nv50_graph_state {
@@ -66,6 +70,7 @@ struct nv50_screen {
unsigned MPsInTP;
unsigned max_tls_space;
unsigned cur_tls_space;
+ unsigned mp_count;
struct nouveau_heap *vp_code_heap;
struct nouveau_heap *gp_code_heap;
@@ -90,9 +95,16 @@ struct nv50_screen {
struct nouveau_bo *bo;
} fence;
+ struct {
+ struct nv50_program *prog; /* compute state object to read MP counters */
+ struct nv50_hw_sm_query *mp_counter[4]; /* counter to query allocation */
+ uint8_t num_hw_sm_active;
+ } pm;
+
struct nouveau_object *sync;
struct nouveau_object *tesla;
+ struct nouveau_object *compute;
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
};
@@ -103,12 +115,19 @@ nv50_screen(struct pipe_screen *screen)
return (struct nv50_screen *)screen;
}
+int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_info *);
+int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_group_info *);
+
bool nv50_blitter_create(struct nv50_screen *);
void nv50_blitter_destroy(struct nv50_screen *);
int nv50_screen_tic_alloc(struct nv50_screen *, void *);
int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
+int nv50_screen_compute_setup(struct nv50_screen *, struct nouveau_pushbuf *);
+
static inline void
nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
{
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index d27f12ca94b..b4ea08d4d13 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -792,6 +792,35 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
nv50->dirty |= NV50_NEW_GMTYPROG;
}
+/**
+ * Create a compute program state object.
+ *
+ * Records the shared, private and input memory requirements from \p cso and
+ * takes a private copy of its TGSI tokens.
+ *
+ * \param pipe  context (unused here)
+ * \param cso   compute state description; cso->prog is treated as a TGSI
+ *              token stream
+ * \return the new nv50_program as an opaque pointer, or NULL if the program
+ *         or its token copy could not be allocated
+ */
+static void *
+nv50_cp_state_create(struct pipe_context *pipe,
+                     const struct pipe_compute_state *cso)
+{
+   struct nv50_program *prog;
+
+   prog = CALLOC_STRUCT(nv50_program);
+   if (!prog)
+      return NULL;
+   prog->type = PIPE_SHADER_COMPUTE;
+
+   prog->cp.smem_size = cso->req_local_mem;
+   prog->cp.lmem_size = cso->req_private_mem;
+   prog->parm_size = cso->req_input_mem;
+
+   prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
+   if (!prog->pipe.tokens) {
+      /* Do not hand back a program without tokens; fail the creation so
+       * the caller sees the allocation failure immediately. */
+      FREE(prog);
+      return NULL;
+   }
+
+   return (void *)prog;
+}
+
+/* Make 'hwcso' the context's current compute program and flag the compute
+ * program state as dirty. The pointer is stored as-is, without validation. */
+static void
+nv50_cp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->compprog = hwcso;
+ nv50->dirty_cp |= NV50_NEW_CP_PROGRAM;
+}
+
static void
nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct pipe_constant_buffer *cb)
@@ -1134,6 +1163,70 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
nv50->dirty |= NV50_NEW_STRMOUT;
}
+/* Placeholder for binding compute surfaces: currently a no-op that ignores
+ * all of its arguments. */
+static void
+nv50_set_compute_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ /* TODO: bind surfaces */
+}
+
+/**
+ * Write the 32-bit handle for a global resource.
+ *
+ * \param phandle  receives the low 32 bits of the buffer address, or 0 when
+ *                 \p res is NULL or the buffer's last byte lies at or above
+ *                 the 4 GiB boundary
+ * \param res      resource to expose, may be NULL
+ */
+static inline void
+nv50_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
+{
+   struct nv04_resource *buf = nv04_resource(res);
+   uint64_t last_byte;
+
+   /* Nothing bound: publish a null handle. */
+   if (!buf) {
+      *phandle = 0;
+      return;
+   }
+
+   last_byte = (buf->address + buf->base.width0) - 1;
+   if (last_byte < (1ULL << 32)) {
+      *phandle = (uint32_t)buf->address;
+      return;
+   }
+
+   /* The handle is only 32 bits wide, so the buffer cannot be addressed. */
+   NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
+               "resource not contained within 32-bit address space !\n");
+   *phandle = 0;
+}
+
+/**
+ * Bind or unbind a range of global resources for compute.
+ *
+ * \param pipe       context
+ * \param start      first binding slot to change
+ * \param nr         number of slots to change
+ * \param resources  resources to bind to slots [start, start + nr); when
+ *                   NULL the slots are unbound instead
+ * \param handles    one pointer per resource; each target receives the
+ *                   32-bit handle computed by nv50_set_global_handle().
+ *                   Only read when \p resources is non-NULL
+ */
+static void
+nv50_set_global_bindings(struct pipe_context *pipe,
+                         unsigned start, unsigned nr,
+                         struct pipe_resource **resources,
+                         uint32_t **handles)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct pipe_resource **ptr;
+   unsigned i;
+   const unsigned end = start + nr;
+
+   /* Grow the resident array to cover the requested slots and zero the new
+    * tail so that untouched slots read as "nothing bound". */
+   if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
+      const unsigned old_size = nv50->global_residents.size;
+      const unsigned req_size = end * sizeof(struct pipe_resource *);
+      util_dynarray_resize(&nv50->global_residents, req_size);
+      memset((uint8_t *)nv50->global_residents.data + old_size, 0,
+             req_size - old_size);
+   }
+
+   /* Looked up after the resize above, which may change the array's data
+    * pointer. */
+   ptr = util_dynarray_element(
+      &nv50->global_residents, struct pipe_resource *, start);
+
+   if (resources) {
+      for (i = 0; i < nr; ++i) {
+         pipe_resource_reference(&ptr[i], resources[i]);
+         nv50_set_global_handle(handles[i], resources[i]);
+      }
+   } else {
+      for (i = 0; i < nr; ++i)
+         pipe_resource_reference(&ptr[i], NULL);
+   }
+
+   nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL);
+
+   /* OR the flag in: a plain assignment would drop other pending compute
+    * dirty bits, e.g. NV50_NEW_CP_PROGRAM set by nv50_cp_state_bind(). */
+   nv50->dirty_cp |= NV50_NEW_CP_GLOBALS;
+}
+
void
nv50_init_state_functions(struct nv50_context *nv50)
{
@@ -1162,12 +1255,15 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->create_vs_state = nv50_vp_state_create;
pipe->create_fs_state = nv50_fp_state_create;
pipe->create_gs_state = nv50_gp_state_create;
+ pipe->create_compute_state = nv50_cp_state_create;
pipe->bind_vs_state = nv50_vp_state_bind;
pipe->bind_fs_state = nv50_fp_state_bind;
pipe->bind_gs_state = nv50_gp_state_bind;
+ pipe->bind_compute_state = nv50_cp_state_bind;
pipe->delete_vs_state = nv50_sp_state_delete;
pipe->delete_fs_state = nv50_sp_state_delete;
pipe->delete_gs_state = nv50_sp_state_delete;
+ pipe->delete_compute_state = nv50_sp_state_delete;
pipe->set_blend_color = nv50_set_blend_color;
pipe->set_stencil_ref = nv50_set_stencil_ref;
@@ -1191,6 +1287,9 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->stream_output_target_destroy = nv50_so_target_destroy;
pipe->set_stream_output_targets = nv50_set_stream_output_targets;
+ pipe->set_global_binding = nv50_set_global_bindings;
+ pipe->set_compute_resources = nv50_set_compute_resources;
+
nv50->sample_mask = ~0;
nv50->min_samples = 1;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index b6181edf24f..02a759c23ad 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -503,8 +503,7 @@ static struct state_validate {
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
- NV50_NEW_VERTPROG },
+ { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
{ nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 916a7d44a31..8ba19d2cc90 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -339,12 +339,18 @@ nv50_clear_render_target(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16) | dstx);
PUSH_DATA (push, (height << 16) | dsty);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, 0x3c |
(z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
@@ -415,12 +421,18 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16) | dstx);
PUSH_DATA (push, (height << 16) | dsty);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, mode |
(z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
@@ -673,6 +685,9 @@ nv50_clear_buffer(struct pipe_context *pipe,
PUSH_DATA (push, (width << 16));
PUSH_DATA (push, (height << 16));
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, 0x3c);
@@ -690,6 +705,9 @@ nv50_clear_buffer(struct pipe_context *pipe,
PUSH_DATA (push, 0x3c);
}
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 9aa593f919e..85878d5fcc7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -294,8 +294,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
uint64_t addrs[PIPE_MAX_ATTRIBS];
uint32_t limits[PIPE_MAX_ATTRIBS];
struct nouveau_pushbuf *push = nv50->base.pushbuf;
- struct nv50_vertex_stateobj dummy = {};
- struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
+ struct nv50_vertex_stateobj *vertex = nv50->vertex;
struct pipe_vertex_buffer *vb;
struct nv50_vertex_element *ve;
uint32_t mask;
@@ -303,14 +302,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
unsigned i;
const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
- /* A vertexid is not generated for inline data uploads. Have to use a
- * VBO. This check must come after the vertprog has been validated,
- * otherwise vertexid may be unset.
- */
- assert(nv50->vertprog->translated);
- if (nv50->vertprog->vp.vertexid)
- nv50->vbo_push_hint = 0;
-
if (unlikely(vertex->need_conversion))
nv50->vbo_fifo = ~0;
else
@@ -487,7 +478,7 @@ nv50_draw_arrays(struct nv50_context *nv50,
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, 0);
if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, 0);
}
nv50->state.index_bias = 0;
@@ -613,7 +604,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, index_bias);
if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
- BEGIN_NV04(push, SUBC_3D(NV84_3D_VERTEX_ID_BASE), 1);
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
PUSH_DATA (push, index_bias);
}
nv50->state.index_bias = index_bias;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index 76f1b41ea70..68002305d72 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define SUBC_3D(m) 3, (m)
#define NV50_3D(n) SUBC_3D(NV50_3D_##n)
+#define NV84_3D(n) SUBC_3D(NV84_3D_##n)
#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
#define SUBC_2D(m) 4, (m)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 82ed5a1864e..162661ff2a7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
int ref)
{
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
+ unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ if (bind & PIPE_BIND_RENDER_TARGET) {
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
@@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
}
- if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
- if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_COMMAND_ARGS_BUFFER |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_COMMAND_ARGS_BUFFER |
+ PIPE_BIND_SAMPLER_VIEW)) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
nvc0->dirty |= NVC0_NEW_ARRAYS;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f53921092a5..d992b10a23c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+ info->flags = 0;
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
@@ -200,7 +201,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
/* Because we can't expose the number of hardware counters needed for
* each different query, we don't want to allow more than one active
@@ -224,7 +224,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
if (screen->compute) {
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->name = "Performance metrics";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
info->max_active_queries = 1;
info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
return 1;
@@ -234,7 +233,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
return 1;
@@ -245,7 +243,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
info->name = "this_is_not_the_query_group_you_are_looking_for";
info->max_active_queries = 0;
info->num_queries = 0;
- info->type = 0;
return 0;
}
@@ -260,4 +257,5 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
pipe->render_condition = nvc0_render_condition;
+ nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 44b222e5134..7962143d45a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -1014,14 +1014,15 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
prog->type = PIPE_SHADER_COMPUTE;
prog->translated = true;
- prog->num_gprs = 14;
prog->parm_size = 12;
if (is_nve4) {
prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
+ prog->num_gprs = 14;
} else {
prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
+ prog->num_gprs = 12;
}
screen->pm.prog = prog;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index cdb1fc1145f..6a4ae5be2ab 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -341,12 +341,16 @@ nvc0_clear_render_target(struct pipe_context *pipe,
nvc0_resource_fence(res, NOUVEAU_BO_WR);
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, 0x3c |
(z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
@@ -470,6 +474,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
if (width * height != elements) {
@@ -486,6 +492,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -545,12 +553,16 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
PUSH_DATA (push, dst->u.tex.first_layer);
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
for (z = 0; z < sf->depth; ++z) {
PUSH_DATA (push, mode |
(z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
}
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index f63790c329e..1dbad2f39e3 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -7,12 +7,14 @@ C_SOURCES := \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
+ r600_query.h \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vce_40_2_2.c \
radeon_vce_50.c \
+ radeon_vce_52.c \
radeon_vce.c \
radeon_vce.h \
radeon_video.c \
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3599692a857..7464f677398 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -27,6 +27,7 @@
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "tgsi/tgsi_parse.h"
+#include "util/list.h"
#include "util/u_draw_quad.h"
#include "util/u_memory.h"
#include "util/u_format_s3tc.h"
@@ -135,12 +136,10 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
void r600_preflush_suspend_features(struct r600_common_context *ctx)
{
/* suspend queries */
- ctx->queries_suspended_for_flush = false;
- if (ctx->num_cs_dw_nontimer_queries_suspend) {
+ if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
r600_suspend_nontimer_queries(ctx);
+ if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
r600_suspend_timer_queries(ctx);
- ctx->queries_suspended_for_flush = true;
- }
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
@@ -157,10 +156,10 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
}
/* resume queries */
- if (ctx->queries_suspended_for_flush) {
- r600_resume_nontimer_queries(ctx);
+ if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
r600_resume_timer_queries(ctx);
- }
+ if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
+ r600_resume_nontimer_queries(ctx);
}
static void r600_flush_from_st(struct pipe_context *ctx,
@@ -718,50 +717,6 @@ static uint64_t r600_get_timestamp(struct pipe_screen *screen)
rscreen->info.r600_clock_crystal_freq;
}
-static int r600_get_driver_query_info(struct pipe_screen *screen,
- unsigned index,
- struct pipe_driver_query_info *info)
-{
- struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
- struct pipe_driver_query_info list[] = {
- {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
- PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
- {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
- PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
- {"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
- {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
- PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
- {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
- {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES,
- PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
- {"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
- {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
- {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
- {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
- };
- unsigned num_queries;
-
- if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
- num_queries = Elements(list);
- else if (rscreen->info.drm_major == 3)
- num_queries = Elements(list) - 3;
- else
- num_queries = Elements(list) - 4;
-
- if (!info)
- return num_queries;
-
- if (index >= num_queries)
- return 0;
-
- *info = list[index];
- return 1;
-}
-
static void r600_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **dst,
struct pipe_fence_handle *src)
@@ -949,7 +904,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.get_device_vendor = r600_get_device_vendor;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
- rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
@@ -965,6 +919,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
}
r600_init_screen_texture_functions(rscreen);
+ r600_init_screen_query_functions(rscreen);
rscreen->ws = ws;
rscreen->family = rscreen->info.family;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index ebe633b9125..fbdc5c410ae 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -47,21 +47,6 @@
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
-#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2)
-#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3)
-#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4)
-#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
-#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10)
-#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
-#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
-#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
-
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
#define R600_CONTEXT_PRIVATE_FLAG (1u << 1)
@@ -408,8 +393,6 @@ struct r600_common_context {
struct list_head active_timer_queries;
unsigned num_cs_dw_nontimer_queries_suspend;
unsigned num_cs_dw_timer_queries_suspend;
- /* If queries have been suspended. */
- bool queries_suspended_for_flush;
/* Additional hardware info. */
unsigned backend_mask;
unsigned max_db; /* for OQ */
@@ -526,6 +509,7 @@ uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
/* r600_query.c */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
void r600_resume_nontimer_queries(struct r600_common_context *ctx);
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 8c2b601a96c..b1cfb6e462b 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -22,81 +22,218 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "r600_query.h"
#include "r600_cs.h"
#include "util/u_memory.h"
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+ struct r600_query b;
-struct r600_query_buffer {
- /* The buffer where query results are stored. */
- struct r600_resource *buf;
- /* Offset of the next free result after current query data */
- unsigned results_end;
- /* If a query buffer is full, a new buffer is created and the old one
- * is put in here. When we calculate the result, we sum up the samples
- * from all buffers. */
- struct r600_query_buffer *previous;
-};
-
-struct r600_query {
- /* The query buffer and how many results are in it. */
- struct r600_query_buffer buffer;
- /* The type of query */
- unsigned type;
- /* Size of the result in memory for both begin_query and end_query,
- * this can be one or two numbers, or it could even be a size of a structure. */
- unsigned result_size;
- /* The number of dwords for begin_query or end_query. */
- unsigned num_cs_dw;
- /* linked list of queries */
- struct list_head list;
- /* for custom non-GPU queries */
uint64_t begin_result;
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
- /* For transform feedback: which stream the query is for */
- unsigned stream;
};
-
-static bool r600_is_timer_query(unsigned type)
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery)
{
- return type == PIPE_QUERY_TIME_ELAPSED ||
- type == PIPE_QUERY_TIMESTAMP;
+ struct pipe_screen *screen = rctx->b.screen;
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+ screen->fence_reference(screen, &query->fence, NULL);
+ FREE(query);
}
-static bool r600_query_needs_begin(unsigned type)
+static enum radeon_value_id winsys_id_from_type(unsigned type)
{
- return type != PIPE_QUERY_GPU_FINISHED &&
- type != PIPE_QUERY_TIMESTAMP;
+ switch (type) {
+ case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+ case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+ case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+ case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+ case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+ case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+ case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+ case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+ case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+ case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+ default: unreachable("query type does not correspond to winsys id");
+ }
}
-static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery)
{
- unsigned j, i, num_results, buf_size = 4096;
- uint32_t *results;
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
- /* Non-GPU queries. */
- switch (type) {
+ switch(query->b.type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
+ break;
case R600_QUERY_DRAW_CALLS:
+ query->begin_result = rctx->num_draw_calls;
+ break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_GTT_USAGE:
+ case R600_QUERY_GPU_TEMPERATURE:
+ case R600_QUERY_CURRENT_GPU_SCLK:
+ case R600_QUERY_CURRENT_GPU_MCLK:
+ query->begin_result = 0;
+ break;
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
+ case R600_QUERY_NUM_BYTES_MOVED: {
+ enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+ query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+ break;
+ }
+ case R600_QUERY_GPU_LOAD:
+ query->begin_result = r600_gpu_load_begin(rctx->screen);
+ break;
+ case R600_QUERY_NUM_COMPILATIONS:
+ query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ break;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ break;
+ default:
+ unreachable("r600_query_sw_begin: bad query type");
+ }
+
+ return TRUE;
+}
+
+/*
+ * End a software (CPU-side) query by taking its final sample.
+ *
+ * rctx:   context the query was created on.
+ * rquery: query to end; it is cast to struct r600_query_sw.
+ *
+ * Every counter-style query stores its final sample in end_result, because
+ * r600_query_sw_get_result() reports end_result - begin_result. Writing the
+ * final sample into begin_result instead (as the DRAW_CALLS, NUM_COMPILATIONS
+ * and NUM_SHADERS_CREATED cases used to do) destroys the begin sample and
+ * leaves end_result at its previous value, producing a bogus difference.
+ *
+ * PIPE_QUERY_GPU_FINISHED records no number: it flushes the context and keeps
+ * the fence returned by the flush. PIPE_QUERY_TIMESTAMP_DISJOINT needs no
+ * work here.
+ */
+static void r600_query_sw_end(struct r600_common_context *rctx,
+			      struct r600_query *rquery)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch(query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		break;
+	case PIPE_QUERY_GPU_FINISHED:
+		rctx->b.flush(&rctx->b, &query->fence, 0);
+		break;
+	case R600_QUERY_DRAW_CALLS:
+		query->end_result = rctx->num_draw_calls;
+		break;
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_REQUESTED_GTT:
 	case R600_QUERY_VRAM_USAGE:
 	case R600_QUERY_GTT_USAGE:
 	case R600_QUERY_GPU_TEMPERATURE:
 	case R600_QUERY_CURRENT_GPU_SCLK:
 	case R600_QUERY_CURRENT_GPU_MCLK:
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_NUM_CS_FLUSHES:
+	case R600_QUERY_NUM_BYTES_MOVED: {
+		/* All of these are read straight from the winsys. */
+		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	}
 	case R600_QUERY_GPU_LOAD:
+		query->end_result = r600_gpu_load_end(rctx->screen,
+						      query->begin_result);
+		/* r600_gpu_load_end() already consumed the begin sample; clear
+		 * it so that end_result - begin_result yields end_result. */
+		query->begin_result = 0;
+		break;
 	case R600_QUERY_NUM_COMPILATIONS:
+		query->end_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
 	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	default:
+		unreachable("r600_query_sw_end: bad query type");
+	}
+}
+
+/*
+ * Read back the result of a software query.
+ *
+ * rctx:   context the query was created on.
+ * rquery: query to read; it is cast to struct r600_query_sw.
+ * wait:   only used for PIPE_QUERY_GPU_FINISHED, where it selects between an
+ *         infinite and a zero fence timeout.
+ * result: receives the value.
+ *
+ * Returns TRUE when a result was written. For PIPE_QUERY_GPU_FINISHED the
+ * return value is the fence_finish() outcome, which is also stored in
+ * result->b.
+ */
+static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
+					struct r600_query *rquery,
+					boolean wait,
+					union pipe_query_result *result)
+{
+	struct r600_query_sw *sw = (struct r600_query_sw *)rquery;
+
+	/* The two queries that do not report a begin/end difference. */
+	if (sw->b.type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
+		/* Convert from cycles per millisecond to cycles per second (Hz). */
+		result->timestamp_disjoint.frequency =
+			(uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+		result->timestamp_disjoint.disjoint = FALSE;
+		return TRUE;
+	}
+
+	if (sw->b.type == PIPE_QUERY_GPU_FINISHED) {
+		struct pipe_screen *screen = rctx->b.screen;
+
+		result->b = screen->fence_finish(screen, sw->fence,
+						 wait ? PIPE_TIMEOUT_INFINITE : 0);
+		return result->b;
+	}
+
+	/* Everything else is the difference of the two recorded samples. */
+	result->u64 = sw->end_result - sw->begin_result;
+
+	/* Rescale the raw winsys values for the queries that need it. */
+	if (sw->b.type == R600_QUERY_BUFFER_WAIT_TIME ||
+	    sw->b.type == R600_QUERY_GPU_TEMPERATURE)
+		result->u64 /= 1000;
+	else if (sw->b.type == R600_QUERY_CURRENT_GPU_SCLK ||
+		 sw->b.type == R600_QUERY_CURRENT_GPU_MCLK)
+		result->u64 *= 1000000;
+
+	return TRUE;
+}
+
+static struct r600_query_ops sw_query_ops = { /* callback table installed on every query made by r600_query_sw_create() */
+ .destroy = r600_query_sw_destroy, /* drops the fence reference and frees the query */
+ .begin = r600_query_sw_begin, /* records the begin sample for counter-style queries */
+ .end = r600_query_sw_end, /* records the end sample, or flushes for GPU_FINISHED */
+ .get_result = r600_query_sw_get_result /* reports end_result - begin_result, rescaled for some types */
+};
+
+/*
+ * Allocate a software query of the given type.
+ *
+ * The object is zero-initialized by CALLOC_STRUCT and wired to sw_query_ops;
+ * ctx is not used. Returns NULL if the allocation fails.
+ */
+static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
+					       unsigned query_type)
+{
+	struct r600_query_sw *sw = CALLOC_STRUCT(r600_query_sw);
+
+	if (!sw)
 		return NULL;
+
+	sw->b.ops = &sw_query_ops;
+	sw->b.type = query_type;
+
+	return (struct pipe_query *)sw;
+}
+
+/*
+ * Destroy a hardware query: unreference every result buffer it owns (the
+ * chain of retired buffers first, then the current one) and free the query.
+ */
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+			   struct r600_query *rquery)
+{
+	struct r600_query_hw *hwq = (struct r600_query_hw *)rquery;
+	struct r600_query_buffer *node, *older;
+
+	/* Walk the list of retired buffers; fetch the link before freeing. */
+	for (node = hwq->buffer.previous; node; node = older) {
+		older = node->previous;
+		pipe_resource_reference((struct pipe_resource**)&node->buf, NULL);
+		FREE(node);
 	}
+
+	/* The current buffer is embedded in the query, so only its resource
+	 * reference is dropped. */
+	pipe_resource_reference((struct pipe_resource**)&hwq->buffer.buf, NULL);
+	FREE(rquery);
+}
+
+static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx,
+ struct r600_query_hw *query)
+{
+ unsigned buf_size = 4096;
+
/* Queries are normally read by the CPU after
* being written by the gpu, hence staging is probably a good
* usage pattern.
@@ -105,14 +242,30 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, buf_size);
- switch (type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
- memset(results, 0, buf_size);
+ if (query->flags & R600_QUERY_HW_FLAG_PREDICATE)
+ query->ops->prepare_buffer(ctx, query, buf);
+
+ return buf;
+}
+
+static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer)
+{
+ /* Callers ensure that the buffer is currently unused by the GPU. */
+ uint32_t *results = ctx->ws->buffer_map(buffer->cs_buf, NULL,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
+
+ memset(results, 0, buffer->b.b.width0);
+
+ if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+ query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+ unsigned num_results;
+ unsigned i, j;
/* Set top bits for unused backends. */
- num_results = buf_size / (16 * ctx->max_db);
+ num_results = buffer->b.b.width0 / (16 * ctx->max_db);
for (j = 0; j < num_results; j++) {
for (i = 0; i < ctx->max_db; i++) {
if (!(ctx->backend_mask & (1<<i))) {
@@ -122,22 +275,109 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
}
results += 4 * ctx->max_db;
}
+ }
+}
+
+static struct r600_query_ops query_hw_ops = { /* callback table installed as b.ops by r600_query_hw_create() */
+ .destroy = r600_query_hw_destroy, /* releases all result buffers and frees the query */
+ .begin = r600_query_hw_begin, /* resets buffers, emits the start packets and adds the query to an active list */
+ .end = r600_query_hw_end, /* emits the stop packets; unlinks the query unless it is a NO_START query */
+ .get_result = r600_query_hw_get_result, /* NOTE(review): definition is outside this excerpt */
+};
+
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, /* forward declarations: these statics are referenced by query_hw_default_hw_ops below */
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va); /* va: GPU address of the result slot, computed by the caller */
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va); /* va: same slot base address that was used for the start packets */
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+ struct r600_query_hw *, void *buffer,
+ union pipe_query_result *result); /* accumulates one result slot read from buffer into *result */
+static void r600_query_hw_clear_result(struct r600_query_hw *,
+ union pipe_query_result *); /* NOTE(review): definition is outside this excerpt */
+
+static struct r600_query_hw_ops query_hw_default_hw_ops = { /* default hardware-level hooks, assigned to query->ops in r600_query_hw_create() */
+ .prepare_buffer = r600_query_hw_prepare_buffer, /* zeroes a result buffer; invoked only for R600_QUERY_HW_FLAG_PREDICATE queries */
+ .emit_start = r600_query_hw_do_emit_start, /* invoked from r600_query_hw_emit_start() */
+ .emit_stop = r600_query_hw_do_emit_stop, /* invoked from r600_query_hw_emit_stop() */
+ .clear_result = r600_query_hw_clear_result,
+ .add_result = r600_query_hw_add_result,
+};
+
+/*
+ * Allocate the first result buffer of a hardware query.
+ *
+ * Returns TRUE on success and FALSE when no buffer could be created; in the
+ * failure case query->buffer.buf is left NULL.
+ */
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+			   struct r600_query_hw *query)
+{
+	struct r600_resource *first = r600_new_query_buffer(rctx, query);
+
+	query->buffer.buf = first;
+
+	return first ? TRUE : FALSE;
+}
+
+static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
+ unsigned query_type,
+ unsigned index)
+{
+ struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
+ if (!query)
+ return NULL;
+
+ query->b.type = query_type;
+ query->b.ops = &query_hw_ops;
+ query->ops = &query_hw_default_hw_ops;
+
+ switch (query_type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ query->result_size = 16 * rctx->max_db;
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6;
+ query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
break;
case PIPE_QUERY_TIME_ELAPSED:
+ query->result_size = 16;
+ query->num_cs_dw_begin = 8;
+ query->num_cs_dw_end = 8;
+ query->flags = R600_QUERY_HW_FLAG_TIMER;
+ break;
case PIPE_QUERY_TIMESTAMP:
+ query->result_size = 8;
+ query->num_cs_dw_end = 8;
+ query->flags = R600_QUERY_HW_FLAG_TIMER |
+ R600_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+ query->result_size = 32;
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6;
+ query->stream = index;
+ query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
+ break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
- memset(results, 0, buf_size);
+ /* 11 values on EG, 8 on R600. */
+ query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6;
break;
default:
assert(0);
+ FREE(query);
+ return NULL;
}
- return buf;
+
+ if (!r600_query_hw_init(rctx, query)) {
+ FREE(query);
+ return NULL;
+ }
+
+ return (struct pipe_query *)query;
}
static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
@@ -159,7 +399,7 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
}
}
-static unsigned event_type_for_stream(struct r600_query *query)
+static unsigned event_type_for_stream(struct r600_query_hw *query)
{
switch (query->stream) {
default:
@@ -170,28 +410,14 @@ static unsigned event_type_for_stream(struct r600_query *query)
}
}
-static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
- uint64_t va;
-
- r600_update_occlusion_query_state(ctx, query->type, 1);
- r600_update_prims_generated_query_state(ctx, query->type, 1);
- ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
-
- /* Get a new query buffer if needed. */
- if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
- struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
- *qbuf = query->buffer;
- query->buffer.buf = r600_new_query_buffer(ctx, query->type);
- query->buffer.results_end = 0;
- query->buffer.previous = qbuf;
- }
-
- /* emit begin query */
- va = query->buffer.buf->gpu_address + query->buffer.results_end;
- switch (query->type) {
+ switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
@@ -227,30 +453,50 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
}
r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
-
- if (r600_is_timer_query(query->type))
- ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
- else
- ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
}
-static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
+static void r600_query_hw_emit_start(struct r600_common_context *ctx,
+ struct r600_query_hw *query)
{
- struct radeon_winsys_cs *cs = ctx->gfx.cs;
uint64_t va;
- /* The queries which need begin already called this in begin_query. */
- if (!r600_query_needs_begin(query->type)) {
- ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
+ r600_update_occlusion_query_state(ctx, query->b.type, 1);
+ r600_update_prims_generated_query_state(ctx, query->b.type, 1);
+
+ ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
+ TRUE);
+
+ /* Get a new query buffer if needed. */
+ if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
+ struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
+ *qbuf = query->buffer;
+ query->buffer.buf = r600_new_query_buffer(ctx, query);
+ query->buffer.results_end = 0;
+ query->buffer.previous = qbuf;
}
- va = query->buffer.buf->gpu_address;
+ /* emit begin query */
+ va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+ query->ops->emit_start(ctx, query, query->buffer.buf, va);
- /* emit end query */
- switch (query->type) {
+ if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+ ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
+ else
+ ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
+}
+
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va)
+{
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+ switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
- va += query->buffer.results_end + 8;
+ va += 8;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
@@ -260,14 +506,14 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- va += query->buffer.results_end + query->result_size/2;
+ va += query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32) & 0xFFFF);
break;
case PIPE_QUERY_TIME_ELAPSED:
- va += query->buffer.results_end + query->result_size/2;
+ va += query->result_size/2;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
@@ -278,7 +524,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
radeon_emit(cs, 0);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- va += query->buffer.results_end + query->result_size/2;
+ va += query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
@@ -289,25 +535,41 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
}
r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
+}
+
+static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query)
+{
+ uint64_t va;
+
+ /* The queries which need begin already called this in begin_query. */
+ if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+ ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, FALSE);
+ }
+
+ /* emit end query */
+ va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+ query->ops->emit_stop(ctx, query, query->buffer.buf, va);
query->buffer.results_end += query->result_size;
- if (r600_query_needs_begin(query->type)) {
- if (r600_is_timer_query(query->type))
- ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
+ if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
+ if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+ ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw_end;
else
- ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
+ ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw_end;
}
- r600_update_occlusion_query_state(ctx, query->type, -1);
- r600_update_prims_generated_query_state(ctx, query->type, -1);
+ r600_update_occlusion_query_state(ctx, query->b.type, -1);
+ r600_update_prims_generated_query_state(ctx, query->b.type, -1);
}
static void r600_emit_query_predication(struct r600_common_context *ctx,
struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
- struct r600_query *query = (struct r600_query*)ctx->render_cond;
+ struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
struct r600_query_buffer *qbuf;
uint32_t op;
bool flag_wait;
@@ -318,7 +580,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
- switch (query->type) {
+ switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
op = PRED_OP(PREDICATION_OP_ZPASS);
@@ -364,94 +626,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct r600_query *query;
- bool skip_allocation = false;
- query = CALLOC_STRUCT(r600_query);
- if (query == NULL)
- return NULL;
-
- query->type = query_type;
-
- switch (query_type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- query->result_size = 16 * rctx->max_db;
- query->num_cs_dw = 6;
- break;
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- query->result_size = 16;
- query->num_cs_dw = 8;
- break;
- case PIPE_QUERY_TIMESTAMP:
- query->result_size = 8;
- query->num_cs_dw = 8;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_SO_STATISTICS:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
- query->result_size = 32;
- query->num_cs_dw = 6;
- query->stream = index;
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- /* 11 values on EG, 8 on R600. */
- query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
- query->num_cs_dw = 6;
- break;
- /* Non-GPU queries and queries not requiring a buffer. */
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- case PIPE_QUERY_GPU_FINISHED:
- case R600_QUERY_DRAW_CALLS:
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- case R600_QUERY_GPU_LOAD:
- case R600_QUERY_NUM_COMPILATIONS:
- case R600_QUERY_NUM_SHADERS_CREATED:
- skip_allocation = true;
- break;
- default:
- assert(0);
- FREE(query);
- return NULL;
- }
+ if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
+ query_type == PIPE_QUERY_GPU_FINISHED ||
+ query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
+ return r600_query_sw_create(ctx, query_type);
- if (!skip_allocation) {
- query->buffer.buf = r600_new_query_buffer(rctx, query_type);
- if (!query->buffer.buf) {
- FREE(query);
- return NULL;
- }
- }
- return (struct pipe_query*)query;
+ return r600_query_hw_create(rctx, query_type, index);
}
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
- struct r600_query *rquery = (struct r600_query*)query;
- struct r600_query_buffer *prev = rquery->buffer.previous;
-
- /* Release all query buffers. */
- while (prev) {
- struct r600_query_buffer *qbuf = prev;
- prev = prev->previous;
- pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
- FREE(qbuf);
- }
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct r600_query *rquery = (struct r600_query *)query;
- pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
- FREE(query);
+ rquery->ops->destroy(rctx, rquery);
}
static boolean r600_begin_query(struct pipe_context *ctx,
@@ -459,48 +648,14 @@ static boolean r600_begin_query(struct pipe_context *ctx,
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
- struct r600_query_buffer *prev = rquery->buffer.previous;
- if (!r600_query_needs_begin(rquery->type)) {
- assert(0);
- return false;
- }
+ return rquery->ops->begin(rctx, rquery);
+}
- /* Non-GPU queries. */
- switch (rquery->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- return true;
- case R600_QUERY_DRAW_CALLS:
- rquery->begin_result = rctx->num_draw_calls;
- return true;
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- rquery->begin_result = 0;
- return true;
- case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
- return true;
- case R600_QUERY_NUM_CS_FLUSHES:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
- return true;
- case R600_QUERY_NUM_BYTES_MOVED:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
- return true;
- case R600_QUERY_GPU_LOAD:
- rquery->begin_result = r600_gpu_load_begin(rctx->screen);
- return true;
- case R600_QUERY_NUM_COMPILATIONS:
- rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
- return true;
- case R600_QUERY_NUM_SHADERS_CREATED:
- rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
- return true;
- }
+static void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
+ struct r600_query_hw *query)
+{
+ struct r600_query_buffer *prev = query->buffer.previous;
/* Discard the old query buffers. */
while (prev) {
@@ -510,22 +665,39 @@ static boolean r600_begin_query(struct pipe_context *ctx,
FREE(qbuf);
}
- /* Obtain a new buffer if the current one can't be mapped without a stall. */
- if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
- !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
- pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
- rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
+ if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) {
+ /* Obtain a new buffer if the current one can't be mapped without a stall. */
+ if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
+ !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+ pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+ query->buffer.buf = r600_new_query_buffer(rctx, query);
+ } else {
+ query->ops->prepare_buffer(rctx, query, query->buffer.buf);
+ }
}
- rquery->buffer.results_end = 0;
- rquery->buffer.previous = NULL;
+ query->buffer.results_end = 0;
+ query->buffer.previous = NULL;
+}
- r600_emit_query_begin(rctx, rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
- if (r600_is_timer_query(rquery->type))
- LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries);
+ if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+ assert(0);
+ return false;
+ }
+
+ r600_query_hw_reset_buffers(rctx, query);
+
+ r600_query_hw_emit_start(rctx, query);
+
+ if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+ LIST_ADDTAIL(&query->list, &rctx->active_timer_queries);
else
- LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
+ LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries);
return true;
}
@@ -534,64 +706,24 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
- /* Non-GPU queries. */
- switch (rquery->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- return;
- case PIPE_QUERY_GPU_FINISHED:
- ctx->flush(ctx, &rquery->fence, 0);
- return;
- case R600_QUERY_DRAW_CALLS:
- rquery->end_result = rctx->num_draw_calls;
- return;
- case R600_QUERY_REQUESTED_VRAM:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
- return;
- case R600_QUERY_REQUESTED_GTT:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
- return;
- case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
- return;
- case R600_QUERY_NUM_CS_FLUSHES:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
- return;
- case R600_QUERY_NUM_BYTES_MOVED:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
- return;
- case R600_QUERY_VRAM_USAGE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE);
- return;
- case R600_QUERY_GTT_USAGE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE);
- return;
- case R600_QUERY_GPU_TEMPERATURE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000;
- return;
- case R600_QUERY_CURRENT_GPU_SCLK:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000;
- return;
- case R600_QUERY_CURRENT_GPU_MCLK:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000;
- return;
- case R600_QUERY_GPU_LOAD:
- rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
- return;
- case R600_QUERY_NUM_COMPILATIONS:
- rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
- return;
- case R600_QUERY_NUM_SHADERS_CREATED:
- rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
- return;
- }
+ rquery->ops->end(rctx, rquery);
+}
- r600_emit_query_end(rctx, rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+
+ if (query->flags & R600_QUERY_HW_FLAG_NO_START)
+ r600_query_hw_reset_buffers(rctx, query);
- if (r600_query_needs_begin(rquery->type))
- LIST_DELINIT(&rquery->list);
+ r600_query_hw_emit_stop(rctx, query);
+
+ if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
+ LIST_DELINIT(&query->list);
}
-static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
+static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
bool test_status_bit)
{
uint32_t *current_result = (uint32_t*)map;
@@ -609,80 +741,36 @@ static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned
return 0;
}
-static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
- struct r600_query *query,
- struct r600_query_buffer *qbuf,
- boolean wait,
- union pipe_query_result *result)
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ void *buffer,
+ union pipe_query_result *result)
{
- struct pipe_screen *screen = ctx->b.screen;
- unsigned results_base = 0;
- char *map;
-
- /* Non-GPU queries. */
- switch (query->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* Convert from cycles per millisecond to cycles per second (Hz). */
- result->timestamp_disjoint.frequency =
- (uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
- result->timestamp_disjoint.disjoint = FALSE;
- return TRUE;
- case PIPE_QUERY_GPU_FINISHED:
- result->b = screen->fence_finish(screen, query->fence,
- wait ? PIPE_TIMEOUT_INFINITE : 0);
- return result->b;
- case R600_QUERY_DRAW_CALLS:
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- case R600_QUERY_NUM_COMPILATIONS:
- case R600_QUERY_NUM_SHADERS_CREATED:
- result->u64 = query->end_result - query->begin_result;
- return TRUE;
- case R600_QUERY_GPU_LOAD:
- result->u64 = query->end_result;
- return TRUE;
- }
-
- map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
- PIPE_TRANSFER_READ |
- (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
- if (!map)
- return FALSE;
-
- /* count all results across all data blocks */
- switch (query->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- while (results_base != qbuf->results_end) {
+ switch (query->b.type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER: {
+ unsigned results_base = 0;
+ while (results_base != query->result_size) {
result->u64 +=
- r600_query_read_result(map + results_base, 0, 2, true);
+ r600_query_read_result(buffer + results_base, 0, 2, true);
results_base += 16;
}
break;
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- while (results_base != qbuf->results_end) {
+ }
+ case PIPE_QUERY_OCCLUSION_PREDICATE: {
+ unsigned results_base = 0;
+ while (results_base != query->result_size) {
result->b = result->b ||
- r600_query_read_result(map + results_base, 0, 2, true) != 0;
+ r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
results_base += 16;
}
break;
+ }
case PIPE_QUERY_TIME_ELAPSED:
- while (results_base != qbuf->results_end) {
- result->u64 +=
- r600_query_read_result(map + results_base, 0, 2, false);
- results_base += query->result_size;
- }
+ result->u64 += r600_query_read_result(buffer, 0, 2, false);
break;
case PIPE_QUERY_TIMESTAMP:
{
- uint32_t *current_result = (uint32_t*)map;
+ uint32_t *current_result = (uint32_t*)buffer;
result->u64 = (uint64_t)current_result[0] |
(uint64_t)current_result[1] << 32;
break;
@@ -694,84 +782,64 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
* u64 PrimitiveStorageNeeded;
* }
* We only need NumPrimitivesWritten here. */
- while (results_base != qbuf->results_end) {
- result->u64 +=
- r600_query_read_result(map + results_base, 2, 6, true);
- results_base += query->result_size;
- }
+ result->u64 += r600_query_read_result(buffer, 2, 6, true);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
/* Here we read PrimitiveStorageNeeded. */
- while (results_base != qbuf->results_end) {
- result->u64 +=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base += query->result_size;
- }
+ result->u64 += r600_query_read_result(buffer, 0, 4, true);
break;
case PIPE_QUERY_SO_STATISTICS:
- while (results_base != qbuf->results_end) {
- result->so_statistics.num_primitives_written +=
- r600_query_read_result(map + results_base, 2, 6, true);
- result->so_statistics.primitives_storage_needed +=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base += query->result_size;
- }
+ result->so_statistics.num_primitives_written +=
+ r600_query_read_result(buffer, 2, 6, true);
+ result->so_statistics.primitives_storage_needed +=
+ r600_query_read_result(buffer, 0, 4, true);
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- while (results_base != qbuf->results_end) {
- result->b = result->b ||
- r600_query_read_result(map + results_base, 2, 6, true) !=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base += query->result_size;
- }
+ result->b = result->b ||
+ r600_query_read_result(buffer, 2, 6, true) !=
+ r600_query_read_result(buffer, 0, 4, true);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
if (ctx->chip_class >= EVERGREEN) {
- while (results_base != qbuf->results_end) {
- result->pipeline_statistics.ps_invocations +=
- r600_query_read_result(map + results_base, 0, 22, false);
- result->pipeline_statistics.c_primitives +=
- r600_query_read_result(map + results_base, 2, 24, false);
- result->pipeline_statistics.c_invocations +=
- r600_query_read_result(map + results_base, 4, 26, false);
- result->pipeline_statistics.vs_invocations +=
- r600_query_read_result(map + results_base, 6, 28, false);
- result->pipeline_statistics.gs_invocations +=
- r600_query_read_result(map + results_base, 8, 30, false);
- result->pipeline_statistics.gs_primitives +=
- r600_query_read_result(map + results_base, 10, 32, false);
- result->pipeline_statistics.ia_primitives +=
- r600_query_read_result(map + results_base, 12, 34, false);
- result->pipeline_statistics.ia_vertices +=
- r600_query_read_result(map + results_base, 14, 36, false);
- result->pipeline_statistics.hs_invocations +=
- r600_query_read_result(map + results_base, 16, 38, false);
- result->pipeline_statistics.ds_invocations +=
- r600_query_read_result(map + results_base, 18, 40, false);
- result->pipeline_statistics.cs_invocations +=
- r600_query_read_result(map + results_base, 20, 42, false);
- results_base += query->result_size;
- }
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(buffer, 0, 22, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(buffer, 2, 24, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(buffer, 4, 26, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(buffer, 6, 28, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(buffer, 8, 30, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(buffer, 10, 32, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(buffer, 12, 34, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(buffer, 14, 36, false);
+ result->pipeline_statistics.hs_invocations +=
+ r600_query_read_result(buffer, 16, 38, false);
+ result->pipeline_statistics.ds_invocations +=
+ r600_query_read_result(buffer, 18, 40, false);
+ result->pipeline_statistics.cs_invocations +=
+ r600_query_read_result(buffer, 20, 42, false);
} else {
- while (results_base != qbuf->results_end) {
- result->pipeline_statistics.ps_invocations +=
- r600_query_read_result(map + results_base, 0, 16, false);
- result->pipeline_statistics.c_primitives +=
- r600_query_read_result(map + results_base, 2, 18, false);
- result->pipeline_statistics.c_invocations +=
- r600_query_read_result(map + results_base, 4, 20, false);
- result->pipeline_statistics.vs_invocations +=
- r600_query_read_result(map + results_base, 6, 22, false);
- result->pipeline_statistics.gs_invocations +=
- r600_query_read_result(map + results_base, 8, 24, false);
- result->pipeline_statistics.gs_primitives +=
- r600_query_read_result(map + results_base, 10, 26, false);
- result->pipeline_statistics.ia_primitives +=
- r600_query_read_result(map + results_base, 12, 28, false);
- result->pipeline_statistics.ia_vertices +=
- r600_query_read_result(map + results_base, 14, 30, false);
- results_base += query->result_size;
- }
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(buffer, 0, 16, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(buffer, 2, 18, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(buffer, 4, 20, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(buffer, 6, 22, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(buffer, 8, 24, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(buffer, 10, 26, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(buffer, 12, 28, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(buffer, 14, 30, false);
}
#if 0 /* for testing */
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
@@ -793,23 +861,47 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
default:
assert(0);
}
-
- return TRUE;
}
static boolean r600_get_query_result(struct pipe_context *ctx,
- struct pipe_query *query,
- boolean wait, union pipe_query_result *result)
+ struct pipe_query *query, boolean wait,
+ union pipe_query_result *result)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
+
+ return rquery->ops->get_result(rctx, rquery, wait, result);
+}
+/*
+ * Clear *result by handing it to util_query_clear_result() together with
+ * the query's type (query->b.type).
+ *
+ * The signature matches the clear_result hook of struct r600_query_hw_ops,
+ * which r600_query_hw_get_result() calls before accumulating samples.
+ * NOTE(review): presumably this is the stock implementation of that hook;
+ * the assignment is not visible in this hunk.
+ */
+static void r600_query_hw_clear_result(struct r600_query_hw *query,
+ union pipe_query_result *result)
+{
+ util_query_clear_result(result, query->b.type);
+}
+
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ boolean wait, union pipe_query_result *result)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *qbuf;
- util_query_clear_result(result, rquery->type);
+ query->ops->clear_result(query, result);
- for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
- if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
+ for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+ unsigned results_base = 0;
+ void *map;
+
+ map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf,
+ PIPE_TRANSFER_READ |
+ (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
+ if (!map)
return FALSE;
+
+ while (results_base != qbuf->results_end) {
+ query->ops->add_result(rctx, query, map + results_base,
+ result);
+ results_base += query->result_size;
}
}
@@ -827,7 +919,7 @@ static void r600_render_condition(struct pipe_context *ctx,
uint mode)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct r600_query *rquery = (struct r600_query*)query;
+ struct r600_query_hw *rquery = (struct r600_query_hw *)query;
struct r600_query_buffer *qbuf;
struct r600_atom *atom = &rctx->render_cond_atom;
@@ -837,8 +929,10 @@ static void r600_render_condition(struct pipe_context *ctx,
/* Compute the size of SET_PREDICATION packets. */
atom->num_dw = 0;
- for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
- atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+ if (query) {
+ for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+ atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+ }
rctx->set_atom_dirty(rctx, atom, query != NULL);
}
@@ -847,10 +941,10 @@ static void r600_suspend_queries(struct r600_common_context *ctx,
struct list_head *query_list,
unsigned *num_cs_dw_queries_suspend)
{
- struct r600_query *query;
+ struct r600_query_hw *query;
LIST_FOR_EACH_ENTRY(query, query_list, list) {
- r600_emit_query_end(ctx, query);
+ r600_query_hw_emit_stop(ctx, query);
}
assert(*num_cs_dw_queries_suspend == 0);
}
@@ -870,19 +964,19 @@ void r600_suspend_timer_queries(struct r600_common_context *ctx)
static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
struct list_head *query_list)
{
- struct r600_query *query;
+ struct r600_query_hw *query;
unsigned num_dw = 0;
LIST_FOR_EACH_ENTRY(query, query_list, list) {
/* begin + end */
- num_dw += query->num_cs_dw * 2;
+ num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;
/* Workaround for the fact that
* num_cs_dw_nontimer_queries_suspend is incremented for every
* resumed query, which raises the bar in need_cs_space for
* queries about to be resumed.
*/
- num_dw += query->num_cs_dw;
+ num_dw += query->num_cs_dw_end;
}
/* primitives generated query */
num_dw += ctx->streamout.enable_atom.num_dw;
@@ -896,7 +990,7 @@ static void r600_resume_queries(struct r600_common_context *ctx,
struct list_head *query_list,
unsigned *num_cs_dw_queries_suspend)
{
- struct r600_query *query;
+ struct r600_query_hw *query;
unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);
assert(*num_cs_dw_queries_suspend == 0);
@@ -905,7 +999,7 @@ static void r600_resume_queries(struct r600_common_context *ctx,
ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);
LIST_FOR_EACH_ENTRY(query, query_list, list) {
- r600_emit_query_begin(ctx, query);
+ r600_query_hw_emit_start(ctx, query);
}
}
@@ -1002,6 +1096,76 @@ err:
return;
}
+#define X(name_, query_type_, type_, result_type_) \
+ { \
+ .name = name_, \
+ .query_type = R600_QUERY_##query_type_, \
+ .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+ .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+ .group_id = ~(unsigned)0 \
+ }
+/*
+ * Table of the driver-specific queries reported through
+ * r600_get_driver_query_info().
+ *
+ * Each row is built with X(), which pastes the R600_QUERY_,
+ * PIPE_DRIVER_QUERY_TYPE_ and PIPE_DRIVER_QUERY_RESULT_TYPE_ prefixes onto
+ * its arguments and sets group_id to ~0 (all bits set). Fields that X()
+ * does not name (e.g. max_value) are zero-initialized.
+ *
+ * Row order matters: r600_get_num_queries() exposes only a leading prefix
+ * of this array and drops the last three or four rows depending on the DRM
+ * version, so the rows that may be hidden (GPU-load, temperature,
+ * shader-clock, memory-clock) have to stay at the end.
+ */
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+ X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
+ X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
+ X("draw-calls", DRAW_CALLS, UINT64, CUMULATIVE),
+ X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+ X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
+ X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
+ X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, CUMULATIVE),
+ X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
+ X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+ X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+ X("GPU-load", GPU_LOAD, UINT64, AVERAGE), /* hidden when the last 4 rows are dropped */
+ X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE), /* this and the rows below: hidden when 3 or 4 rows are dropped */
+ X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
+ X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
+};
+
+#undef X
+/*
+ * Return how many leading entries of r600_driver_query_list are exposed
+ * for this screen:
+ *  - DRM major 2 with minor >= 42: the whole table;
+ *  - DRM major 3: the table minus its last three entries;
+ *  - anything else: the table minus its last four entries.
+ */
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+ return Elements(r600_driver_query_list);
+ else if (rscreen->info.drm_major == 3)
+ return Elements(r600_driver_query_list) - 3;
+ else
+ return Elements(r600_driver_query_list) - 4;
+}
+/*
+ * Describe the driver-specific query at position `index`.
+ *
+ * Returns:
+ *  - the number of exposed queries when `info` is NULL;
+ *  - 0 when `index` is at or beyond that number (*info is left untouched);
+ *  - 1 after copying the table entry into *info.
+ *
+ * For the VRAM, GTT and temperature queries the copied entry's max_value
+ * is filled in afterwards; every other query keeps the value stored in the
+ * table.
+ */
+static int r600_get_driver_query_info(struct pipe_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_info *info)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ unsigned num_queries = r600_get_num_queries(rscreen);
+
+ if (!info) /* no output struct: the caller only wants the count */
+ return num_queries;
+
+ if (index >= num_queries)
+ return 0;
+
+ *info = r600_driver_query_list[index]; /* struct copy; max_value is patched below */
+
+ switch (info->query_type) {
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_VRAM_USAGE:
+ info->max_value.u64 = rscreen->info.vram_size;
+ break;
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_GTT_USAGE:
+ info->max_value.u64 = rscreen->info.gart_size;
+ break;
+ case R600_QUERY_GPU_TEMPERATURE:
+ info->max_value.u64 = 125; /* fixed upper bound; the unit is not stated in this file */
+ break;
+ }
+
+ return 1;
+}
+
void r600_query_init(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
@@ -1017,3 +1181,8 @@ void r600_query_init(struct r600_common_context *rctx)
LIST_INITHEAD(&rctx->active_nontimer_queries);
LIST_INITHEAD(&rctx->active_timer_queries);
}
+/*
+ * Install the screen-level query hook: points
+ * rscreen->b.get_driver_query_info at r600_get_driver_query_info().
+ */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
+{
+ rscreen->b.get_driver_query_info = r600_get_driver_query_info;
+}
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
new file mode 100644
index 00000000000..0ea5707ca45
--- /dev/null
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Nicolai Hähnle <[email protected]>
+ *
+ */
+
+#ifndef R600_QUERY_H
+#define R600_QUERY_H
+
+#include "pipe/p_defines.h"
+#include "util/list.h"
+
+struct r600_common_context;
+struct r600_query;
+struct r600_query_hw;
+struct r600_resource;
+/*
+ * Driver-specific query type IDs, numbered consecutively (+0 .. +13) from
+ * PIPE_QUERY_DRIVER_SPECIFIC. R600_QUERY_FIRST_PERFCOUNTER starts at +100,
+ * which leaves a gap after the last ID in the consecutive range.
+ */
+#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1)
+#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3)
+#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4)
+#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6)
+#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7)
+#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
+#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
+#define R600_QUERY_FIRST_PERFCOUNTER (PIPE_QUERY_DRIVER_SPECIFIC + 100)
+/*
+ * Function table attached to every struct r600_query.
+ *
+ * r600_get_query_result() dispatches through get_result. The
+ * r600_query_hw_destroy/begin/end/get_result functions declared at the end
+ * of this header have signatures matching the four slots.
+ */
+struct r600_query_ops {
+ void (*destroy)(struct r600_common_context *, struct r600_query *);
+ boolean (*begin)(struct r600_common_context *, struct r600_query *);
+ void (*end)(struct r600_common_context *, struct r600_query *);
+ boolean (*get_result)(struct r600_common_context *,
+ struct r600_query *, boolean wait,
+ union pipe_query_result *result);
+};
+/*
+ * Common base of all query objects: a function table plus the query type.
+ * struct r600_query_hw embeds this as its first member (`b`).
+ */
+struct r600_query {
+ struct r600_query_ops *ops;
+
+ /* The type of query */
+ unsigned type;
+};
+/*
+ * Single-bit flag values, so they can be OR-ed together.
+ * NOTE(review): presumably stored in r600_query_hw::flags; their users are
+ * not visible in this header -- confirm against r600_query.c.
+ */
+enum {
+ R600_QUERY_HW_FLAG_NO_START = (1 << 0),
+ R600_QUERY_HW_FLAG_TIMER = (1 << 1),
+ R600_QUERY_HW_FLAG_PREDICATE = (1 << 2),
+};
+/*
+ * Second-level function table used by hardware queries
+ * (struct r600_query_hw::ops).
+ *
+ * r600_query_hw_get_result() calls clear_result once, then add_result once
+ * per stored result, passing a pointer into the mapped result buffer.
+ * NOTE(review): the callers of prepare_buffer, emit_start and emit_stop are
+ * not visible from this header; check r600_query.c for their contracts.
+ */
+struct r600_query_hw_ops {
+ void (*prepare_buffer)(struct r600_common_context *,
+ struct r600_query_hw *,
+ struct r600_resource *);
+ void (*emit_start)(struct r600_common_context *,
+ struct r600_query_hw *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*emit_stop)(struct r600_common_context *,
+ struct r600_query_hw *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
+ void (*add_result)(struct r600_common_context *ctx,
+ struct r600_query_hw *, void *buffer,
+ union pipe_query_result *result);
+};
+/*
+ * One node in a singly linked chain of result buffers belonging to a
+ * hardware query. r600_query_hw_get_result() starts at the query's own
+ * node and follows `previous` until it is NULL.
+ */
+struct r600_query_buffer {
+ /* The buffer where query results are stored. */
+ struct r600_resource *buf;
+ /* Offset of the next free result after current query data.
+ * Readers step through the buffer from 0 up to this offset. */
+ unsigned results_end;
+ /* If a query buffer is full, a new buffer is created and the old one
+ * is put in here. When we calculate the result, we sum up the samples
+ * from all buffers. */
+ struct r600_query_buffer *previous;
+};
+/*
+ * A query whose results are stored in result buffers (see struct
+ * r600_query_buffer). Extends struct r600_query with a second function
+ * table and the bookkeeping needed to read results back and to reserve
+ * command-stream space.
+ */
+struct r600_query_hw {
+ struct r600_query b; /* must stay first: code casts r600_query * to r600_query_hw * */
+ struct r600_query_hw_ops *ops;
+ unsigned flags;
+
+ /* The query buffer and how many results are in it. */
+ struct r600_query_buffer buffer;
+ /* Size of the result in memory for both begin_query and end_query,
+ * this can be one or two numbers, or it could even be a size of a structure.
+ * r600_query_hw_get_result() uses it as the stride between results. */
+ unsigned result_size;
+ /* The number of dwords for begin_query or end_query.
+ * Summed by r600_queries_num_cs_dw_for_resuming() when resuming queries. */
+ unsigned num_cs_dw_begin;
+ unsigned num_cs_dw_end;
+ /* Linked list of queries */
+ struct list_head list;
+ /* For transform feedback: which stream the query is for */
+ unsigned stream;
+};
+/*
+ * Entry points for hardware queries. destroy/begin/end/get_result take the
+ * base struct r600_query and have the same signatures as the slots of
+ * struct r600_query_ops; r600_query_hw_init takes the derived struct.
+ * NOTE(review): only r600_query_hw_get_result's body is visible alongside
+ * this header; see r600_query.c for what the others do and what their
+ * boolean return values mean.
+ */
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+ struct r600_query_hw *query);
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ boolean wait,
+ union pipe_query_result *result);
+
+#endif /* R600_QUERY_H */
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index 0dac6fbbdce..8a60441c056 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -49,6 +49,7 @@
#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
+#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
/**
* flush commands to the hardware
@@ -405,7 +406,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
enc->use_vm = true;
if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
enc->use_vui = true;
- if (rscreen->info.family >= CHIP_TONGA)
+ if (rscreen->info.family >= CHIP_TONGA &&
+ rscreen->info.family != CHIP_STONEY)
enc->dual_pipe = true;
/* TODO enable B frame with dual instance */
if ((rscreen->info.family >= CHIP_TONGA) &&
@@ -478,6 +480,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
radeon_vce_50_init(enc);
break;
+ case FW_52_0_3:
+ radeon_vce_52_init(enc);
+ break;
+
default:
goto error;
}
@@ -500,11 +506,17 @@ error:
*/
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
- return rscreen->info.vce_fw_version == FW_40_2_2 ||
- rscreen->info.vce_fw_version == FW_50_0_1 ||
- rscreen->info.vce_fw_version == FW_50_1_2 ||
- rscreen->info.vce_fw_version == FW_50_10_2 ||
- rscreen->info.vce_fw_version == FW_50_17_3;
+ switch (rscreen->info.vce_fw_version) {
+ case FW_40_2_2:
+ case FW_50_0_1:
+ case FW_50_1_2:
+ case FW_50_10_2:
+ case FW_50_17_3:
+ case FW_52_0_3:
+ return true;
+ default:
+ return false;
+ }
}
/**
diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
index 624bda479f8..25e2133521f 100644
--- a/src/gallium/drivers/radeon/radeon_vce.h
+++ b/src/gallium/drivers/radeon/radeon_vce.h
@@ -140,4 +140,7 @@ void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
/* init vce fw 50 specific callbacks */
void radeon_vce_50_init(struct rvce_encoder *enc);
+/* init vce fw 52 specific callbacks */
+void radeon_vce_52_init(struct rvce_encoder *enc);
+
#endif
diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c b/src/gallium/drivers/radeon/radeon_vce_52.c
new file mode 100644
index 00000000000..fbae1f97f41
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -0,0 +1,242 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+/*
+ * Value written as encProfile by create(), indexed by
+ * (enc->base.profile - PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE).
+ * NOTE(review): the numbers look like H.264 profile_idc codes (66 Baseline,
+ * 77 Main, 88 Extended, 100 High, 110 High 10, 122 High 4:2:2, 244 High
+ * 4:4:4 Predictive) -- confirm that the seven entries line up with the
+ * PIPE_VIDEO_PROFILE_MPEG4_AVC_* enum order.
+ */
+static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
+/*
+ * Emit the session "create" command.
+ *
+ * Calls enc->task_info() first, then writes one 0x01000001 packet carrying
+ * the profile, level, picture dimensions and reference-picture pitches.
+ * The four trailing pre-encode fields are all written as zero.
+ */
+static void create(struct rvce_encoder *enc)
+{
+ enc->task_info(enc, 0x00000000, 0, 0, 0);
+
+ RVCE_BEGIN(0x01000001); // create cmd
+ RVCE_CS(0x00000000); // encUseCircularBuffer
+ RVCE_CS(profiles[enc->base.profile -
+ PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile
+ RVCE_CS(enc->base.level); // encLevel
+ RVCE_CS(0x00000000); // encPicStructRestriction
+ RVCE_CS(enc->base.width); // encImageWidth
+ RVCE_CS(enc->base.height); // encImageHeight
+ RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch
+ RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch
+ RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw
+ RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO
+ /* pre-encode fields: all zero */
+ RVCE_CS(0x00000000); // encPreEncodeContextBufferOffset
+ RVCE_CS(0x00000000); // encPreEncodeInputLumaBufferOffset
+ RVCE_CS(0x00000000); // encPreEncodeInputChromaBufferOffs
+ RVCE_CS(0x00000000); // encPreEncodeMode|ChromaFlag|VBAQMode|SceneChangeSensitivity
+ RVCE_END();
+}
+/*
+ * Emit the command sequence that encodes one picture.
+ *
+ * Order of packets written:
+ *  1. task info (enc->task_info) with the dependency value `dep`;
+ *  2. 0x05000001 context buffer;
+ *  3. 0x05000004 video bitstream buffer;
+ *  4. 0x05000002 auxiliary buffer, only when enc->dual_pipe is set;
+ *  5. 0x03000001 encode, describing the input picture, the reference
+ *     pictures (L0[0], L0[1], L1[0]) and the reconstructed picture.
+ *
+ * Side effect: enc->bs_idx is post-incremented; the value before the
+ * increment is used throughout this call.
+ */
+static void encode(struct rvce_encoder *enc)
+{
+ signed luma_offset, chroma_offset, bs_offset;
+ unsigned dep, bs_idx = enc->bs_idx++;
+ int i;
+ /* dep is forwarded to task_info() below; it is non-zero only in dual-instance mode */
+ if (enc->dual_inst) {
+ if (bs_idx == 0)
+ dep = 1;
+ else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
+ dep = 0;
+ else
+ dep = 2;
+ } else
+ dep = 0;
+
+ enc->task_info(enc, 0x00000003, dep, 0, bs_idx);
+
+ RVCE_BEGIN(0x05000001); // context buffer
+ RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
+ RVCE_END();
+ /* the bitstream offset is the negated byte position bs_idx * bs_size */
+ bs_offset = -(signed)(bs_idx * enc->bs_size);
+
+ RVCE_BEGIN(0x05000004); // video bitstream buffer
+ RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo
+ RVCE_CS(enc->bs_size); // videoBitstreamRingSize
+ RVCE_END();
+ /* dual-pipe mode: 8 auxiliary offsets (one row size apart, counted back from the end of the cpb buffer), then 8 sizes */
+ if (enc->dual_pipe) {
+ unsigned aux_offset = enc->cpb.res->buf->size -
+ RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
+ RVCE_BEGIN(0x05000002); // auxiliary buffer
+ for (i = 0; i < 8; ++i) {
+ RVCE_CS(aux_offset);
+ aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
+ }
+ for (i = 0; i < 8; ++i)
+ RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE);
+ RVCE_END();
+ }
+
+ RVCE_BEGIN(0x03000001); // encode
+ RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders (0x11 only when frame_num is 0)
+ RVCE_CS(0x00000000); // pictureStructure
+ RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
+ RVCE_CS(0x00000000); // forceRefreshMap
+ RVCE_CS(0x00000000); // insertAUD
+ RVCE_CS(0x00000000); // endOfSequence
+ RVCE_CS(0x00000000); // endOfStream
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
+ RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
+ RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
+ RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+ if (enc->dual_pipe)
+ RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ else
+ RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ RVCE_CS(0x00000000); // encInputPicTileConfig
+ RVCE_CS(enc->pic.picture_type); // encPicType
+ RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
+ RVCE_CS(0x00000000); // encIdrPicId
+ RVCE_CS(0x00000000); // encMGSKeyPic
+ RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag
+ RVCE_CS(0x00000000); // encTemporalLayerIndex
+ RVCE_CS(0x00000000); // num_ref_idx_active_override_flag
+ RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1
+ RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1
+ /* first ref-list modification slot: used only for P pictures whose frame_num is more than 1 ahead of ref_idx_l0 */
+ i = enc->pic.frame_num - enc->pic.ref_idx_l0;
+ if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
+ RVCE_CS(0x00000001); // encRefListModificationOp
+ RVCE_CS(i - 1); // encRefListModificationNum
+ } else {
+ RVCE_CS(0x00000000); // encRefListModificationOp
+ RVCE_CS(0x00000000); // encRefListModificationNum
+ }
+ /* the remaining 3 ref-list modification slots and all 4 picture-marking slots are zeroed */
+ for (i = 0; i < 3; ++i) {
+ RVCE_CS(0x00000000); // encRefListModificationOp
+ RVCE_CS(0x00000000); // encRefListModificationNum
+ }
+ for (i = 0; i < 4; ++i) {
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingOp
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingNum
+ RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx
+ RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp
+ RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum
+ }
+
+ // encReferencePictureL0[0]: filled from l0_slot() for P and B pictures, otherwise offsets are 0xffffffff
+ RVCE_CS(0x00000000); // pictureStructure
+ if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
+ enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+ struct rvce_cpb_slot *l0 = l0_slot(enc);
+ rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
+ RVCE_CS(l0->picture_type); // encPicType
+ RVCE_CS(l0->frame_num); // frameNumber
+ RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
+ RVCE_CS(luma_offset); // lumaOffset
+ RVCE_CS(chroma_offset); // chromaOffset
+ } else {
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+ }
+
+ // encReferencePictureL0[1]: always written with zero fields and 0xffffffff offsets
+ RVCE_CS(0x00000000); // pictureStructure
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+
+ // encReferencePictureL1[0]: filled from l1_slot() for B pictures only
+ RVCE_CS(0x00000000); // pictureStructure
+ if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+ struct rvce_cpb_slot *l1 = l1_slot(enc);
+ rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
+ RVCE_CS(l1->picture_type); // encPicType
+ RVCE_CS(l1->frame_num); // frameNumber
+ RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
+ RVCE_CS(luma_offset); // lumaOffset
+ RVCE_CS(chroma_offset); // chromaOffset
+ } else {
+ RVCE_CS(0x00000000); // encPicType
+ RVCE_CS(0x00000000); // frameNumber
+ RVCE_CS(0x00000000); // pictureOrderCount
+ RVCE_CS(0xffffffff); // lumaOffset
+ RVCE_CS(0xffffffff); // chromaOffset
+ }
+ /* the reconstructed picture offsets come from the current slot */
+ rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+ RVCE_CS(luma_offset); // encReconstructedLumaOffset
+ RVCE_CS(chroma_offset); // encReconstructedChromaOffset
+ RVCE_CS(0x00000000); // encColocBufferOffset
+ RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset
+ RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset
+ RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset
+ RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset
+ RVCE_CS(0x00000000); // pictureCount
+ RVCE_CS(enc->pic.frame_num); // frameNumber
+ RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount
+ RVCE_CS(0x00000000); // numIPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numPPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numBPicRemainInRCGOP
+ RVCE_CS(0x00000000); // numIRPicRemainInRCGOP
+ RVCE_CS(0x00000000); // enableIntraRefresh
+ /* the aq_* fields below are all written as zero */
+ RVCE_CS(0x00000000); // aq_variance_en
+ RVCE_CS(0x00000000); // aq_block_size
+ RVCE_CS(0x00000000); // aq_mb_variance_sel
+ RVCE_CS(0x00000000); // aq_frame_variance_sel
+ RVCE_CS(0x00000000); // aq_param_a
+ RVCE_CS(0x00000000); // aq_param_b
+ RVCE_CS(0x00000000); // aq_param_c
+ RVCE_CS(0x00000000); // aq_param_d
+ RVCE_CS(0x00000000); // aq_param_e
+
+ RVCE_CS(0x00000000); // contextInSFB
+ RVCE_END();
+}
+/*
+ * Set up the encoder callbacks for VCE firmware 52: run the firmware 50
+ * setup first, then replace the create and encode callbacks with the
+ * versions defined in this file.
+ */
+void radeon_vce_52_init(struct rvce_encoder *enc)
+{
+ radeon_vce_50_init(enc);
+ /* only create and encode are overridden; everything else stays as radeon_vce_50_init() left it */
+ enc->create = create;
+ enc->encode = encode;
+}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 93847d5ec2f..209b940aa11 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3256,25 +3256,34 @@ si_write_harvested_raster_configs(struct si_context *sctx,
}
}
- /* GRBM_GFX_INDEX is privileged on VI */
- if (sctx->b.chip_class <= CIK)
+ /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
+ if (sctx->b.chip_class < CIK)
si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
SE_INDEX(se) | SH_BROADCAST_WRITES |
INSTANCE_BROADCAST_WRITES);
+ else
+ si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
}
- /* GRBM_GFX_INDEX is privileged on VI */
- if (sctx->b.chip_class <= CIK)
+ /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
+ if (sctx->b.chip_class < CIK)
si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
INSTANCE_BROADCAST_WRITES);
+ else
+ si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
}
static void si_init_config(struct si_context *sctx)
{
+ struct si_screen *sscreen = sctx->screen;
unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
unsigned raster_config, raster_config_1;
@@ -3345,9 +3354,14 @@ static void si_init_config(struct si_context *sctx)
raster_config_1 = 0x0000002e;
break;
case CHIP_FIJI:
- /* Fiji should be same as Hawaii, but that causes corruption in some cases */
- raster_config = 0x16000012; /* 0x3a00161a */
- raster_config_1 = 0x0000002a; /* 0x0000002e */
+ if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
+ /* old kernels with old tiling config */
+ raster_config = 0x16000012;
+ raster_config_1 = 0x0000002a;
+ } else {
+ raster_config = 0x3a00161a;
+ raster_config_1 = 0x0000002e;
+ }
break;
case CHIP_TONGA:
raster_config = 0x16000012;
diff --git a/src/gallium/drivers/softpipe/Automake.inc b/src/gallium/drivers/softpipe/Automake.inc
index 6455f3caa3d..bd3c2eead16 100644
--- a/src/gallium/drivers/softpipe/Automake.inc
+++ b/src/gallium/drivers/softpipe/Automake.inc
@@ -3,13 +3,10 @@ if HAVE_GALLIUM_SOFTPIPE
TARGET_DRIVERS += swrast
TARGET_CPPFLAGS += -DGALLIUM_SOFTPIPE
TARGET_LIB_DEPS += \
- $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la \
$(top_builddir)/src/gallium/drivers/softpipe/libsoftpipe.la
-if HAVE_DRI2
+if HAVE_DRISW_KMS
TARGET_DRIVERS += kms_swrast
-TARGET_LIB_DEPS += \
- $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la
endif
endif
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index bcce18a3502..6a4f9d8d076 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -51,14 +51,16 @@
#define SVGA_QUERY_NUM_VALIDATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 3)
#define SVGA_QUERY_MAP_BUFFER_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 4)
#define SVGA_QUERY_NUM_RESOURCES_MAPPED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define SVGA_QUERY_NUM_BYTES_UPLOADED (PIPE_QUERY_DRIVER_SPECIFIC + 6)
+
/* running total counters */
-#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 7)
+#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 8)
+#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 11)
/*SVGA_QUERY_MAX has to be last because it is size of an array*/
-#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 12)
/**
* Maximum supported number of constant buffers per shader
@@ -485,6 +487,7 @@ struct svga_context
uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */
uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */
uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
+ uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */
} hud;
/** The currently bound stream output targets */
diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c
index 28b8064bf70..2b549dfa5bb 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -53,17 +53,17 @@ static const struct vgpu10_format_entry format_conversion_table[] =
{ PIPE_FORMAT_A8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_X8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_B5G5R5A1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, 0 },
- { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A4R4G4B4, 0 },
+ { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_B5G6R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, 0 },
{ PIPE_FORMAT_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, 0 },
- { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_LUMINANCE8, 0 },
+ { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, 0 },
{ PIPE_FORMAT_I8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_L8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_UYVY, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_YUYV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
- { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_Z_D16, 0 },
+ { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D16_UNORM, 0 },
{ PIPE_FORMAT_Z32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_Z32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, 0 },
{ PIPE_FORMAT_Z24_UNORM_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 },
@@ -152,14 +152,14 @@ static const struct vgpu10_format_entry format_conversion_table[] =
{ PIPE_FORMAT_A8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_X8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_R8G8B8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, 0 },
- { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
- { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
- { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 },
- { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 },
- { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
- { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
- { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 },
- { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 },
+ { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 },
+ { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM, 0 },
+ { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM, 0 },
+ { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM, 0 },
+ { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC1_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC2_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_BC3_UNORM_SRGB, 0 },
{ PIPE_FORMAT_RGTC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_UNORM, 0 },
{ PIPE_FORMAT_RGTC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_SNORM, 0 },
{ PIPE_FORMAT_RGTC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, 0 },
@@ -326,6 +326,34 @@ static const struct vgpu10_format_entry format_conversion_table[] =
{ PIPE_FORMAT_ETC2_R11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_ETC2_RG11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_ETC2_RG11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_4x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_5x4, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_5x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_6x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_6x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x5, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x6, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_12x10, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_12x12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_4x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_5x4_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_5x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_6x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_6x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_8x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x6_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_10x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_12x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ASTC_12x12_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
};
@@ -472,7 +500,7 @@ struct format_cap {
* PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to SVGA3D_D24_UNORM_S8_UINT
* for rendering but converted to SVGA3D_R24_UNORM_X8_TYPELESS for sampling.
* If we want to query if a format supports both rendering and sampling the
- * host will tell us no for both SVGA3D_D24_UNORM_S8_UINT and
+ * host will tell us no for SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D16_UNORM and
* SVGA3D_R24_UNORM_X8_TYPELESS. So we override the host query for those
* formats and report that both can do rendering and sampling.
*/
@@ -1410,27 +1438,50 @@ static const struct format_cap format_cap_table[] = {
},
{
"SVGA3D_BC1_TYPELESS",
- SVGA3D_BC1_TYPELESS, 0, 0, 0, 0, 0
+ SVGA3D_BC1_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC1_UNORM_SRGB",
- SVGA3D_BC1_UNORM_SRGB, 0, 0, 0, 0, 0
+ SVGA3D_BC1_UNORM_SRGB,
+ SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC2_TYPELESS",
- SVGA3D_BC2_TYPELESS, 0, 0, 0, 0, 0
+ SVGA3D_BC2_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC2_UNORM_SRGB",
- SVGA3D_BC2_UNORM_SRGB, 0, 0, 0, 0, 0
+ SVGA3D_BC2_UNORM_SRGB,
+ SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC3_TYPELESS",
- SVGA3D_BC3_TYPELESS, 0, 0, 0, 0, 0
+ SVGA3D_BC3_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC3_UNORM_SRGB",
- SVGA3D_BC3_UNORM_SRGB, 0, 0, 0, 0, 0
+ SVGA3D_BC3_UNORM_SRGB, SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB, /* format, then its devcap index (same field order as the other BCn entries) */
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC4_TYPELESS",
@@ -1671,7 +1722,7 @@ static const struct format_cap format_cap_table[] = {
{
"SVGA3D_D16_UNORM",
SVGA3D_D16_UNORM,
- SVGA3D_DEVCAP_DXFMT_D16_UNORM,
+ 0, /*SVGA3D_DEVCAP_DXFMT_D16_UNORM*/
1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
@@ -1690,15 +1741,27 @@ static const struct format_cap format_cap_table[] = {
},
{
"SVGA3D_BC1_UNORM",
- SVGA3D_BC1_UNORM, 0, 0, 0, 0, 0
+ SVGA3D_BC1_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC1_UNORM,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC2_UNORM",
- SVGA3D_BC2_UNORM, 0, 0, 0, 0, 0
+ SVGA3D_BC2_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC2_UNORM,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_BC3_UNORM",
- SVGA3D_BC3_UNORM, 0, 0, 0, 0, 0
+ SVGA3D_BC3_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC3_UNORM,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
"SVGA3D_B5G6R5_UNORM",
@@ -2053,6 +2116,7 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
case SVGA3D_R8G8_UINT:
case SVGA3D_R8G8_SINT:
return SVGA3D_R8G8_TYPELESS;
+ case SVGA3D_D16_UNORM:
case SVGA3D_R16_UNORM:
case SVGA3D_R16_UINT:
case SVGA3D_R16_SNORM:
@@ -2070,6 +2134,15 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
case SVGA3D_B8G8R8X8_UNORM_SRGB:
case SVGA3D_B8G8R8X8_UNORM:
return SVGA3D_B8G8R8X8_TYPELESS;
+ case SVGA3D_BC1_UNORM:
+ case SVGA3D_BC1_UNORM_SRGB:
+ return SVGA3D_BC1_TYPELESS;
+ case SVGA3D_BC2_UNORM:
+ case SVGA3D_BC2_UNORM_SRGB:
+ return SVGA3D_BC2_TYPELESS;
+ case SVGA3D_BC3_UNORM:
+ case SVGA3D_BC3_UNORM_SRGB:
+ return SVGA3D_BC3_TYPELESS;
case SVGA3D_BC4_UNORM:
case SVGA3D_BC4_SNORM:
return SVGA3D_BC4_TYPELESS;
@@ -2079,18 +2152,10 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
/* Special cases (no corresponding _TYPELESS formats) */
case SVGA3D_A8_UNORM:
- case SVGA3D_A4R4G4B4:
case SVGA3D_B5G5R5A1_UNORM:
case SVGA3D_B5G6R5_UNORM:
- case SVGA3D_DXT1:
- case SVGA3D_DXT2:
- case SVGA3D_DXT3:
- case SVGA3D_DXT4:
- case SVGA3D_DXT5:
case SVGA3D_R11G11B10_FLOAT:
case SVGA3D_R9G9B9E5_SHAREDEXP:
- case SVGA3D_Z_D32:
- case SVGA3D_Z_D16:
return format;
default:
debug_printf("Unexpected format %s in %s\n",
@@ -2098,3 +2163,26 @@ svga_typeless_format(SVGA3dSurfaceFormat format)
return format;
}
}
+
+
+/**
+ * Given a surface format, return the format to use when sampling it through
+ * a texture sampler. The four depth(/stencil) formats in the switch below
+ * are remapped to R-channel formats; every other format is returned as-is.
+ */
+SVGA3dSurfaceFormat
+svga_sampler_format(SVGA3dSurfaceFormat format)
+{
+ switch (format) {
+ case SVGA3D_D16_UNORM:
+ return SVGA3D_R16_UNORM;
+ case SVGA3D_D24_UNORM_S8_UINT:
+ return SVGA3D_R24_UNORM_X8_TYPELESS;
+ case SVGA3D_D32_FLOAT:
+ return SVGA3D_R32_FLOAT;
+ case SVGA3D_D32_FLOAT_S8X24_UINT:
+ return SVGA3D_R32_FLOAT_X8X24_TYPELESS;
+ default:
+ return format; /* no special case: hand the input format back unchanged */
+ }
+}
diff --git a/src/gallium/drivers/svga/svga_format.h b/src/gallium/drivers/svga/svga_format.h
index 0af218cb01a..9f9a530d473 100644
--- a/src/gallium/drivers/svga/svga_format.h
+++ b/src/gallium/drivers/svga/svga_format.h
@@ -93,4 +93,8 @@ SVGA3dSurfaceFormat
svga_typeless_format(SVGA3dSurfaceFormat format);
+SVGA3dSurfaceFormat
+svga_sampler_format(SVGA3dSurfaceFormat format);
+
+
#endif /* SVGA_FORMAT_H_ */
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 8b9818334ca..5416a009dcb 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -731,6 +731,7 @@ svga_create_query(struct pipe_context *pipe,
case SVGA_QUERY_MAP_BUFFER_TIME:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
break;
default:
assert(!"unexpected query type in svga_create_query()");
@@ -797,6 +798,7 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_MAP_BUFFER_TIME:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
/* nothing */
break;
default:
@@ -876,6 +878,9 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
sq->begin_count = svga->hud.num_resources_mapped;
break;
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
+ sq->begin_count = svga->hud.num_bytes_uploaded;
+ break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
@@ -966,6 +971,9 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
sq->end_count = svga->hud.num_resources_mapped;
break;
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
+ sq->end_count = svga->hud.num_bytes_uploaded;
+ break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
@@ -1061,6 +1069,7 @@ svga_get_query_result(struct pipe_context *pipe,
case SVGA_QUERY_NUM_FLUSHES:
case SVGA_QUERY_NUM_VALIDATIONS:
case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+ case SVGA_QUERY_NUM_BYTES_UPLOADED:
case SVGA_QUERY_MAP_BUFFER_TIME:
vresult->u64 = sq->end_count - sq->begin_count;
break;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index 71f2f4f2779..449cc149a81 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -80,6 +80,11 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
uint8_t *map;
int64_t begin = os_time_get();
+ assert(box->y == 0);
+ assert(box->z == 0);
+ assert(box->height == 1);
+ assert(box->depth == 1);
+
transfer = CALLOC_STRUCT(pipe_transfer);
if (transfer == NULL) {
return NULL;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 69e5f75e208..8c5cff5abc1 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -429,6 +429,8 @@ svga_buffer_upload_flush(struct svga_context *svga,
assert(box->x <= sbuf->b.b.width0);
assert(box->x + box->w <= sbuf->b.b.width0);
+
+ svga->hud.num_bytes_uploaded += box->w;
}
}
else {
@@ -454,6 +456,8 @@ svga_buffer_upload_flush(struct svga_context *svga,
assert(box->x <= sbuf->b.b.width0);
assert(box->x + box->w <= sbuf->b.b.width0);
+
+ svga->hud.num_bytes_uploaded += box->w;
}
}
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index a02d1e495ff..81594777258 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -380,6 +380,12 @@ svga_texture_transfer_map(struct pipe_context *pipe,
break;
}
+ if (usage & PIPE_TRANSFER_WRITE) {
+ /* record texture upload for HUD */
+ svga->hud.num_bytes_uploaded +=
+ nblocksx * nblocksy * d * util_format_get_blocksize(texture->format);
+ }
+
if (!use_direct_map) {
/* Use a DMA buffer */
st->hw_nblocksy = nblocksy;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index a80bc9b9119..09a3d33552b 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -789,6 +789,8 @@ svga_get_driver_query_info(struct pipe_screen *screen,
{"map-buffer-time", SVGA_QUERY_MAP_BUFFER_TIME, {0},
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS},
{"num-resources-mapped", SVGA_QUERY_NUM_RESOURCES_MAPPED, {0}},
+ {"num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED, {0},
+ PIPE_DRIVER_QUERY_TYPE_BYTES, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE},
/* running total counters */
{"memory-used", SVGA_QUERY_MEMORY_USED, {0},
diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
index 611d2c6102f..c5d52bbfd14 100644
--- a/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@@ -108,6 +108,9 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
PIPE_BIND_SAMPLER_VIEW);
assert(format != SVGA3D_FORMAT_INVALID);
+ /* Convert the format to a sampler-friendly format, if needed */
+ format = svga_sampler_format(format);
+
if (texture->target == PIPE_BUFFER) {
viewDesc.buffer.firstElement = sv->base.u.buf.first_element;
viewDesc.buffer.numElements = (sv->base.u.buf.last_element -
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 8b02680c77e..62a51e9a94d 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -456,9 +456,6 @@ trace_screen_create(struct pipe_screen *screen)
{
struct trace_screen *tr_scr;
- if(!screen)
- goto error1;
-
if (!trace_enabled())
goto error1;
diff --git a/src/gallium/drivers/vc4/Automake.inc b/src/gallium/drivers/vc4/Automake.inc
index 6fa3e190cac..5664c2ab14e 100644
--- a/src/gallium/drivers/vc4/Automake.inc
+++ b/src/gallium/drivers/vc4/Automake.inc
@@ -6,8 +6,4 @@ TARGET_LIB_DEPS += \
$(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \
$(top_builddir)/src/gallium/drivers/vc4/libvc4.la
-if USE_VC4_SIMULATOR
-TARGET_CPPFLAGS += -DUSE_VC4_SIMULATOR
-endif
-
endif
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index f4a57ba3404..a3bf72fc72a 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -23,7 +23,6 @@ include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
if USE_VC4_SIMULATOR
-SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1
SIM_LDFLAGS = -lsimpenrose
endif
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 373c9e12d11..0672a92226f 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -509,8 +509,8 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
nir_ssa_def *src_color[4], *unpacked_dst_color[4];
for (unsigned i = 0; i < 4; i++) {
- src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
- unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
+ src_color[i] = nir_channel(b, intr->src[0].ssa, i);
+ unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
}
vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 7ea263afb68..1afe52a63f4 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -84,7 +84,7 @@ vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
static nir_ssa_def *
vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
- return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false);
+ return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan);
}
static nir_ssa_def *
@@ -326,9 +326,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
assert(intr->src[0].is_ssa);
- intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b,
- intr->src[0].ssa,
- &i, 1, false));
+ intr_comp->src[0] =
+ nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i));
nir_builder_instr_insert(b, &intr_comp->instr);
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index f1bab810eff..07a92266dd2 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -144,6 +144,8 @@ qir_opt_algebraic(struct vc4_compile *c)
case QOP_SEL_X_Y_ZC:
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
+ case QOP_SEL_X_Y_CS:
+ case QOP_SEL_X_Y_CC:
if (is_zero(c, inst->src[1])) {
/* Replace references to a 0 uniform value
* with the SEL_X_0 equivalent.
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a48dad804e2..197577b6c20 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -987,6 +987,10 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
qir_SF(c, qir_SUB(c, src[0], src[1]));
*dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
break;
+ case nir_op_uge:
+ qir_SF(c, qir_SUB(c, src[0], src[1]));
+ *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0));
+ break;
case nir_op_ilt:
qir_SF(c, qir_SUB(c, src[0], src[1]));
*dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
@@ -1167,7 +1171,7 @@ emit_point_size_write(struct vc4_compile *c)
struct qreg point_size;
if (c->output_point_size_index != -1)
- point_size = c->outputs[c->output_point_size_index + 3];
+ point_size = c->outputs[c->output_point_size_index];
else
point_size = qir_uniform_f(c, 1.0);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 7894b081b19..f2855e159fc 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -69,10 +69,14 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
[QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
[QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+ [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true },
+ [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true },
[QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
[QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
[QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
[QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+ [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true },
+ [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true },
[QOP_RCP] = { "rcp", 1, 1, false, true },
[QOP_RSQ] = { "rsq", 1, 1, false, true },
@@ -218,10 +222,14 @@ qir_depends_on_flags(struct qinst *inst)
case QOP_SEL_X_0_NC:
case QOP_SEL_X_0_ZS:
case QOP_SEL_X_0_ZC:
+ case QOP_SEL_X_0_CS:
+ case QOP_SEL_X_0_CC:
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
case QOP_SEL_X_Y_ZS:
case QOP_SEL_X_Y_ZC:
+ case QOP_SEL_X_Y_CS:
+ case QOP_SEL_X_Y_CC:
return true;
default:
return false;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index a92ad93ee07..ddb35e41fcf 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -99,11 +99,15 @@ enum qop {
QOP_SEL_X_0_ZC,
QOP_SEL_X_0_NS,
QOP_SEL_X_0_NC,
+ QOP_SEL_X_0_CS,
+ QOP_SEL_X_0_CC,
/* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */
QOP_SEL_X_Y_ZS,
QOP_SEL_X_Y_ZC,
QOP_SEL_X_Y_NS,
QOP_SEL_X_Y_NC,
+ QOP_SEL_X_Y_CS,
+ QOP_SEL_X_Y_CC,
QOP_FTOI,
QOP_ITOF,
@@ -567,10 +571,14 @@ QIR_ALU1(SEL_X_0_ZS)
QIR_ALU1(SEL_X_0_ZC)
QIR_ALU1(SEL_X_0_NS)
QIR_ALU1(SEL_X_0_NC)
+QIR_ALU1(SEL_X_0_CS)
+QIR_ALU1(SEL_X_0_CC)
QIR_ALU2(SEL_X_Y_ZS)
QIR_ALU2(SEL_X_Y_ZC)
QIR_ALU2(SEL_X_Y_NS)
QIR_ALU2(SEL_X_Y_NC)
+QIR_ALU2(SEL_X_Y_CS)
+QIR_ALU2(SEL_X_Y_CC)
QIR_ALU2(FMIN)
QIR_ALU2(FMAX)
QIR_ALU2(FMINABS)
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index f087c3b81b5..a57e100593c 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -22,14 +22,10 @@
*/
/**
- * @file vc4_opt_algebraic.c
+ * @file vc4_qir_lower_uniforms.c
*
- * This is the optimization pass for miscellaneous changes to instructions
- * where we can simplify the operation by some knowledge about the specific
- * operations.
- *
- * Mostly this will be a matter of turning things into MOVs so that they can
- * later be copy-propagated out.
+ * This is the pre-code-generation pass for fixing up instructions that try to
+ * read from multiple uniform values.
*/
#include "vc4_qir.h"
@@ -85,6 +81,33 @@ is_lowerable_uniform(struct qinst *inst, int i)
return true;
}
+/* Returns the number of different uniform values referenced by the
+ * instruction: QFILE_UNIF sources that share an index are counted once.
+ */
+static uint32_t
+qir_get_instruction_uniform_count(struct qinst *inst)
+{
+ uint32_t count = 0;
+
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file != QFILE_UNIF)
+ continue; /* only QFILE_UNIF sources are counted */
+
+ bool is_duplicate = false;
+ for (int j = 0; j < i; j++) { /* compare against the sources already visited */
+ if (inst->src[j].file == QFILE_UNIF &&
+ inst->src[j].index == inst->src[i].index) {
+ is_duplicate = true;
+ break;
+ }
+ }
+ if (!is_duplicate) /* first time this uniform index appears in the instruction */
+ count++;
+ }
+
+ return count;
+}
+
void
qir_lower_uniforms(struct vc4_compile *c)
{
@@ -98,13 +121,7 @@ qir_lower_uniforms(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
uint32_t nsrc = qir_get_op_nsrc(inst->op);
- uint32_t count = 0;
- for (int i = 0; i < nsrc; i++) {
- if (inst->src[i].file == QFILE_UNIF)
- count++;
- }
-
- if (count <= 1)
+ if (qir_get_instruction_uniform_count(inst) <= 1)
continue;
for (int i = 0; i < nsrc; i++) {
@@ -140,23 +157,22 @@ qir_lower_uniforms(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
uint32_t nsrc = qir_get_op_nsrc(inst->op);
- uint32_t count = 0;
- for (int i = 0; i < nsrc; i++) {
- if (inst->src[i].file == QFILE_UNIF)
- count++;
- }
+ uint32_t count = qir_get_instruction_uniform_count(inst);
if (count <= 1)
continue;
+ bool removed = false;
for (int i = 0; i < nsrc; i++) {
if (is_lowerable_uniform(inst, i) &&
inst->src[i].index == max_index) {
inst->src[i] = temp;
remove_uniform(ht, unif);
- count--;
+ removed = true;
}
}
+ if (removed)
+ count--;
/* If the instruction doesn't need lowering any more,
* then drop it from the list.
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 133e1385178..e0d3633da42 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -311,6 +311,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_0_ZC:
case QOP_SEL_X_0_NS:
case QOP_SEL_X_0_NC:
+ case QOP_SEL_X_0_CS:
+ case QOP_SEL_X_0_CC:
queue(c, qpu_a_MOV(dst, src[0]) | unpack);
set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
QPU_COND_ZS);
@@ -324,6 +326,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_Y_ZC:
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
+ case QOP_SEL_X_Y_CS:
+ case QOP_SEL_X_Y_CC:
queue(c, qpu_a_MOV(dst, src[0]));
if (qinst->src[0].pack)
*(last_inst(c)) |= unpack;
diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
index 7f11fba2340..85a0c95e851 100644
--- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
@@ -44,18 +44,28 @@ qir_reorder_uniforms(struct vc4_compile *c)
uint32_t next_uniform = 0;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+ uint32_t new = ~0;
+
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
if (inst->src[i].file != QFILE_UNIF)
continue;
- uint32_t new = next_uniform++;
- if (uniform_index_size <= new) {
- uniform_index_size =
- MAX2(uniform_index_size * 2, 16);
- uniform_index =
- realloc(uniform_index,
- uniform_index_size *
- sizeof(uint32_t));
+ if (new == ~0) {
+ new = next_uniform++;
+ if (uniform_index_size <= new) {
+ uniform_index_size =
+ MAX2(uniform_index_size * 2, 16);
+ uniform_index =
+ realloc(uniform_index,
+ uniform_index_size *
+ sizeof(uint32_t));
+ }
+ } else {
+ /* If we've got two uniform references in this
+ * instruction, they need to be the same
+ * uniform value.
+ */
+ assert(inst->src[i].index == uniform_index[new]);
}
uniform_index[new] = inst->src[i].index;