summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/v3d
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2018-05-01 12:24:48 -0700
committerEric Anholt <[email protected]>2018-05-16 21:19:07 +0100
commit8c47ebbd232704ab048eab2572e2b2a44f38957a (patch)
tree8946780fc424b3aa39e0b32ac875047605770a49 /src/gallium/drivers/v3d
parentc4c488a2aeb24c0f468664c0cacd0d01111a4e46 (diff)
v3d: Rename the driver files from "vc5" to "v3d".
Diffstat (limited to 'src/gallium/drivers/v3d')
-rw-r--r--src/gallium/drivers/v3d/.editorconfig3
-rw-r--r--src/gallium/drivers/v3d/Automake.inc14
-rw-r--r--src/gallium/drivers/v3d/Makefile.am56
-rw-r--r--src/gallium/drivers/v3d/Makefile.sources36
-rw-r--r--src/gallium/drivers/v3d/meson.build96
-rw-r--r--src/gallium/drivers/v3d/v3d_blit.c302
-rw-r--r--src/gallium/drivers/v3d/v3d_bufmgr.c552
-rw-r--r--src/gallium/drivers/v3d/v3d_bufmgr.h140
-rw-r--r--src/gallium/drivers/v3d/v3d_cl.c90
-rw-r--r--src/gallium/drivers/v3d/v3d_cl.h279
-rw-r--r--src/gallium/drivers/v3d/v3d_context.c183
-rw-r--r--src/gallium/drivers/v3d/v3d_context.h565
-rw-r--r--src/gallium/drivers/v3d/v3d_fence.c104
-rw-r--r--src/gallium/drivers/v3d/v3d_format_table.h54
-rw-r--r--src/gallium/drivers/v3d/v3d_formats.c144
-rw-r--r--src/gallium/drivers/v3d/v3d_job.c452
-rw-r--r--src/gallium/drivers/v3d/v3d_program.c682
-rw-r--r--src/gallium/drivers/v3d/v3d_query.c180
-rw-r--r--src/gallium/drivers/v3d/v3d_resource.c914
-rw-r--r--src/gallium/drivers/v3d/v3d_resource.h175
-rw-r--r--src/gallium/drivers/v3d/v3d_screen.c648
-rw-r--r--src/gallium/drivers/v3d/v3d_screen.h101
-rw-r--r--src/gallium/drivers/v3d/v3d_simulator.c660
-rw-r--r--src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp88
-rw-r--r--src/gallium/drivers/v3d/v3d_simulator_wrapper.h44
-rw-r--r--src/gallium/drivers/v3d/v3d_tiling.c389
-rw-r--r--src/gallium/drivers/v3d/v3d_tiling.h43
-rw-r--r--src/gallium/drivers/v3d/v3d_uniforms.c489
-rw-r--r--src/gallium/drivers/v3d/v3dx_context.h47
-rw-r--r--src/gallium/drivers/v3d/v3dx_draw.c714
-rw-r--r--src/gallium/drivers/v3d/v3dx_emit.c722
-rw-r--r--src/gallium/drivers/v3d/v3dx_format_table.c318
-rw-r--r--src/gallium/drivers/v3d/v3dx_job.c76
-rw-r--r--src/gallium/drivers/v3d/v3dx_rcl.c782
-rw-r--r--src/gallium/drivers/v3d/v3dx_simulator.c190
-rw-r--r--src/gallium/drivers/v3d/v3dx_state.c951
36 files changed, 11283 insertions, 0 deletions
diff --git a/src/gallium/drivers/v3d/.editorconfig b/src/gallium/drivers/v3d/.editorconfig
new file mode 100644
index 00000000000..5a9f3c041a4
--- /dev/null
+++ b/src/gallium/drivers/v3d/.editorconfig
@@ -0,0 +1,3 @@
+[*.{c,h,cpp}]
+indent_style = space
+indent_size = 8
diff --git a/src/gallium/drivers/v3d/Automake.inc b/src/gallium/drivers/v3d/Automake.inc
new file mode 100644
index 00000000000..7cf8ae7cd8b
--- /dev/null
+++ b/src/gallium/drivers/v3d/Automake.inc
@@ -0,0 +1,14 @@
+if HAVE_GALLIUM_V3D
+
+TARGET_DRIVERS += v3d
+TARGET_CPPFLAGS += -DGALLIUM_V3D
+TARGET_LIB_DEPS += \
+ $(top_builddir)/src/gallium/winsys/v3d/drm/libv3ddrm.la \
+ $(top_builddir)/src/gallium/drivers/v3d/libv3d.la \
+ $(top_builddir)/src/broadcom/libbroadcom.la
+
+if !HAVE_GALLIUM_VC4
+TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la
+endif
+
+endif
diff --git a/src/gallium/drivers/v3d/Makefile.am b/src/gallium/drivers/v3d/Makefile.am
new file mode 100644
index 00000000000..2b4c364c24e
--- /dev/null
+++ b/src/gallium/drivers/v3d/Makefile.am
@@ -0,0 +1,56 @@
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_builddir)/src/broadcom \
+ $(LIBDRM_CFLAGS) \
+ $(V3D_SIMULATOR_CFLAGS) \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(VALGRIND_CFLAGS) \
+ $()
+
+noinst_LTLIBRARIES = \
+ libv3d.la \
+ libv3d_v33.la \
+ libv3d_v41.la \
+ $()
+
+libv3d_v33_la_SOURCES = $(V3D_PER_VERSION_SOURCES)
+libv3d_v33_la_CFLAGS = $(AM_CFLAGS) -DV3D_VERSION=33
+
+libv3d_v41_la_SOURCES = $(V3D_PER_VERSION_SOURCES)
+libv3d_v41_la_CFLAGS = $(AM_CFLAGS) -DV3D_VERSION=41
+
+libv3d_la_SOURCES = $(C_SOURCES)
+
+libv3d_la_LDFLAGS = \
+ $(V3D_SIMULATOR_LIBS) \
+ $(NULL)
+libv3d_la_LIBADD = \
+ libv3d_v33.la \
+ libv3d_v41.la \
+ $()
+
+EXTRA_DIST = meson.build
diff --git a/src/gallium/drivers/v3d/Makefile.sources b/src/gallium/drivers/v3d/Makefile.sources
new file mode 100644
index 00000000000..c81ccb42013
--- /dev/null
+++ b/src/gallium/drivers/v3d/Makefile.sources
@@ -0,0 +1,36 @@
+C_SOURCES := \
+ v3d_blit.c \
+ v3d_bufmgr.c \
+ v3d_bufmgr.h \
+ v3d_cl.c \
+ v3d_cl.h \
+ v3d_context.c \
+ v3d_context.h \
+ v3d_fence.c \
+ v3d_formats.c \
+ v3d_format_table.h \
+ v3d_job.c \
+ v3d_program.c \
+ v3d_query.c \
+ v3d_resource.c \
+ v3d_resource.h \
+ v3d_screen.c \
+ v3d_screen.h \
+ v3d_simulator.c \
+ v3d_simulator_wrapper.cpp \
+ v3d_simulator_wrapper.h \
+ v3d_tiling.c \
+ v3d_tiling.h \
+ v3d_uniforms.c \
+ $()
+
+V3D_PER_VERSION_SOURCES = \
+ v3dx_context.h \
+ v3dx_draw.c \
+ v3dx_emit.c \
+ v3dx_format_table.c \
+ v3dx_job.c \
+ v3dx_rcl.c \
+ v3dx_simulator.c \
+ v3dx_state.c \
+ $()
diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build
new file mode 100644
index 00000000000..38021515eda
--- /dev/null
+++ b/src/gallium/drivers/v3d/meson.build
@@ -0,0 +1,96 @@
+# Copyright © 2017 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_libv3d = files(
+ 'v3d_blit.c',
+ 'v3d_bufmgr.c',
+ 'v3d_bufmgr.h',
+ 'v3d_cl.c',
+ 'v3d_cl.h',
+ 'v3d_context.c',
+ 'v3d_context.h',
+ 'v3d_fence.c',
+ 'v3d_formats.c',
+ 'v3d_job.c',
+ 'v3d_program.c',
+ 'v3d_query.c',
+ 'v3d_resource.c',
+ 'v3d_resource.h',
+ 'v3d_screen.c',
+ 'v3d_screen.h',
+ 'v3d_simulator.c',
+ 'v3d_simulator_wrapper.cpp',
+ 'v3d_tiling.c',
+ 'v3d_tiling.h',
+ 'v3d_uniforms.c',
+)
+
+files_per_version = files(
+ 'v3dx_draw.c',
+ 'v3dx_emit.c',
+ 'v3dx_format_table.c',
+ 'v3dx_job.c',
+ 'v3dx_rcl.c',
+ 'v3dx_simulator.c',
+ 'v3dx_state.c',
+)
+
+v3dv3_c_args = []
+dep_v3dv3 = dependency('v3dv3')
+if dep_v3dv3.found()
+ v3dv3_c_args = '-DUSE_V3D_SIMULATOR'
+endif
+
+v3d_versions = ['33', '41']
+
+per_version_libs = []
+foreach ver : v3d_versions
+ per_version_libs += static_library(
+ 'v3d-v' + ver,
+ [files_per_version, v3d_xml_pack, nir_opcodes_h, nir_builder_opcodes_h],
+ include_directories : [
+ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom,
+ inc_gallium_drivers, inc_drm_uapi,
+ ],
+ c_args : [c_vis_args, v3dv3_c_args, '-DV3D_VERSION=' + ver],
+ cpp_args : [cpp_vis_args],
+ dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind],
+)
+
+endforeach
+
+libv3d = static_library(
+ 'v3d',
+ [files_libv3d, v3d_xml_pack],
+ include_directories : [
+ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom,
+ inc_gallium_drivers, inc_drm_uapi,
+ ],
+ c_args : [c_vis_args, v3dv3_c_args],
+ cpp_args : [cpp_vis_args, v3dv3_c_args],
+ dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind, idep_nir_headers],
+ link_with: per_version_libs,
+)
+
+driver_v3d = declare_dependency(
+ compile_args : '-DGALLIUM_V3D',
+ link_with : [libv3d, libv3dwinsys, libbroadcom_cle, libbroadcom_v3d],
+ dependencies : idep_nir,
+)
diff --git a/src/gallium/drivers/v3d/v3d_blit.c b/src/gallium/drivers/v3d/v3d_blit.c
new file mode 100644
index 00000000000..7c67d4561ba
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_blit.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright © 2015-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_surface.h"
+#include "util/u_blitter.h"
+#include "v3d_context.h"
+
+#if 0
+static struct pipe_surface *
+vc5_get_blit_surface(struct pipe_context *pctx,
+ struct pipe_resource *prsc, unsigned level)
+{
+ struct pipe_surface tmpl;
+
+ memset(&tmpl, 0, sizeof(tmpl));
+ tmpl.format = prsc->format;
+ tmpl.u.tex.level = level;
+ tmpl.u.tex.first_layer = 0;
+ tmpl.u.tex.last_layer = 0;
+
+ return pctx->create_surface(pctx, prsc, &tmpl);
+}
+
+static bool
+is_tile_unaligned(unsigned size, unsigned tile_size)
+{
+ return size & (tile_size - 1);
+}
+
+static bool
+vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ bool msaa = (info->src.resource->nr_samples > 1 ||
+ info->dst.resource->nr_samples > 1);
+ int tile_width = msaa ? 32 : 64;
+ int tile_height = msaa ? 32 : 64;
+
+ if (util_format_is_depth_or_stencil(info->dst.resource->format))
+ return false;
+
+ if (info->scissor_enable)
+ return false;
+
+ if ((info->mask & PIPE_MASK_RGBA) == 0)
+ return false;
+
+ if (info->dst.box.x != info->src.box.x ||
+ info->dst.box.y != info->src.box.y ||
+ info->dst.box.width != info->src.box.width ||
+ info->dst.box.height != info->src.box.height) {
+ return false;
+ }
+
+ int dst_surface_width = u_minify(info->dst.resource->width0,
+ info->dst.level);
+ int dst_surface_height = u_minify(info->dst.resource->height0,
+ info->dst.level);
+ if (is_tile_unaligned(info->dst.box.x, tile_width) ||
+ is_tile_unaligned(info->dst.box.y, tile_height) ||
+ (is_tile_unaligned(info->dst.box.width, tile_width) &&
+ info->dst.box.x + info->dst.box.width != dst_surface_width) ||
+ (is_tile_unaligned(info->dst.box.height, tile_height) &&
+ info->dst.box.y + info->dst.box.height != dst_surface_height)) {
+ return false;
+ }
+
+ /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
+ * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
+ * destination surface) to determine the stride. This may be wrong
+ * when reading from texture miplevels > 0, which are stored in
+ * POT-sized areas. For MSAA, the tile addresses are computed
+ * explicitly by the RCL, but still use the destination width to
+ * determine the stride (which could be fixed by explicitly supplying
+ * it in the ABI).
+ */
+ struct vc5_resource *rsc = vc5_resource(info->src.resource);
+
+ uint32_t stride;
+
+ if (info->src.resource->nr_samples > 1)
+ stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
+ /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T)
+ stride = align(dst_surface_width * rsc->cpp, 128); */
+ else
+ stride = align(dst_surface_width * rsc->cpp, 16);
+
+ if (stride != rsc->slices[info->src.level].stride)
+ return false;
+
+ if (info->dst.resource->format != info->src.resource->format)
+ return false;
+
+ if (false) {
+ fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
+ info->src.box.x,
+ info->src.box.y,
+ info->dst.box.x,
+ info->dst.box.y,
+ info->dst.box.width,
+ info->dst.box.height);
+ }
+
+ struct pipe_surface *dst_surf =
+ vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level);
+ struct pipe_surface *src_surf =
+ vc5_get_blit_surface(pctx, info->src.resource, info->src.level);
+
+ vc5_flush_jobs_reading_resource(vc5, info->src.resource);
+
+ struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL);
+ pipe_surface_reference(&job->color_read, src_surf);
+
+ /* If we're resolving from MSAA to single sample, we still need to run
+ * the engine in MSAA mode for the load.
+ */
+ if (!job->msaa && info->src.resource->nr_samples > 1) {
+ job->msaa = true;
+ job->tile_width = 32;
+ job->tile_height = 32;
+ }
+
+ job->draw_min_x = info->dst.box.x;
+ job->draw_min_y = info->dst.box.y;
+ job->draw_max_x = info->dst.box.x + info->dst.box.width;
+ job->draw_max_y = info->dst.box.y + info->dst.box.height;
+ job->draw_width = dst_surf->width;
+ job->draw_height = dst_surf->height;
+
+ job->tile_width = tile_width;
+ job->tile_height = tile_height;
+ job->msaa = msaa;
+ job->needs_flush = true;
+ job->resolve |= PIPE_CLEAR_COLOR;
+
+ vc5_job_submit(vc5, job);
+
+ pipe_surface_reference(&dst_surf, NULL);
+ pipe_surface_reference(&src_surf, NULL);
+
+ return true;
+}
+#endif
+
+void
+vc5_blitter_save(struct vc5_context *vc5)
+{
+ util_blitter_save_fragment_constant_buffer_slot(vc5->blitter,
+ vc5->constbuf[PIPE_SHADER_FRAGMENT].cb);
+ util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb);
+ util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx);
+ util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs);
+ util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets,
+ vc5->streamout.targets);
+ util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer);
+ util_blitter_save_viewport(vc5->blitter, &vc5->viewport);
+ util_blitter_save_scissor(vc5->blitter, &vc5->scissor);
+ util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs);
+ util_blitter_save_blend(vc5->blitter, vc5->blend);
+ util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa);
+ util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref);
+ util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask);
+ util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer);
+ util_blitter_save_fragment_sampler_states(vc5->blitter,
+ vc5->fragtex.num_samplers,
+ (void **)vc5->fragtex.samplers);
+ util_blitter_save_fragment_sampler_views(vc5->blitter,
+ vc5->fragtex.num_textures, vc5->fragtex.textures);
+ util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets,
+ vc5->streamout.targets);
+}
+
+static bool
+vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
+{
+ struct vc5_context *vc5 = vc5_context(ctx);
+
+ if (!util_blitter_is_blit_supported(vc5->blitter, info)) {
+ fprintf(stderr, "blit unsupported %s -> %s\n",
+ util_format_short_name(info->src.resource->format),
+ util_format_short_name(info->dst.resource->format));
+ return false;
+ }
+
+ vc5_blitter_save(vc5);
+ util_blitter_blit(vc5->blitter, info);
+
+ return true;
+}
+
+/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
+ * or R8 texture.
+ */
+static void
+vc5_stencil_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
+{
+ struct vc5_context *vc5 = vc5_context(ctx);
+ struct vc5_resource *src = vc5_resource(info->src.resource);
+ struct vc5_resource *dst = vc5_resource(info->dst.resource);
+ enum pipe_format src_format, dst_format;
+
+ if (src->separate_stencil) {
+ src = src->separate_stencil;
+ src_format = PIPE_FORMAT_R8_UNORM;
+ } else {
+ src_format = PIPE_FORMAT_RGBA8888_UNORM;
+ }
+
+ if (dst->separate_stencil) {
+ dst = dst->separate_stencil;
+ dst_format = PIPE_FORMAT_R8_UNORM;
+ } else {
+ dst_format = PIPE_FORMAT_RGBA8888_UNORM;
+ }
+
+ /* Initialize the surface. */
+ struct pipe_surface dst_tmpl = {
+ .u.tex = {
+ .level = info->dst.level,
+ .first_layer = info->dst.box.z,
+ .last_layer = info->dst.box.z,
+ },
+ .format = dst_format,
+ };
+ struct pipe_surface *dst_surf =
+ ctx->create_surface(ctx, &dst->base, &dst_tmpl);
+
+ /* Initialize the sampler view. */
+ struct pipe_sampler_view src_tmpl = {
+ .target = src->base.target,
+ .format = src_format,
+ .u.tex = {
+ .first_level = info->src.level,
+ .last_level = info->src.level,
+ .first_layer = 0,
+ .last_layer = (PIPE_TEXTURE_3D ?
+ u_minify(src->base.depth0,
+ info->src.level) - 1 :
+ src->base.array_size - 1),
+ },
+ .swizzle_r = PIPE_SWIZZLE_X,
+ .swizzle_g = PIPE_SWIZZLE_Y,
+ .swizzle_b = PIPE_SWIZZLE_Z,
+ .swizzle_a = PIPE_SWIZZLE_W,
+ };
+ struct pipe_sampler_view *src_view =
+ ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
+
+ vc5_blitter_save(vc5);
+ util_blitter_blit_generic(vc5->blitter, dst_surf, &info->dst.box,
+ src_view, &info->src.box,
+ src->base.width0, src->base.height0,
+ PIPE_MASK_R,
+ PIPE_TEX_FILTER_NEAREST,
+ info->scissor_enable ? &info->scissor : NULL,
+ info->alpha_blend);
+
+ pipe_surface_reference(&dst_surf, NULL);
+ pipe_sampler_view_reference(&src_view, NULL);
+}
+
+/* Optimal hardware path for blitting pixels.
+ * Scaling, format conversion, up- and downsampling (resolve) are allowed.
+ */
+void
+vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
+{
+ struct pipe_blit_info info = *blit_info;
+
+ if (info.mask & PIPE_MASK_S) {
+ vc5_stencil_blit(pctx, blit_info);
+ info.mask &= ~PIPE_MASK_S;
+ }
+
+#if 0
+ if (vc5_tile_blit(pctx, blit_info))
+ return;
+#endif
+
+ vc5_render_blit(pctx, &info);
+}
diff --git a/src/gallium/drivers/v3d/v3d_bufmgr.c b/src/gallium/drivers/v3d/v3d_bufmgr.c
new file mode 100644
index 00000000000..ef2a5fa07be
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_bufmgr.c
@@ -0,0 +1,552 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <err.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <xf86drm.h>
+#include <xf86drmMode.h>
+
+#include "util/u_hash_table.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+
+#include "v3d_context.h"
+#include "v3d_screen.h"
+
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#include <memcheck.h>
+#define VG(x) x
+#else
+#define VG(x)
+#endif
+
+static bool dump_stats = false;
+
+static void
+vc5_bo_cache_free_all(struct vc5_bo_cache *cache);
+
+static void
+vc5_bo_dump_stats(struct vc5_screen *screen)
+{
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+
+ fprintf(stderr, " BOs allocated: %d\n", screen->bo_count);
+ fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024);
+ fprintf(stderr, " BOs cached: %d\n", cache->bo_count);
+ fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024);
+
+ if (!list_empty(&cache->time_list)) {
+ struct vc5_bo *first = LIST_ENTRY(struct vc5_bo,
+ cache->time_list.next,
+ time_list);
+ struct vc5_bo *last = LIST_ENTRY(struct vc5_bo,
+ cache->time_list.prev,
+ time_list);
+
+ fprintf(stderr, " oldest cache time: %ld\n",
+ (long)first->free_time);
+ fprintf(stderr, " newest cache time: %ld\n",
+ (long)last->free_time);
+
+ struct timespec time;
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ fprintf(stderr, " now: %ld\n",
+ time.tv_sec);
+ }
+}
+
+static void
+vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo)
+{
+ list_del(&bo->time_list);
+ list_del(&bo->size_list);
+ cache->bo_count--;
+ cache->bo_size -= bo->size;
+}
+
+static struct vc5_bo *
+vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name)
+{
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+ uint32_t page_index = size / 4096 - 1;
+
+ if (cache->size_list_size <= page_index)
+ return NULL;
+
+ struct vc5_bo *bo = NULL;
+ mtx_lock(&cache->lock);
+ if (!list_empty(&cache->size_list[page_index])) {
+ bo = LIST_ENTRY(struct vc5_bo, cache->size_list[page_index].next,
+ size_list);
+
+ /* Check that the BO has gone idle. If not, then we want to
+ * allocate something new instead, since we assume that the
+ * user will proceed to CPU map it and fill it with stuff.
+ */
+ if (!vc5_bo_wait(bo, 0, NULL)) {
+ mtx_unlock(&cache->lock);
+ return NULL;
+ }
+
+ pipe_reference_init(&bo->reference, 1);
+ vc5_bo_remove_from_cache(cache, bo);
+
+ bo->name = name;
+ }
+ mtx_unlock(&cache->lock);
+ return bo;
+}
+
+struct vc5_bo *
+vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name)
+{
+ struct vc5_bo *bo;
+ int ret;
+
+ size = align(size, 4096);
+
+ bo = vc5_bo_from_cache(screen, size, name);
+ if (bo) {
+ if (dump_stats) {
+ fprintf(stderr, "Allocated %s %dkb from cache:\n",
+ name, size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+ return bo;
+ }
+
+ bo = CALLOC_STRUCT(vc5_bo);
+ if (!bo)
+ return NULL;
+
+ pipe_reference_init(&bo->reference, 1);
+ bo->screen = screen;
+ bo->size = size;
+ bo->name = name;
+ bo->private = true;
+
+ retry:
+ ;
+
+ bool cleared_and_retried = false;
+ struct drm_v3d_create_bo create = {
+ .size = size
+ };
+
+ ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_CREATE_BO, &create);
+ bo->handle = create.handle;
+ bo->offset = create.offset;
+
+ if (ret != 0) {
+ if (!list_empty(&screen->bo_cache.time_list) &&
+ !cleared_and_retried) {
+ cleared_and_retried = true;
+ vc5_bo_cache_free_all(&screen->bo_cache);
+ goto retry;
+ }
+
+ free(bo);
+ return NULL;
+ }
+
+ screen->bo_count++;
+ screen->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+
+ return bo;
+}
+
+void
+vc5_bo_last_unreference(struct vc5_bo *bo)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ struct timespec time;
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ mtx_lock(&screen->bo_cache.lock);
+ vc5_bo_last_unreference_locked_timed(bo, time.tv_sec);
+ mtx_unlock(&screen->bo_cache.lock);
+}
+
+static void
+vc5_bo_free(struct vc5_bo *bo)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ if (bo->map) {
+ if (using_vc5_simulator && bo->name &&
+ strcmp(bo->name, "winsys") == 0) {
+ free(bo->map);
+ } else {
+ munmap(bo->map, bo->size);
+ VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
+ }
+ }
+
+ struct drm_gem_close c;
+ memset(&c, 0, sizeof(c));
+ c.handle = bo->handle;
+ int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
+ if (ret != 0)
+ fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
+
+ screen->bo_count--;
+ screen->bo_size -= bo->size;
+
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s%s%dkb:\n",
+ bo->name ? bo->name : "",
+ bo->name ? " " : "",
+ bo->size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+
+ free(bo);
+}
+
+static void
+free_stale_bos(struct vc5_screen *screen, time_t time)
+{
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+ bool freed_any = false;
+
+ list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list,
+ time_list) {
+ if (dump_stats && !freed_any) {
+ fprintf(stderr, "Freeing stale BOs:\n");
+ vc5_bo_dump_stats(screen);
+ freed_any = true;
+ }
+
+ /* If it's more than a second old, free it. */
+ if (time - bo->free_time > 2) {
+ vc5_bo_remove_from_cache(cache, bo);
+ vc5_bo_free(bo);
+ } else {
+ break;
+ }
+ }
+
+ if (dump_stats && freed_any) {
+ fprintf(stderr, "Freed stale BOs:\n");
+ vc5_bo_dump_stats(screen);
+ }
+}
+
+static void
+vc5_bo_cache_free_all(struct vc5_bo_cache *cache)
+{
+ mtx_lock(&cache->lock);
+ list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list,
+ time_list) {
+ vc5_bo_remove_from_cache(cache, bo);
+ vc5_bo_free(bo);
+ }
+ mtx_unlock(&cache->lock);
+}
+
+void
+vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time)
+{
+ struct vc5_screen *screen = bo->screen;
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+ uint32_t page_index = bo->size / 4096 - 1;
+
+ if (!bo->private) {
+ vc5_bo_free(bo);
+ return;
+ }
+
+ if (cache->size_list_size <= page_index) {
+ struct list_head *new_list =
+ ralloc_array(screen, struct list_head, page_index + 1);
+
+ /* Move old list contents over (since the array has moved, and
+ * therefore the pointers to the list heads have to change).
+ */
+ for (int i = 0; i < cache->size_list_size; i++) {
+ struct list_head *old_head = &cache->size_list[i];
+ if (list_empty(old_head))
+ list_inithead(&new_list[i]);
+ else {
+ new_list[i].next = old_head->next;
+ new_list[i].prev = old_head->prev;
+ new_list[i].next->prev = &new_list[i];
+ new_list[i].prev->next = &new_list[i];
+ }
+ }
+ for (int i = cache->size_list_size; i < page_index + 1; i++)
+ list_inithead(&new_list[i]);
+
+ cache->size_list = new_list;
+ cache->size_list_size = page_index + 1;
+ }
+
+ bo->free_time = time;
+ list_addtail(&bo->size_list, &cache->size_list[page_index]);
+ list_addtail(&bo->time_list, &cache->time_list);
+ cache->bo_count++;
+ cache->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s %dkb to cache:\n",
+ bo->name, bo->size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+ bo->name = NULL;
+
+ free_stale_bos(screen, time);
+}
+
+static struct vc5_bo *
+vc5_bo_open_handle(struct vc5_screen *screen,
+ uint32_t winsys_stride,
+ uint32_t handle, uint32_t size)
+{
+ struct vc5_bo *bo;
+
+ assert(size);
+
+ mtx_lock(&screen->bo_handles_mutex);
+
+ bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
+ if (bo) {
+ pipe_reference(NULL, &bo->reference);
+ goto done;
+ }
+
+ bo = CALLOC_STRUCT(vc5_bo);
+ pipe_reference_init(&bo->reference, 1);
+ bo->screen = screen;
+ bo->handle = handle;
+ bo->size = size;
+ bo->name = "winsys";
+ bo->private = false;
+
+#ifdef USE_V3D_SIMULATOR
+ vc5_simulator_open_from_handle(screen->fd, winsys_stride,
+ bo->handle, bo->size);
+ bo->map = malloc(bo->size);
+#endif
+
+ struct drm_v3d_get_bo_offset get = {
+ .handle = handle,
+ };
+ int ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_BO_OFFSET, &get);
+ if (ret) {
+ fprintf(stderr, "Failed to get BO offset: %s\n",
+ strerror(errno));
+ free(bo->map);
+ free(bo);
+ return NULL;
+ }
+ bo->offset = get.offset;
+ assert(bo->offset != 0);
+
+ util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);
+
+done:
+ mtx_unlock(&screen->bo_handles_mutex);
+ return bo;
+}
+
+struct vc5_bo *
+vc5_bo_open_name(struct vc5_screen *screen, uint32_t name,
+ uint32_t winsys_stride)
+{
+ struct drm_gem_open o = {
+ .name = name
+ };
+ int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o);
+ if (ret) {
+ fprintf(stderr, "Failed to open bo %d: %s\n",
+ name, strerror(errno));
+ return NULL;
+ }
+
+ return vc5_bo_open_handle(screen, winsys_stride, o.handle, o.size);
+}
+
+struct vc5_bo *
+vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride)
+{
+ uint32_t handle;
+ int ret = drmPrimeFDToHandle(screen->fd, fd, &handle);
+ int size;
+ if (ret) {
+ fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd);
+ return NULL;
+ }
+
+ /* Determine the size of the bo we were handed. */
+ size = lseek(fd, 0, SEEK_END);
+ if (size == -1) {
+ fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd);
+ return NULL;
+ }
+
+ return vc5_bo_open_handle(screen, winsys_stride, handle, size);
+}
+
+int
+vc5_bo_get_dmabuf(struct vc5_bo *bo)
+{
+ int fd;
+ int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle,
+ O_CLOEXEC, &fd);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to export gem bo %d to dmabuf\n",
+ bo->handle);
+ return -1;
+ }
+
+ mtx_lock(&bo->screen->bo_handles_mutex);
+ bo->private = false;
+ util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo);
+ mtx_unlock(&bo->screen->bo_handles_mutex);
+
+ return fd;
+}
+
+bool
+vc5_bo_flink(struct vc5_bo *bo, uint32_t *name)
+{
+ struct drm_gem_flink flink = {
+ .handle = bo->handle,
+ };
+ int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink);
+ if (ret) {
+ fprintf(stderr, "Failed to flink bo %d: %s\n",
+ bo->handle, strerror(errno));
+ free(bo);
+ return false;
+ }
+
+ bo->private = false;
+ *name = flink.name;
+
+ return true;
+}
+
+static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
+{
+ struct drm_v3d_wait_bo wait = {
+ .handle = handle,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = vc5_ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
+
+}
+
+bool
+vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc5_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on %s BO for %s\n",
+ bo->name, reason);
+ }
+ }
+
+ int ret = vc5_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
+void *
+vc5_bo_map_unsynchronized(struct vc5_bo *bo)
+{
+ uint64_t offset;
+ int ret;
+
+ if (bo->map)
+ return bo->map;
+
+ struct drm_v3d_mmap_bo map;
+ memset(&map, 0, sizeof(map));
+ map.handle = bo->handle;
+ ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_V3D_MMAP_BO, &map);
+ offset = map.offset;
+ if (ret != 0) {
+ fprintf(stderr, "map ioctl failure\n");
+ abort();
+ }
+
+ bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ bo->screen->fd, offset);
+ if (bo->map == MAP_FAILED) {
+ fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
+ bo->handle, (long long)offset, bo->size);
+ abort();
+ }
+ VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
+
+ return bo->map;
+}
+
+void *
+vc5_bo_map(struct vc5_bo *bo)
+{
+ void *map = vc5_bo_map_unsynchronized(bo);
+
+ bool ok = vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map");
+ if (!ok) {
+ fprintf(stderr, "BO wait for map failed\n");
+ abort();
+ }
+
+ return map;
+}
+
+void
+vc5_bufmgr_destroy(struct pipe_screen *pscreen)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+
+ vc5_bo_cache_free_all(cache);
+
+ if (dump_stats) {
+ fprintf(stderr, "BO stats after screen destroy:\n");
+ vc5_bo_dump_stats(screen);
+ }
+}
diff --git a/src/gallium/drivers/v3d/v3d_bufmgr.h b/src/gallium/drivers/v3d/v3d_bufmgr.h
new file mode 100644
index 00000000000..4519a206026
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_bufmgr.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_BUFMGR_H
+#define VC5_BUFMGR_H
+
+#include <stdint.h>
+#include "util/u_hash_table.h"
+#include "util/u_inlines.h"
+#include "util/list.h"
+#include "v3d_screen.h"
+
+struct vc5_context;
+
/* A GPU buffer object and its bookkeeping for the screen's BO cache. */
struct vc5_bo {
        struct pipe_reference reference;
        struct vc5_screen *screen;
        /* Cached CPU mapping, or NULL until first vc5_bo_map*() call. */
        void *map;
        /* Debug name, used in BO stats and perf warnings. */
        const char *name;
        /* GEM handle on screen->fd. */
        uint32_t handle;
        uint32_t size;

        /* Address of the BO in our page tables. */
        uint32_t offset;

        /** Entry in the linked list of buffers freed, by age. */
        struct list_head time_list;
        /** Entry in the per-page-count linked list of buffers freed (by age). */
        struct list_head size_list;
        /** Approximate second when the bo was freed. */
        time_t free_time;
        /**
         * Whether only our process has a reference to the BO (meaning that
         * it's safe to reuse it in the BO cache).
         */
        bool private;
};
+
+struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size,
+ const char *name);
+void vc5_bo_last_unreference(struct vc5_bo *bo);
+void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time);
+struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name,
+ uint32_t winsys_stride);
+struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd,
+ uint32_t winsys_stride);
+bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name);
+int vc5_bo_get_dmabuf(struct vc5_bo *bo);
+
/* Replaces *old_bo's reference with a reference to new_bo, dropping the
 * old BO if that was its last reference.
 * NOTE(review): the address of (*old_bo)->reference is taken
 * unconditionally, so this presumably requires *old_bo to be non-NULL —
 * confirm callers guarantee that.
 */
static inline void
vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo)
{
        if (pipe_reference(&(*old_bo)->reference, &new_bo->reference))
                vc5_bo_last_unreference(*old_bo);
        *old_bo = new_bo;
}

/* Takes an extra reference on bo and returns it for convenience. */
static inline struct vc5_bo *
vc5_bo_reference(struct vc5_bo *bo)
{
        pipe_reference(NULL, &bo->reference);
        return bo;
}

/* Drops one reference and NULLs the caller's pointer.  On the last
 * reference the BO is handed to vc5_bo_last_unreference().
 */
static inline void
vc5_bo_unreference(struct vc5_bo **bo)
{
        struct vc5_screen *screen;
        if (!*bo)
                return;

        if ((*bo)->private) {
                /* Avoid the mutex for private BOs */
                if (pipe_reference(&(*bo)->reference, NULL))
                        vc5_bo_last_unreference(*bo);
        } else {
                /* Shared BOs live in the screen's handle table, so the
                 * table removal and the final-reference drop must happen
                 * under the same lock.
                 */
                screen = (*bo)->screen;
                mtx_lock(&screen->bo_handles_mutex);

                if (pipe_reference(&(*bo)->reference, NULL)) {
                        util_hash_table_remove(screen->bo_handles,
                                               (void *)(uintptr_t)(*bo)->handle);
                        vc5_bo_last_unreference(*bo);
                }

                mtx_unlock(&screen->bo_handles_mutex);
        }

        *bo = NULL;
}

/* Like vc5_bo_unreference(), but for callers that already hold the
 * relevant lock; records "time" as the approximate free time for the
 * BO cache's aging.
 */
static inline void
vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time)
{
        if (!*bo)
                return;

        if (pipe_reference(&(*bo)->reference, NULL))
                vc5_bo_last_unreference_locked_timed(*bo, time);
        *bo = NULL;
}
+
+void *
+vc5_bo_map(struct vc5_bo *bo);
+
+void *
+vc5_bo_map_unsynchronized(struct vc5_bo *bo);
+
+bool
+vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason);
+
+bool
+vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason);
+
+void
+vc5_bufmgr_destroy(struct pipe_screen *pscreen);
+
+#endif /* VC5_BUFMGR_H */
+
diff --git a/src/gallium/drivers/v3d/v3d_cl.c b/src/gallium/drivers/v3d/v3d_cl.c
new file mode 100644
index 00000000000..2ffb7ea9a2c
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_cl.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/ralloc.h"
+#include "v3d_context.h"
+/* The branching packets are the same across V3D versions. */
+#define V3D_VERSION 33
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+
+void
+vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl)
+{
+ cl->base = NULL;
+ cl->next = cl->base;
+ cl->size = 0;
+ cl->job = job;
+}
+
/* Ensures "space" bytes are available at "alignment" and returns the
 * aligned offset the caller should write at.
 */
uint32_t
vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment)
{
        uint32_t offset = align(cl_offset(cl), alignment);

        /* Fast path: the current BO still has room. */
        if (offset + space <= cl->size) {
                cl->next = cl->base + offset;
                return offset;
        }

        /* Out of room: drop the old BO and start fresh at offset 0.
         * NOTE(review): prior contents are neither copied nor chained to —
         * confirm callers never span allocations through this path.
         */
        vc5_bo_unreference(&cl->bo);
        cl->bo = vc5_bo_alloc(cl->job->vc5->screen, align(space, 4096), "CL");
        cl->base = vc5_bo_map(cl->bo);
        cl->size = cl->bo->size;
        cl->next = cl->base;

        return 0;
}
+
+void
+vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space)
+{
+ if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size)
+ return;
+
+ struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL");
+ assert(space <= new_bo->size);
+
+ /* Chain to the new BO from the old one. */
+ if (cl->bo) {
+ cl_emit(cl, BRANCH, branch) {
+ branch.address = cl_address(new_bo, 0);
+ }
+ vc5_bo_unreference(&cl->bo);
+ } else {
+ /* Root the first RCL/BCL BO in the job. */
+ vc5_job_add_bo(cl->job, cl->bo);
+ }
+
+ cl->bo = new_bo;
+ cl->base = vc5_bo_map(cl->bo);
+ cl->size = cl->bo->size;
+ cl->next = cl->base;
+}
+
/* Drops the CL's reference on its current backing BO. */
void
vc5_destroy_cl(struct vc5_cl *cl)
{
        vc5_bo_unreference(&cl->bo);
}
diff --git a/src/gallium/drivers/v3d/v3d_cl.h b/src/gallium/drivers/v3d/v3d_cl.h
new file mode 100644
index 00000000000..7025b5a672b
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_cl.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_CL_H
+#define VC5_CL_H
+
+#include <stdint.h>
+
+#include "util/u_math.h"
+#include "util/macros.h"
+
+struct vc5_bo;
+struct vc5_job;
+struct vc5_cl;
+
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc5_cl_out;
+
/** A reference to a BO used in the CL packing functions */
struct vc5_cl_reloc {
        struct vc5_bo *bo;
        uint32_t offset;
};

static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *);

/* Hooks consumed by the autogenerated packet pack functions
 * (broadcom/cle/v3dx_pack.h).
 */
#define __gen_user_data struct vc5_cl
#define __gen_address_type struct vc5_cl_reloc
#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
                                     (reloc)->offset)
#define __gen_emit_reloc cl_pack_emit_reloc

/* A growing command list (used for the BCL, RCL, and indirect state). */
struct vc5_cl {
        /* CPU mapping of the current backing BO. */
        void *base;
        struct vc5_job *job;
        /* Write cursor within the mapping. */
        struct vc5_cl_out *next;
        /* Current backing BO. */
        struct vc5_bo *bo;
        /* Usable size in bytes of the mapping. */
        uint32_t size;
};
+
+void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl);
+void vc5_destroy_cl(struct vc5_cl *cl);
+void vc5_dump_cl(void *cl, uint32_t size, bool is_render);
+uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo);
+
/* Packed wrapper structs so unaligned stores can be expressed through an
 * alignment-1 type instead of a misaligned plain pointer.
 */
struct PACKED unaligned_16 { uint16_t x; };
struct PACKED unaligned_32 { uint32_t x; };

/* Byte offset of the write cursor from the start of the CL's BO. */
static inline uint32_t cl_offset(struct vc5_cl *cl)
{
        return (char *)cl->next - (char *)cl->base;
}

/* Returns the BO and offset of the current write cursor, for relocs. */
static inline struct vc5_cl_reloc cl_get_address(struct vc5_cl *cl)
{
        return (struct vc5_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) };
}

/* Moves a CL cursor forward by n bytes. */
static inline void
cl_advance(struct vc5_cl_out **cl, uint32_t n)
{
        (*cl) = (struct vc5_cl_out *)((char *)(*cl) + n);
}

/* Begins a batch of writes at the CL's current cursor. */
static inline struct vc5_cl_out *
cl_start(struct vc5_cl *cl)
{
        return cl->next;
}

/* Commits a batch of writes by storing the advanced cursor back. */
static inline void
cl_end(struct vc5_cl *cl, struct vc5_cl_out *next)
{
        cl->next = next;
        assert(cl_offset(cl) <= cl->size);
}
+
+
/* Stores through the packed wrapper type, so the address may be unaligned. */
static inline void
put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val)
{
        struct unaligned_32 *p = (void *)ptr;
        p->x = val;
}

static inline void
put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val)
{
        struct unaligned_16 *p = (void *)ptr;
        p->x = val;
}

/* Emits a byte and advances the cursor. */
static inline void
cl_u8(struct vc5_cl_out **cl, uint8_t n)
{
        *(uint8_t *)(*cl) = n;
        cl_advance(cl, 1);
}

/* Emits a (possibly unaligned) 16-bit value and advances the cursor. */
static inline void
cl_u16(struct vc5_cl_out **cl, uint16_t n)
{
        put_unaligned_16(*cl, n);
        cl_advance(cl, 2);
}

/* Emits a (possibly unaligned) 32-bit value and advances the cursor. */
static inline void
cl_u32(struct vc5_cl_out **cl, uint32_t n)
{
        put_unaligned_32(*cl, n);
        cl_advance(cl, 4);
}

/* Like cl_u32(), but the caller guarantees the cursor is 4-byte aligned. */
static inline void
cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n)
{
        *(uint32_t *)(*cl) = n;
        cl_advance(cl, 4);
}

/* Emits bo's GPU address plus offset and records the BO in the job's set. */
static inline void
cl_aligned_reloc(struct vc5_cl *cl,
                 struct vc5_cl_out **cl_out,
                 struct vc5_bo *bo, uint32_t offset)
{
        cl_aligned_u32(cl_out, bo->offset + offset);
        vc5_job_add_bo(cl->job, bo);
}

/* Emits a CPU pointer (pointer-sized) and advances the cursor. */
static inline void
cl_ptr(struct vc5_cl_out **cl, void *ptr)
{
        *(struct vc5_cl_out **)(*cl) = ptr;
        cl_advance(cl, sizeof(void *));
}

/* Emits a float's bit pattern (fui()) as a 32-bit value. */
static inline void
cl_f(struct vc5_cl_out **cl, float f)
{
        cl_u32(cl, fui(f));
}

static inline void
cl_aligned_f(struct vc5_cl_out **cl, float f)
{
        cl_aligned_u32(cl, fui(f));
}
+
/**
 * Reference to a BO with its associated offset, used in the pack process.
 */
static inline struct vc5_cl_reloc
cl_address(struct vc5_bo *bo, uint32_t offset)
{
        struct vc5_cl_reloc reloc = {
                .bo = bo,
                .offset = offset,
        };
        return reloc;
}

uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align);
void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size);

/* Accessors for the V3D-version-suffixed (V3DX) generated packet
 * definitions, letting CL code be written once per packet name.
 */
#define cl_packet_header(packet) V3DX(packet ## _header)
#define cl_packet_length(packet) V3DX(packet ## _length)
#define cl_packet_pack(packet) V3DX(packet ## _pack)
#define cl_packet_struct(packet) V3DX(packet)

/* Reserves "size" bytes at the cursor for the caller to fill directly. */
static inline void *
cl_get_emit_space(struct vc5_cl_out **cl, size_t size)
{
        void *addr = *cl;
        cl_advance(cl, size);
        return addr;
}
+
/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
 * is created, which you get to set fields in of the form:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
 *         .flags.flat_shade_flags = 1 << 2,
 * }
 *
 * or default values only can be emitted with just:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
 *
 * The trick here is that we make a for loop that will execute the body
 * (either the block or the ';' after the macro invocation) exactly once.
 */
#define cl_emit(cl, packet, name)                                \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                struct vc5_cl_out *cl_out = cl_start(cl);        \
                cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
                cl_advance(&cl_out, cl_packet_length(packet));   \
                cl_end(cl, cl_out);                              \
                _loop_terminate = NULL;                          \
        }))                                                      \

/* Like cl_emit(), but ORs each packed byte over a caller-supplied
 * prepacked template, so static fields can be packed once up front.
 */
#define cl_emit_with_prepacked(cl, packet, prepacked, name)      \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                struct vc5_cl_out *cl_out = cl_start(cl);        \
                uint8_t packed[cl_packet_length(packet)];        \
                cl_packet_pack(packet)(cl, packed, &name);       \
                for (int _i = 0; _i < cl_packet_length(packet); _i++) \
                        ((uint8_t *)cl_out)[_i] = packed[_i] | (prepacked)[_i]; \
                cl_advance(&cl_out, cl_packet_length(packet));   \
                cl_end(cl, cl_out);                              \
                _loop_terminate = NULL;                          \
        }))                                                      \

/* Copies an already-packed packet into the CL byte-for-byte. */
#define cl_emit_prepacked(cl, packet) do {                \
        memcpy((cl)->next, packet, sizeof(*packet));      \
        cl_advance(&(cl)->next, sizeof(*packet));         \
} while (0)

/* Packs a struct into a plain CPU buffer rather than a CL (no user data,
 * so no relocs are recorded).
 */
#define v3dx_pack(packed, packet, name)                          \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                cl_packet_pack(packet)(NULL, (uint8_t *)packed, &name); \
                VG(VALGRIND_CHECK_MEM_IS_DEFINED((uint8_t *)packed, \
                                                 cl_packet_length(packet))); \
                _loop_terminate = NULL;                          \
        }))                                                      \
+
/**
 * Helper function called by the XML-generated pack functions for filling in
 * an address field in shader records.
 *
 * Since we have a private address space as of VC5, our BOs can have lifelong
 * offsets, and all the kernel needs to know is which BOs need to be paged in
 * for this exec.
 */
static inline void
cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc)
{
        /* A NULL BO is a legal "no address" reloc; nothing to record. */
        if (reloc->bo)
                vc5_job_add_bo(cl->job, reloc->bo);
}
+
+#endif /* VC5_CL_H */
diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c
new file mode 100644
index 00000000000..cb37eba3841
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_context.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <xf86drm.h>
+#include <err.h>
+
+#include "pipe/p_defines.h"
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_blitter.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_primconvert.h"
+#include "pipe/p_screen.h"
+
+#include "v3d_screen.h"
+#include "v3d_context.h"
+#include "v3d_resource.h"
+
/* Submits every pending job (one per FBO configuration) to the kernel. */
void
vc5_flush(struct pipe_context *pctx)
{
        struct vc5_context *vc5 = vc5_context(pctx);

        struct hash_entry *entry;
        hash_table_foreach(vc5->jobs, entry) {
                struct vc5_job *job = entry->data;
                /* NOTE(review): vc5_job_submit presumably removes the job
                 * from vc5->jobs — confirm mutating the table during this
                 * iteration is safe.
                 */
                vc5_job_submit(vc5, job);
        }
}
+
+static void
+vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+
+ vc5_flush(pctx);
+
+ if (fence) {
+ struct pipe_screen *screen = pctx->screen;
+ struct vc5_fence *f = vc5_fence_create(vc5);
+ screen->fence_reference(screen, fence, NULL);
+ *fence = (struct pipe_fence_handle *)f;
+ }
+}
+
/* pipe_context::invalidate_resource hook: the resource's contents are no
 * longer needed, so drop tracking of defined data and cancel pending
 * depth/stencil stores to it.
 */
static void
vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_resource *rsc = vc5_resource(prsc);

        rsc->initialized_buffers = 0;

        struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs,
                                                           prsc);
        if (!entry)
                return;

        /* If a queued job was going to store Z/S into this resource, those
         * stores can be skipped.
         */
        struct vc5_job *job = entry->data;
        if (job->key.zsbuf && job->key.zsbuf->texture == prsc)
                job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
}
+
+static void
+vc5_context_destroy(struct pipe_context *pctx)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+
+ vc5_flush(pctx);
+
+ if (vc5->blitter)
+ util_blitter_destroy(vc5->blitter);
+
+ if (vc5->primconvert)
+ util_primconvert_destroy(vc5->primconvert);
+
+ if (vc5->uploader)
+ u_upload_destroy(vc5->uploader);
+
+ slab_destroy_child(&vc5->transfer_pool);
+
+ pipe_surface_reference(&vc5->framebuffer.cbufs[0], NULL);
+ pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL);
+
+ vc5_program_fini(pctx);
+
+ ralloc_free(vc5);
+}
+
+struct pipe_context *
+vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+ struct vc5_context *vc5;
+
+ /* Prevent dumping of the shaders built during context setup. */
+ uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB;
+ V3D_DEBUG &= ~V3D_DEBUG_SHADERDB;
+
+ vc5 = rzalloc(NULL, struct vc5_context);
+ if (!vc5)
+ return NULL;
+ struct pipe_context *pctx = &vc5->base;
+
+ vc5->screen = screen;
+
+ int ret = drmSyncobjCreate(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
+ &vc5->out_sync);
+ if (ret) {
+ ralloc_free(vc5);
+ return NULL;
+ }
+
+ pctx->screen = pscreen;
+ pctx->priv = priv;
+ pctx->destroy = vc5_context_destroy;
+ pctx->flush = vc5_pipe_flush;
+ pctx->invalidate_resource = vc5_invalidate_resource;
+
+ if (screen->devinfo.ver >= 41) {
+ v3d41_draw_init(pctx);
+ v3d41_state_init(pctx);
+ } else {
+ v3d33_draw_init(pctx);
+ v3d33_state_init(pctx);
+ }
+ vc5_program_init(pctx);
+ vc5_query_init(pctx);
+ vc5_resource_context_init(pctx);
+
+ vc5_job_init(vc5);
+
+ vc5->fd = screen->fd;
+
+ slab_create_child(&vc5->transfer_pool, &screen->transfer_pool);
+
+ vc5->uploader = u_upload_create_default(&vc5->base);
+ vc5->base.stream_uploader = vc5->uploader;
+ vc5->base.const_uploader = vc5->uploader;
+
+ vc5->blitter = util_blitter_create(pctx);
+ if (!vc5->blitter)
+ goto fail;
+
+ vc5->primconvert = util_primconvert_create(pctx,
+ (1 << PIPE_PRIM_QUADS) - 1);
+ if (!vc5->primconvert)
+ goto fail;
+
+ V3D_DEBUG |= saved_shaderdb_flag;
+
+ vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1;
+ vc5->active_queries = true;
+
+ return &vc5->base;
+
+fail:
+ pctx->destroy(pctx);
+ return NULL;
+}
diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h
new file mode 100644
index 00000000000..7c17eccd47e
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_context.h
@@ -0,0 +1,565 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_CONTEXT_H
+#define VC5_CONTEXT_H
+
+#ifdef V3D_VERSION
+#include "broadcom/common/v3d_macros.h"
+#endif
+
+#include <stdio.h>
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/bitset.h"
+#include "util/slab.h"
+#include "xf86drm.h"
+#include "v3d_drm.h"
+#include "v3d_screen.h"
+
+struct vc5_job;
+struct vc5_bo;
+void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
+
+#include "v3d_bufmgr.h"
+#include "v3d_resource.h"
+#include "v3d_cl.h"
+
+#ifdef USE_V3D_SIMULATOR
+#define using_vc5_simulator true
+#else
+#define using_vc5_simulator false
+#endif
+
+#define VC5_DIRTY_BLEND (1 << 0)
+#define VC5_DIRTY_RASTERIZER (1 << 1)
+#define VC5_DIRTY_ZSA (1 << 2)
+#define VC5_DIRTY_FRAGTEX (1 << 3)
+#define VC5_DIRTY_VERTTEX (1 << 4)
+
+#define VC5_DIRTY_BLEND_COLOR (1 << 7)
+#define VC5_DIRTY_STENCIL_REF (1 << 8)
+#define VC5_DIRTY_SAMPLE_MASK (1 << 9)
+#define VC5_DIRTY_FRAMEBUFFER (1 << 10)
+#define VC5_DIRTY_STIPPLE (1 << 11)
+#define VC5_DIRTY_VIEWPORT (1 << 12)
+#define VC5_DIRTY_CONSTBUF (1 << 13)
+#define VC5_DIRTY_VTXSTATE (1 << 14)
+#define VC5_DIRTY_VTXBUF (1 << 15)
+#define VC5_DIRTY_SCISSOR (1 << 17)
+#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18)
+#define VC5_DIRTY_PRIM_MODE (1 << 19)
+#define VC5_DIRTY_CLIP (1 << 20)
+#define VC5_DIRTY_UNCOMPILED_VS (1 << 21)
+#define VC5_DIRTY_UNCOMPILED_FS (1 << 22)
+#define VC5_DIRTY_COMPILED_CS (1 << 23)
+#define VC5_DIRTY_COMPILED_VS (1 << 24)
+#define VC5_DIRTY_COMPILED_FS (1 << 25)
+#define VC5_DIRTY_FS_INPUTS (1 << 26)
+#define VC5_DIRTY_STREAMOUT (1 << 27)
+#define VC5_DIRTY_OQ (1 << 28)
+#define VC5_DIRTY_CENTROID_FLAGS (1 << 29)
+
+#define VC5_MAX_FS_INPUTS 64
+
struct vc5_sampler_view {
        struct pipe_sampler_view base;
        uint32_t p0;
        uint32_t p1;
        /* Precomputed swizzles to pass in to the shader key. */
        uint8_t swizzle[4];

        /* V3D 3.x: packed texture state, presumably mirroring
         * vc5_sampler_state below — TODO confirm.
         */
        uint8_t texture_shader_state[32];
        /* V3D 4.x: Texture state struct. */
        struct vc5_bo *bo;
};

struct vc5_sampler_state {
        struct pipe_sampler_state base;
        uint32_t p0;
        uint32_t p1;

        /* V3D 3.x: Packed texture state. */
        uint8_t texture_shader_state[32];
        /* V3D 4.x: Sampler state struct. */
        struct vc5_bo *bo;
};

/* Bound sampler views and sampler states for one shader stage. */
struct vc5_texture_stateobj {
        struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
        unsigned num_textures;
        struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
        unsigned num_samplers;
        struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS];
};

struct vc5_shader_uniform_info {
        enum quniform_contents *contents;
        uint32_t *data;
        uint32_t count;
};

/* A shader as bound by the state tracker, before per-variant compilation. */
struct vc5_uncompiled_shader {
        /** A name for this program, so you can track it in shader-db output. */
        uint32_t program_id;
        /** How many variants of this program were compiled, for shader-db. */
        uint32_t compiled_variant_count;
        struct pipe_shader_state base;
        /* Transform feedback outputs and specs for this shader. */
        uint32_t num_tf_outputs;
        struct v3d_varying_slot *tf_outputs;
        uint16_t tf_specs[16];
        uint16_t tf_specs_psiz[16];
        uint32_t num_tf_specs;

        /**
         * Flag for if the NIR in this shader originally came from TGSI. If
         * so, we need to do some fixups at compile time, due to missing
         * information in TGSI that exists in NIR.
         */
        bool was_tgsi;
};
+
/* A compiled shader variant, referenced from the CL while rendering. */
struct vc5_compiled_shader {
        /* BO holding the compiled shader code. */
        struct vc5_bo *bo;

        union {
                struct v3d_prog_data *base;
                struct v3d_vs_prog_data *vs;
                struct v3d_fs_prog_data *fs;
        } prog_data;

        /**
         * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the
         * uniforms have to be rewritten (and therefore the shader state
         * reemitted).
         */
        uint32_t uniform_dirty_bits;
};

struct vc5_program_stateobj {
        /* Shaders as bound by the state tracker. */
        struct vc5_uncompiled_shader *bind_vs, *bind_fs;
        /* Current compiled variants (cs is presumably the coordinate-shader
         * variant of the VS — confirm).
         */
        struct vc5_compiled_shader *cs, *vs, *fs;

        struct vc5_bo *spill_bo;
        int spill_size_per_thread;
};

struct vc5_constbuf_stateobj {
        struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
        uint32_t enabled_mask;
        uint32_t dirty_mask;
};

struct vc5_vertexbuf_stateobj {
        struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
        unsigned count;
        uint32_t enabled_mask;
        uint32_t dirty_mask;
};

struct vc5_vertex_stateobj {
        struct pipe_vertex_element pipe[VC5_MAX_ATTRIBUTES];
        unsigned num_elements;

        /* Prepacked attribute records, 12 bytes per attribute. */
        uint8_t attrs[12 * VC5_MAX_ATTRIBUTES];
        struct vc5_bo *default_attribute_values;
};

struct vc5_streamout_stateobj {
        struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
        unsigned num_targets;
};

/* Hash table key for vc5->jobs */
struct vc5_job_key {
        struct pipe_surface *cbufs[4];
        struct pipe_surface *zsbuf;
};

/* Early-Z disposition for the current depth-compare direction. */
enum vc5_ez_state {
        VC5_EZ_UNDECIDED = 0,
        VC5_EZ_GT_GE,
        VC5_EZ_LT_LE,
        VC5_EZ_DISABLED,
};
+
+/**
+ * A complete bin/render job.
+ *
+ * This is all of the state necessary to submit a bin/render to the kernel.
+ * We want to be able to have multiple in progress at a time, so that we don't
+ * need to flush an existing CL just to switch to rendering to a new render
+ * target (which would mean reading back from the old render target when
+ * starting to render to it again).
+ */
struct vc5_job {
        struct vc5_context *vc5;
        /* Binning, render, and indirect-state command lists. */
        struct vc5_cl bcl;
        struct vc5_cl rcl;
        struct vc5_cl indirect;
        struct vc5_bo *tile_alloc;
        struct vc5_bo *tile_state;
        uint32_t shader_rec_count;

        /* Arguments for the kernel submit ioctl, built up as we record. */
        struct drm_v3d_submit_cl submit;

        /**
         * Set of all BOs referenced by the job. This will be used for making
         * the list of BOs that the kernel will need to have paged in to
         * execute our job.
         */
        struct set *bos;

        /** Sum of the sizes of the BOs referenced by the job. */
        uint32_t referenced_size;

        /* Set of pipe_resources written by this job (by field name;
         * confirm against vc5_job.c usage).
         */
        struct set *write_prscs;

        /* Size of the submit.bo_handles array. */
        uint32_t bo_handles_size;

        /** @{ Surfaces to submit rendering for. */
        struct pipe_surface *cbufs[4];
        struct pipe_surface *zsbuf;
        /** @} */
        /** @{
         * Bounding box of the scissor across all queued drawing.
         *
         * Note that the max values are exclusive.
         */
        uint32_t draw_min_x;
        uint32_t draw_min_y;
        uint32_t draw_max_x;
        uint32_t draw_max_y;
        /** @} */
        /** @{
         * Width/height of the color framebuffer being rendered to,
         * for VC5_TILE_RENDERING_MODE_CONFIG.
         */
        uint32_t draw_width;
        uint32_t draw_height;
        /** @} */
        /** @{ Tile information, depending on MSAA and float color buffer. */
        uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */
        uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */

        uint32_t tile_width; /** @< Width of a tile. */
        uint32_t tile_height; /** @< Height of a tile. */
        /** maximum internal_bpp of all color render targets. */
        uint32_t internal_bpp;

        /** Whether the current rendering is in a 4X MSAA tile buffer. */
        bool msaa;
        /** @} */

        /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the
         * first rendering.
         */
        uint32_t cleared;
        /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to
         * (either clears or draws).
         */
        uint32_t resolve;
        uint32_t clear_color[4][4];
        float clear_z;
        uint8_t clear_s;

        /**
         * Set if some drawing (triangles, blits, or just a glClear()) has
         * been done to the FBO, meaning that we need to
         * DRM_IOCTL_VC5_SUBMIT_CL.
         */
        bool needs_flush;

        /**
         * Set if there is a nonzero address for OCCLUSION_QUERY_COUNTER. If
         * so, we need to disable it and flush before ending the CL, to keep
         * the next tile from starting with it enabled.
         */
        bool oq_enabled;

        /**
         * Set when a packet enabling TF on all further primitives has been
         * emitted.
         */
        bool tf_enabled;

        /**
         * Current EZ state for drawing. Updated at the start of draw after
         * we've decided on the shader being rendered.
         */
        enum vc5_ez_state ez_state;
        /**
         * The first EZ state that was used for drawing with a decided EZ
         * direction (so either UNDECIDED, GT, or LT).
         */
        enum vc5_ez_state first_ez_state;

        /**
         * Number of draw calls (not counting full buffer clears) queued in
         * the current job.
         */
        uint32_t draw_calls_queued;

        struct vc5_job_key key;
};
+
struct vc5_context {
        struct pipe_context base;

        int fd;
        struct vc5_screen *screen;

        /** The 3D rendering job for the currently bound FBO. */
        struct vc5_job *job;

        /* Map from struct vc5_job_key to the job for that FBO.
         */
        struct hash_table *jobs;

        /**
         * Map from vc5_resource to a job writing to that resource.
         *
         * Primarily for flushing jobs rendering to textures that are now
         * being read from.
         */
        struct hash_table *write_jobs;

        struct slab_child_pool transfer_pool;
        struct blitter_context *blitter;

        /** bitfield of VC5_DIRTY_* */
        uint32_t dirty;

        struct primconvert_context *primconvert;

        /* Caches of compiled shader variants, per stage. */
        struct hash_table *fs_cache, *vs_cache;
        uint32_t next_uncompiled_program_id;
        uint64_t next_compiled_program_id;

        struct vc5_compiler_state *compiler_state;

        /* Current primitive type being drawn. */
        uint8_t prim_mode;

        /** Maximum index buffer valid for the current shader_rec. */
        uint32_t max_index;

        /** Sync object that our RCL will update as its out_sync. */
        uint32_t out_sync;

        struct u_upload_mgr *uploader;

        /** @{ Current pipeline state objects */
        struct pipe_scissor_state scissor;
        struct pipe_blend_state *blend;
        struct vc5_rasterizer_state *rasterizer;
        struct vc5_depth_stencil_alpha_state *zsa;

        struct vc5_texture_stateobj verttex, fragtex;

        struct vc5_program_stateobj prog;

        struct vc5_vertex_stateobj *vtx;

        struct {
                struct pipe_blend_color f;
                uint16_t hf[4];
        } blend_color;
        struct pipe_stencil_ref stencil_ref;
        unsigned sample_mask;
        struct pipe_framebuffer_state framebuffer;

        /* Per render target, whether we should swap the R and B fields in the
         * shader's color output and in blending.  If render targets disagree
         * on the R/B swap and use the constant color, then we would need to
         * fall back to in-shader blending.
         */
        uint8_t swap_color_rb;

        /* Per render target, whether we should treat the dst alpha values as
         * one in blending.
         *
         * For RGBX formats, the tile buffer's alpha channel will be
         * undefined.
         */
        uint8_t blend_dst_alpha_one;

        bool active_queries;

        /* Primitive counters for queries/transform feedback. */
        uint32_t tf_prims_generated;
        uint32_t prims_generated;

        struct pipe_poly_stipple stipple;
        struct pipe_clip_state clip;
        struct pipe_viewport_state viewport;
        struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
        struct vc5_vertexbuf_stateobj vertexbuf;
        struct vc5_streamout_stateobj streamout;
        struct vc5_bo *current_oq;
        /** @} */
};
+
struct vc5_rasterizer_state {
        struct pipe_rasterizer_state base;

        /* VC5_CONFIGURATION_BITS */
        uint8_t config_bits[3];

        float point_size;

        /**
         * Half-float (1/8/7 bits) value of polygon offset units for
         * VC5_PACKET_DEPTH_OFFSET
         */
        uint16_t offset_units;
        /**
         * Half-float (1/8/7 bits) value of polygon offset scale for
         * VC5_PACKET_DEPTH_OFFSET
         */
        uint16_t offset_factor;
};

struct vc5_depth_stencil_alpha_state {
        struct pipe_depth_stencil_alpha_state base;

        /* Early-Z disposition derived from this state — see vc5_ez_state. */
        enum vc5_ez_state ez_state;

        /** Uniforms for stencil state.
         *
         * Index 0 is either the front config, or the front-and-back config.
         * Index 1 is the back config if doing separate back stencil.
         * Index 2 is the writemask config if it's not a common mask value.
         */
        uint32_t stencil_uniforms[3];

        /* Prepacked front/back stencil configuration bytes — presumably CL
         * packet contents; confirm against the state emit code.
         */
        uint8_t stencil_front[6];
        uint8_t stencil_back[6];
};

/* Prints a warning to stderr, but only when V3D_DEBUG_PERF is enabled. */
#define perf_debug(...) do {                            \
        if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))       \
                fprintf(stderr, __VA_ARGS__);           \
} while (0)
+
+static inline struct vc5_context *
+vc5_context(struct pipe_context *pcontext)
+{
+ return (struct vc5_context *)pcontext;
+}
+
+static inline struct vc5_sampler_view *
+vc5_sampler_view(struct pipe_sampler_view *psview)
+{
+ return (struct vc5_sampler_view *)psview;
+}
+
+static inline struct vc5_sampler_state *
+vc5_sampler_state(struct pipe_sampler_state *psampler)
+{
+ return (struct vc5_sampler_state *)psampler;
+}
+
+struct pipe_context *vc5_context_create(struct pipe_screen *pscreen,
+ void *priv, unsigned flags);
+void vc5_program_init(struct pipe_context *pctx);
+void vc5_program_fini(struct pipe_context *pctx);
+void vc5_query_init(struct pipe_context *pctx);
+
+void vc5_simulator_init(struct vc5_screen *screen);
+void vc5_simulator_destroy(struct vc5_screen *screen);
+int vc5_simulator_flush(struct vc5_context *vc5,
+ struct drm_v3d_submit_cl *args,
+ struct vc5_job *job);
+int vc5_simulator_ioctl(int fd, unsigned long request, void *arg);
+void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride,
+ int handle, uint32_t size);
+
+static inline int
+vc5_ioctl(int fd, unsigned long request, void *arg)
+{
+ if (using_vc5_simulator)
+ return vc5_simulator_ioctl(fd, request, arg);
+ else
+ return drmIoctl(fd, request, arg);
+}
+
+void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader);
+struct vc5_cl_reloc vc5_write_uniforms(struct vc5_context *vc5,
+ struct vc5_compiled_shader *shader,
+ struct vc5_constbuf_stateobj *cb,
+ struct vc5_texture_stateobj *texstate);
+
+void vc5_flush(struct pipe_context *pctx);
+void vc5_job_init(struct vc5_context *vc5);
+struct vc5_job *vc5_get_job(struct vc5_context *vc5,
+ struct pipe_surface **cbufs,
+ struct pipe_surface *zsbuf);
+struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5);
+void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
+void vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc);
+void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job);
+void vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
+ struct pipe_resource *prsc);
+void vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
+ struct pipe_resource *prsc);
+void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode);
+
+bool vc5_rt_format_supported(const struct v3d_device_info *devinfo,
+ enum pipe_format f);
+bool vc5_tex_format_supported(const struct v3d_device_info *devinfo,
+ enum pipe_format f);
+uint8_t vc5_get_rt_format(const struct v3d_device_info *devinfo, enum pipe_format f);
+uint8_t vc5_get_tex_format(const struct v3d_device_info *devinfo, enum pipe_format f);
+uint8_t vc5_get_tex_return_size(const struct v3d_device_info *devinfo,
+ enum pipe_format f,
+ enum pipe_tex_compare compare);
+uint8_t vc5_get_tex_return_channels(const struct v3d_device_info *devinfo,
+ enum pipe_format f);
+const uint8_t *vc5_get_format_swizzle(const struct v3d_device_info *devinfo,
+ enum pipe_format f);
+void vc5_get_internal_type_bpp_for_output_format(const struct v3d_device_info *devinfo,
+ uint32_t format,
+ uint32_t *type,
+ uint32_t *bpp);
+
+void vc5_init_query_functions(struct vc5_context *vc5);
+void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info);
+void vc5_blitter_save(struct vc5_context *vc5);
+
+struct vc5_fence *vc5_fence_create(struct vc5_context *vc5);
+
+#ifdef v3dX
+# include "v3dx_context.h"
+#else
+# define v3dX(x) v3d33_##x
+# include "v3dx_context.h"
+# undef v3dX
+
+# define v3dX(x) v3d41_##x
+# include "v3dx_context.h"
+# undef v3dX
+#endif
+
+#endif /* VC5_CONTEXT_H */
diff --git a/src/gallium/drivers/v3d/v3d_fence.c b/src/gallium/drivers/v3d/v3d_fence.c
new file mode 100644
index 00000000000..54bce562403
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_fence.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_fence.c
+ *
+ * Seqno-based fence management.
+ *
+ * We have two mechanisms for waiting in our kernel API: You can wait on a BO
+ * to have all rendering from any process be completed, or wait on a
+ * seqno for that particular seqno to be passed. The fence API we're
+ * implementing is based on waiting for all rendering in the context to have
+ * completed (with no reference to what other processes might be doing with
+ * the same BOs), so we can just use the seqno of the last rendering we'd
+ * fired off as our fence marker.
+ */
+
+#include "util/u_inlines.h"
+
+#include "v3d_context.h"
+#include "v3d_bufmgr.h"
+
+struct vc5_fence {
+ struct pipe_reference reference;
+ uint32_t sync;
+};
+
+static void
+vc5_fence_reference(struct pipe_screen *pscreen,
+ struct pipe_fence_handle **pp,
+ struct pipe_fence_handle *pf)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+ struct vc5_fence **p = (struct vc5_fence **)pp;
+ struct vc5_fence *f = (struct vc5_fence *)pf;
+ struct vc5_fence *old = *p;
+
+ if (pipe_reference(&(*p)->reference, &f->reference)) {
+ drmSyncobjDestroy(screen->fd, old->sync);
+ free(old);
+ }
+ *p = f;
+}
+
+static boolean
+vc5_fence_finish(struct pipe_screen *pscreen,
+ struct pipe_context *ctx,
+ struct pipe_fence_handle *pf,
+ uint64_t timeout_ns)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+ struct vc5_fence *f = (struct vc5_fence *)pf;
+
+ return drmSyncobjWait(screen->fd, &f->sync, 1, timeout_ns, 0, NULL);
+}
+
+struct vc5_fence *
+vc5_fence_create(struct vc5_context *vc5)
+{
+ struct vc5_fence *f = calloc(1, sizeof(*f));
+ if (!f)
+ return NULL;
+
+ uint32_t new_sync;
+ /* Make a new sync object for the context. */
+ int ret = drmSyncobjCreate(vc5->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
+ &new_sync);
+ if (ret) {
+ free(f);
+ return NULL;
+ }
+
+ pipe_reference_init(&f->reference, 1);
+ f->sync = vc5->out_sync;
+ vc5->out_sync = new_sync;
+
+ return f;
+}
+
+void
+vc5_fence_init(struct vc5_screen *screen)
+{
+ screen->base.fence_reference = vc5_fence_reference;
+ screen->base.fence_finish = vc5_fence_finish;
+}
diff --git a/src/gallium/drivers/v3d/v3d_format_table.h b/src/gallium/drivers/v3d/v3d_format_table.h
new file mode 100644
index 00000000000..8b8011351a1
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_format_table.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2014-2018 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct vc5_format {
+ /** Set if the pipe format is defined in the table. */
+ bool present;
+
+ /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
+ uint8_t rt_type;
+
+ /** One of V3D33_TEXTURE_DATA_FORMAT_*. */
+ uint8_t tex_type;
+
+ /**
+ * Swizzle to apply to the RGBA shader output for storing to the tile
+ * buffer, to the RGBA tile buffer to produce shader input (for
+ * blending), and for turning the rgba8888 texture sampler return
+ * value into shader rgba values.
+ */
+ uint8_t swizzle[4];
+
+ /* Whether the return value is 16F/I/UI or 32F/I/UI. */
+ uint8_t return_size;
+
+ /* If return_size == 32, how many channels are returned by texturing.
+ * 16 always returns 2 pairs of 16 bit values.
+ */
+ uint8_t return_channels;
+};
diff --git a/src/gallium/drivers/v3d/v3d_formats.c b/src/gallium/drivers/v3d/v3d_formats.c
new file mode 100644
index 00000000000..8424b368cf4
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_formats.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_formats.c
+ *
+ * Contains the table and accessors for VC5 texture and render target format
+ * support.
+ *
+ * The hardware has limited support for texture formats, and extremely limited
+ * support for render target formats. As a result, we emulate other formats
+ * in our shader code, and this stores the table for doing so.
+ */
+
+#include "util/macros.h"
+
+#include "v3d_context.h"
+#include "v3d_format_table.h"
+
+static const struct vc5_format *
+get_format(const struct v3d_device_info *devinfo, enum pipe_format f)
+{
+ if (devinfo->ver >= 41)
+ return v3d41_get_format_desc(f);
+ else
+ return v3d33_get_format_desc(f);
+}
+
+bool
+vc5_rt_format_supported(const struct v3d_device_info *devinfo,
+ enum pipe_format f)
+{
+ const struct vc5_format *vf = get_format(devinfo, f);
+
+ if (!vf)
+ return false;
+
+ return vf->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO;
+}
+
+uint8_t
+vc5_get_rt_format(const struct v3d_device_info *devinfo, enum pipe_format f)
+{
+ const struct vc5_format *vf = get_format(devinfo, f);
+
+ if (!vf)
+ return 0;
+
+ return vf->rt_type;
+}
+
+bool
+vc5_tex_format_supported(const struct v3d_device_info *devinfo,
+ enum pipe_format f)
+{
+ const struct vc5_format *vf = get_format(devinfo, f);
+
+ return vf != NULL;
+}
+
+uint8_t
+vc5_get_tex_format(const struct v3d_device_info *devinfo, enum pipe_format f)
+{
+ const struct vc5_format *vf = get_format(devinfo, f);
+
+ if (!vf)
+ return 0;
+
+ return vf->tex_type;
+}
+
+uint8_t
+vc5_get_tex_return_size(const struct v3d_device_info *devinfo,
+ enum pipe_format f, enum pipe_tex_compare compare)
+{
+ const struct vc5_format *vf = get_format(devinfo, f);
+
+ if (!vf)
+ return 0;
+
+ if (compare == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+ return 16;
+
+ return vf->return_size;
+}
+
+uint8_t
+vc5_get_tex_return_channels(const struct v3d_device_info *devinfo,
+ enum pipe_format f)
+{
+ const struct vc5_format *vf = get_format(devinfo, f);
+
+ if (!vf)
+ return 0;
+
+ return vf->return_channels;
+}
+
+const uint8_t *
+vc5_get_format_swizzle(const struct v3d_device_info *devinfo, enum pipe_format f)
+{
+ const struct vc5_format *vf = get_format(devinfo, f);
+ static const uint8_t fallback[] = {0, 1, 2, 3};
+
+ if (!vf)
+ return fallback;
+
+ return vf->swizzle;
+}
+
+void
+vc5_get_internal_type_bpp_for_output_format(const struct v3d_device_info *devinfo,
+ uint32_t format,
+ uint32_t *type,
+ uint32_t *bpp)
+{
+ if (devinfo->ver >= 41) {
+ return v3d41_get_internal_type_bpp_for_output_format(format,
+ type, bpp);
+ } else {
+ return v3d33_get_internal_type_bpp_for_output_format(format,
+ type, bpp);
+ }
+}
diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c
new file mode 100644
index 00000000000..85c64df34ca
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_job.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_job.c
+ *
+ * Functions for submitting VC5 render jobs to the kernel.
+ */
+
+#include <xf86drm.h>
+#include "v3d_context.h"
+/* The OQ/semaphore packets are the same across V3D versions. */
+#define V3D_VERSION 33
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/common/v3d_macros.h"
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "broadcom/clif/clif_dump.h"
+
+static void
+remove_from_ht(struct hash_table *ht, void *key)
+{
+ struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+ _mesa_hash_table_remove(ht, entry);
+}
+
+static void
+vc5_job_free(struct vc5_context *vc5, struct vc5_job *job)
+{
+ struct set_entry *entry;
+
+ set_foreach(job->bos, entry) {
+ struct vc5_bo *bo = (struct vc5_bo *)entry->key;
+ vc5_bo_unreference(&bo);
+ }
+
+ remove_from_ht(vc5->jobs, &job->key);
+
+ if (job->write_prscs) {
+ struct set_entry *entry;
+
+ set_foreach(job->write_prscs, entry) {
+ const struct pipe_resource *prsc = entry->key;
+
+ remove_from_ht(vc5->write_jobs, (void *)prsc);
+ }
+ }
+
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (job->cbufs[i]) {
+ remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture);
+ pipe_surface_reference(&job->cbufs[i], NULL);
+ }
+ }
+ if (job->zsbuf) {
+ remove_from_ht(vc5->write_jobs, job->zsbuf->texture);
+ pipe_surface_reference(&job->zsbuf, NULL);
+ }
+
+ if (vc5->job == job)
+ vc5->job = NULL;
+
+ vc5_destroy_cl(&job->bcl);
+ vc5_destroy_cl(&job->rcl);
+ vc5_destroy_cl(&job->indirect);
+ vc5_bo_unreference(&job->tile_alloc);
+ vc5_bo_unreference(&job->tile_state);
+
+ ralloc_free(job);
+}
+
+static struct vc5_job *
+vc5_job_create(struct vc5_context *vc5)
+{
+ struct vc5_job *job = rzalloc(vc5, struct vc5_job);
+
+ job->vc5 = vc5;
+
+ vc5_init_cl(job, &job->bcl);
+ vc5_init_cl(job, &job->rcl);
+ vc5_init_cl(job, &job->indirect);
+
+ job->draw_min_x = ~0;
+ job->draw_min_y = ~0;
+ job->draw_max_x = 0;
+ job->draw_max_y = 0;
+
+ job->bos = _mesa_set_create(job,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ return job;
+}
+
+void
+vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo)
+{
+ if (!bo)
+ return;
+
+ if (_mesa_set_search(job->bos, bo))
+ return;
+
+ vc5_bo_reference(bo);
+ _mesa_set_add(job->bos, bo);
+ job->referenced_size += bo->size;
+
+ uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;
+
+ if (job->submit.bo_handle_count >= job->bo_handles_size) {
+ job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
+ bo_handles = reralloc(job, bo_handles,
+ uint32_t, job->bo_handles_size);
+ job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
+ }
+ bo_handles[job->submit.bo_handle_count++] = bo->handle;
+}
+
+void
+vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc)
+{
+ struct vc5_context *vc5 = job->vc5;
+
+ if (!job->write_prscs) {
+ job->write_prscs = _mesa_set_create(job,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ }
+
+ _mesa_set_add(job->write_prscs, prsc);
+ _mesa_hash_table_insert(vc5->write_jobs, prsc, job);
+}
+
+void
+vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
+ struct pipe_resource *prsc)
+{
+ struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs,
+ prsc);
+ if (entry) {
+ struct vc5_job *job = entry->data;
+ vc5_job_submit(vc5, job);
+ }
+}
+
+void
+vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
+ struct pipe_resource *prsc)
+{
+ struct vc5_resource *rsc = vc5_resource(prsc);
+
+ vc5_flush_jobs_writing_resource(vc5, prsc);
+
+ struct hash_entry *entry;
+ hash_table_foreach(vc5->jobs, entry) {
+ struct vc5_job *job = entry->data;
+
+ if (_mesa_set_search(job->bos, rsc->bo)) {
+ vc5_job_submit(vc5, job);
+ /* Reminder: vc5->jobs is safe to keep iterating even
+ * after deletion of an entry.
+ */
+ continue;
+ }
+ }
+}
+
+static void
+vc5_job_set_tile_buffer_size(struct vc5_job *job)
+{
+ static const uint8_t tile_sizes[] = {
+ 64, 64,
+ 64, 32,
+ 32, 32,
+ 32, 16,
+ 16, 16,
+ };
+ int tile_size_index = 0;
+ if (job->msaa)
+ tile_size_index += 2;
+
+ if (job->cbufs[3] || job->cbufs[2])
+ tile_size_index += 2;
+ else if (job->cbufs[1])
+ tile_size_index++;
+
+ int max_bpp = RENDER_TARGET_MAXIMUM_32BPP;
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (job->cbufs[i]) {
+ struct vc5_surface *surf = vc5_surface(job->cbufs[i]);
+ max_bpp = MAX2(max_bpp, surf->internal_bpp);
+ }
+ }
+ job->internal_bpp = max_bpp;
+ STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0);
+ tile_size_index += max_bpp;
+
+ assert(tile_size_index < ARRAY_SIZE(tile_sizes));
+ job->tile_width = tile_sizes[tile_size_index * 2 + 0];
+ job->tile_height = tile_sizes[tile_size_index * 2 + 1];
+}
+
+/**
+ * Returns a vc5_job structure for tracking V3D rendering to a particular FBO.
+ *
+ * If we've already started rendering to this FBO, then return the same job,
+ * otherwise make a new one. If we're beginning rendering to an FBO, make
+ * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
+ * have been flushed.
+ */
+struct vc5_job *
+vc5_get_job(struct vc5_context *vc5,
+ struct pipe_surface **cbufs, struct pipe_surface *zsbuf)
+{
+ /* Return the existing job for this FBO if we have one */
+ struct vc5_job_key local_key = {
+ .cbufs = {
+ cbufs[0],
+ cbufs[1],
+ cbufs[2],
+ cbufs[3],
+ },
+ .zsbuf = zsbuf,
+ };
+ struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs,
+ &local_key);
+ if (entry)
+ return entry->data;
+
+ /* Creating a new job. Make sure that any previous jobs reading or
+ * writing these buffers are flushed.
+ */
+ struct vc5_job *job = vc5_job_create(vc5);
+
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (cbufs[i]) {
+ vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture);
+ pipe_surface_reference(&job->cbufs[i], cbufs[i]);
+
+ if (cbufs[i]->texture->nr_samples > 1)
+ job->msaa = true;
+ }
+ }
+ if (zsbuf) {
+ vc5_flush_jobs_reading_resource(vc5, zsbuf->texture);
+ pipe_surface_reference(&job->zsbuf, zsbuf);
+ if (zsbuf->texture->nr_samples > 1)
+ job->msaa = true;
+ }
+
+ vc5_job_set_tile_buffer_size(job);
+
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (cbufs[i])
+ _mesa_hash_table_insert(vc5->write_jobs,
+ cbufs[i]->texture, job);
+ }
+ if (zsbuf)
+ _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job);
+
+ memcpy(&job->key, &local_key, sizeof(local_key));
+ _mesa_hash_table_insert(vc5->jobs, &job->key, job);
+
+ return job;
+}
+
+struct vc5_job *
+vc5_get_job_for_fbo(struct vc5_context *vc5)
+{
+ if (vc5->job)
+ return vc5->job;
+
+ struct pipe_surface **cbufs = vc5->framebuffer.cbufs;
+ struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf;
+ struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf);
+
+ /* The dirty flags are tracking what's been updated while vc5->job has
+ * been bound, so set them all to ~0 when switching between jobs. We
+ * also need to reset all state at the start of rendering.
+ */
+ vc5->dirty = ~0;
+
+ /* If we're binding to uninitialized buffers, no need to load their
+ * contents before drawing.
+ */
+ for (int i = 0; i < 4; i++) {
+ if (cbufs[i]) {
+ struct vc5_resource *rsc = vc5_resource(cbufs[i]->texture);
+ if (!rsc->writes)
+ job->cleared |= PIPE_CLEAR_COLOR0 << i;
+ }
+ }
+
+ if (zsbuf) {
+ struct vc5_resource *rsc = vc5_resource(zsbuf->texture);
+ if (!rsc->writes)
+ job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+ }
+
+ job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width,
+ job->tile_width);
+ job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height,
+ job->tile_height);
+
+ vc5->job = job;
+
+ return job;
+}
+
+static bool
+vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr)
+{
+ struct vc5_job *job = data;
+ struct set_entry *entry;
+
+ set_foreach(job->bos, entry) {
+ struct vc5_bo *bo = (void *)entry->key;
+
+ if (addr >= bo->offset &&
+ addr < bo->offset + bo->size) {
+ vc5_bo_map(bo);
+ *vaddr = bo->map + addr - bo->offset;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void
+vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job)
+{
+ if (!(V3D_DEBUG & V3D_DEBUG_CL))
+ return;
+
+ struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo,
+ stderr, vc5_clif_dump_lookup,
+ job);
+
+ fprintf(stderr, "BCL: 0x%08x..0x%08x\n",
+ job->submit.bcl_start, job->submit.bcl_end);
+
+ clif_dump_add_cl(clif, job->submit.bcl_start, job->submit.bcl_end);
+
+ fprintf(stderr, "RCL: 0x%08x..0x%08x\n",
+ job->submit.rcl_start, job->submit.rcl_end);
+ clif_dump_add_cl(clif, job->submit.rcl_start, job->submit.rcl_end);
+}
+
+/**
+ * Submits the job to the kernel and then reinitializes it.
+ */
+void
+vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
+{
+ MAYBE_UNUSED struct vc5_screen *screen = vc5->screen;
+
+ if (!job->needs_flush)
+ goto done;
+
+ if (vc5->screen->devinfo.ver >= 41)
+ v3d41_emit_rcl(job);
+ else
+ v3d33_emit_rcl(job);
+
+ if (cl_offset(&job->bcl) > 0) {
+ if (screen->devinfo.ver >= 41)
+ v3d41_bcl_epilogue(vc5, job);
+ else
+ v3d33_bcl_epilogue(vc5, job);
+ }
+
+ job->submit.out_sync = vc5->out_sync;
+ job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
+ job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
+
+ /* On V3D 4.1, the tile alloc/state setup moved to register writes
+ * instead of binner packets.
+ */
+ if (screen->devinfo.ver >= 41) {
+ vc5_job_add_bo(job, job->tile_alloc);
+ job->submit.qma = job->tile_alloc->offset;
+ job->submit.qms = job->tile_alloc->size;
+
+ vc5_job_add_bo(job, job->tile_state);
+ job->submit.qts = job->tile_state->offset;
+ }
+
+ vc5_clif_dump(vc5, job);
+
+ if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {
+ int ret;
+
+#ifndef USE_V3D_SIMULATOR
+ ret = drmIoctl(vc5->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
+#else
+ ret = vc5_simulator_flush(vc5, &job->submit, job);
+#endif
+ static bool warned = false;
+ if (ret && !warned) {
+ fprintf(stderr, "Draw call returned %s. "
+ "Expect corruption.\n", strerror(errno));
+ warned = true;
+ }
+ }
+
+done:
+ vc5_job_free(vc5, job);
+}
+
+static bool
+vc5_job_compare(const void *a, const void *b)
+{
+ return memcmp(a, b, sizeof(struct vc5_job_key)) == 0;
+}
+
+static uint32_t
+vc5_job_hash(const void *key)
+{
+ return _mesa_hash_data(key, sizeof(struct vc5_job_key));
+}
+
+void
+vc5_job_init(struct vc5_context *vc5)
+{
+ vc5->jobs = _mesa_hash_table_create(vc5,
+ vc5_job_hash,
+ vc5_job_compare);
+ vc5->write_jobs = _mesa_hash_table_create(vc5,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+}
+
diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c
new file mode 100644
index 00000000000..ce2e0be8ed2
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_program.c
@@ -0,0 +1,682 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+#include "util/hash_table.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "nir/tgsi_to_nir.h"
+#include "compiler/v3d_compiler.h"
+#include "v3d_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+#include "mesa/state_tracker/st_glsl_types.h"
+
+/**
+ * Maps a NIR output's driver_location back to its gl_varying_slot.
+ *
+ * Returns -1 (cast into gl_varying_slot) when no output variable has the
+ * requested driver_location.
+ */
+static gl_varying_slot
+vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location)
+{
+        nir_foreach_variable(var, &s->outputs) {
+                if (var->data.driver_location == driver_location) {
+                        return var->data.location;
+                }
+        }
+
+        return -1;
+}
+
+/**
+ * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader.
+ *
+ * A shader can have 16 of these specs, and each one of them can write up to
+ * 16 dwords.  Since we allow a total of 64 transform feedback output
+ * components (not 16 vectors), we have to group the writes of multiple
+ * varyings together in a single data spec.
+ */
+static void
+vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
+                                   const struct pipe_stream_output_info *stream_output)
+{
+        if (!stream_output->num_outputs)
+                return;
+
+        /* Flattened per-component list of varying slots written for TF, in
+         * buffer order.
+         */
+        struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4];
+        int slot_count = 0;
+
+        for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) {
+                uint32_t buffer_offset = 0;
+                uint32_t vpm_start = slot_count;
+
+                for (int i = 0; i < stream_output->num_outputs; i++) {
+                        const struct pipe_stream_output *output =
+                                &stream_output->output[i];
+
+                        if (output->output_buffer != buffer)
+                                continue;
+
+                        /* We assume that the SO outputs appear in increasing
+                         * order in the buffer.
+                         */
+                        assert(output->dst_offset >= buffer_offset);
+
+                        /* Pad any undefined slots in the output.  The padding
+                         * slots just re-emit VARYING_SLOT_POS.x as filler.
+                         */
+                        for (int j = buffer_offset; j < output->dst_offset; j++) {
+                                slots[slot_count] =
+                                        v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0);
+                                slot_count++;
+                                buffer_offset++;
+                        }
+
+                        /* Set the coordinate shader up to output the
+                         * components of this varying.
+                         */
+                        for (int j = 0; j < output->num_components; j++) {
+                                gl_varying_slot slot =
+                                        vc5_get_slot_for_driver_location(so->base.ir.nir, output->register_index);
+
+                                slots[slot_count] =
+                                        v3d_slot_from_slot_and_component(slot,
+                                                                         output->start_component + j);
+                                slot_count++;
+                                buffer_offset++;
+                        }
+                }
+
+                uint32_t vpm_size = slot_count - vpm_start;
+                if (!vpm_size)
+                        continue;
+
+                uint32_t vpm_start_offset = vpm_start + 6;
+
+                /* Emit one data spec per up-to-16-dword run of this buffer's
+                 * components.
+                 */
+                while (vpm_size) {
+                        uint32_t write_size = MIN2(vpm_size, 1 << 4);
+
+                        struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
+                                /* We need the offset from the coordinate shader's VPM
+                                 * output block, which has the [X, Y, Z, W, Xs, Ys]
+                                 * values at the start.
+                                 */
+                                .first_shaded_vertex_value_to_output = vpm_start_offset,
+                                .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = write_size - 1,
+                                .output_buffer_to_write_to = buffer,
+                        };
+
+                        /* GFXH-1559 */
+                        assert(unpacked.first_shaded_vertex_value_to_output != 8 ||
+                               so->num_tf_specs != 0);
+
+                        assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs));
+                        V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                                                                       (void *)&so->tf_specs[so->num_tf_specs],
+                                                                       &unpacked);
+
+                        /* If point size is being written by the shader, then
+                         * all the VPM start offsets are shifted up by one.
+                         * We won't know that until the variant is compiled,
+                         * though, so pack a second (psiz) variant of each spec.
+                         */
+                        unpacked.first_shaded_vertex_value_to_output++;
+
+                        /* GFXH-1559 */
+                        assert(unpacked.first_shaded_vertex_value_to_output != 8 ||
+                               so->num_tf_specs != 0);
+
+                        V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                                                                       (void *)&so->tf_specs_psiz[so->num_tf_specs],
+                                                                       &unpacked);
+                        so->num_tf_specs++;
+                        vpm_start_offset += write_size;
+                        vpm_size -= write_size;
+                }
+        }
+
+        /* Keep a copy of the flattened slot list for building the coord
+         * shader key (see vc5_update_compiled_vs()).
+         */
+        so->num_tf_outputs = slot_count;
+        so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot,
+                                      slot_count);
+        memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count);
+}
+
+/* I/O lowering size callback: number of attribute slots a type occupies. */
+static int
+type_size(const struct glsl_type *type)
+{
+        return glsl_count_attribute_slots(type, false);
+}
+
+/* I/O lowering size callback for uniforms, using the GLSL storage size. */
+static int
+uniforms_type_size(const struct glsl_type *type)
+{
+        return st_glsl_storage_type_size(type, false);
+}
+
+/**
+ * Creates the uncompiled shader CSO: acquires (or translates from TGSI) the
+ * NIR, runs the device-independent lowering/optimization passes, and
+ * precomputes the transform feedback specs.
+ *
+ * Compilation to QPU code is deferred until draw time, when the state key
+ * is known (see vc5_get_compiled_shader()).
+ */
+static void *
+vc5_shader_state_create(struct pipe_context *pctx,
+                        const struct pipe_shader_state *cso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader);
+        if (!so)
+                return NULL;
+
+        so->program_id = vc5->next_uncompiled_program_id++;
+
+        nir_shader *s;
+
+        if (cso->type == PIPE_SHADER_IR_NIR) {
+                /* The backend takes ownership of the NIR shader on state
+                 * creation.
+                 */
+                s = cso->ir.nir;
+
+                NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform,
+                           type_size,
+                           (nir_lower_io_options)0);
+                NIR_PASS_V(s, nir_lower_io, nir_var_uniform,
+                           uniforms_type_size,
+                           (nir_lower_io_options)0);
+        } else {
+                assert(cso->type == PIPE_SHADER_IR_TGSI);
+
+                if (V3D_DEBUG & V3D_DEBUG_TGSI) {
+                        fprintf(stderr, "prog %d TGSI:\n",
+                                so->program_id);
+                        tgsi_dump(cso->tokens, 0);
+                        fprintf(stderr, "\n");
+                }
+                s = tgsi_to_nir(cso->tokens, &v3d_nir_options);
+
+                /* Remembered so the FS key can request int/uint render
+                 * target handling (see vc5_update_compiled_fs()).
+                 */
+                so->was_tgsi = true;
+        }
+
+        NIR_PASS_V(s, nir_opt_global_to_local);
+        NIR_PASS_V(s, nir_lower_regs_to_ssa);
+        NIR_PASS_V(s, nir_normalize_cubemap_coords);
+
+        NIR_PASS_V(s, nir_lower_load_const_to_scalar);
+
+        v3d_optimize_nir(s);
+
+        NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local);
+
+        /* Garbage collect dead instructions */
+        nir_sweep(s);
+
+        so->base.type = PIPE_SHADER_IR_NIR;
+        so->base.ir.nir = s;
+
+        vc5_set_transform_feedback_outputs(so, &cso->stream_output);
+
+        if (V3D_DEBUG & (V3D_DEBUG_NIR |
+                         v3d_debug_flag_for_shader_stage(s->info.stage))) {
+                fprintf(stderr, "%s prog %d NIR:\n",
+                        gl_shader_stage_name(s->info.stage),
+                        so->program_id);
+                nir_print_shader(s, stderr);
+                fprintf(stderr, "\n");
+        }
+
+        return so;
+}
+
+/**
+ * Looks up (or compiles and caches) the shader variant for the given key.
+ *
+ * The per-stage caches are keyed on the full v3d_fs_key/v3d_vs_key
+ * contents, so any state change that lands in the key produces a new
+ * variant.  Also grows the shared spill BO if this variant spills more
+ * than any previous one.
+ */
+static struct vc5_compiled_shader *
+vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key)
+{
+        struct vc5_uncompiled_shader *shader_state = key->shader_state;
+        nir_shader *s = shader_state->base.ir.nir;
+
+        struct hash_table *ht;
+        uint32_t key_size;
+        if (s->info.stage == MESA_SHADER_FRAGMENT) {
+                ht = vc5->fs_cache;
+                key_size = sizeof(struct v3d_fs_key);
+        } else {
+                ht = vc5->vs_cache;
+                key_size = sizeof(struct v3d_vs_key);
+        }
+
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        if (entry)
+                return entry->data;
+
+        struct vc5_compiled_shader *shader =
+                rzalloc(NULL, struct vc5_compiled_shader);
+
+        int program_id = shader_state->program_id;
+        int variant_id =
+                p_atomic_inc_return(&shader_state->compiled_variant_count);
+        uint64_t *qpu_insts;
+        uint32_t shader_size;
+
+        switch (s->info.stage) {
+        case MESA_SHADER_VERTEX:
+                shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data);
+
+                qpu_insts = v3d_compile_vs(vc5->screen->compiler,
+                                           (struct v3d_vs_key *)key,
+                                           shader->prog_data.vs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        case MESA_SHADER_FRAGMENT:
+                shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data);
+
+                qpu_insts = v3d_compile_fs(vc5->screen->compiler,
+                                           (struct v3d_fs_key *)key,
+                                           shader->prog_data.fs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        default:
+                unreachable("bad stage");
+        }
+
+        vc5_set_shader_uniform_dirty_flags(shader);
+
+        /* Upload the QPU instructions to a BO the hardware can execute. */
+        shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader");
+        vc5_bo_map(shader->bo);
+        memcpy(shader->bo->map, qpu_insts, shader_size);
+
+        free(qpu_insts);
+
+        /* The cache takes ownership of a heap copy of the key (callers pass
+         * stack-allocated keys).  Note the copy is typed as the v3d_key the
+         * tables are keyed on, matching delete_from_cache_if_matches().
+         */
+        struct v3d_key *dup_key;
+        dup_key = ralloc_size(shader, key_size);
+        memcpy(dup_key, key, key_size);
+        _mesa_hash_table_insert(ht, dup_key, shader);
+
+        if (shader->prog_data.base->spill_size >
+            vc5->prog.spill_size_per_thread) {
+                /* Max 4 QPUs per slice, 3 slices per core. We only do single
+                 * core so far. This overallocates memory on smaller cores.
+                 */
+                int total_spill_size =
+                        4 * 3 * shader->prog_data.base->spill_size;
+
+                vc5_bo_unreference(&vc5->prog.spill_bo);
+                vc5->prog.spill_bo = vc5_bo_alloc(vc5->screen,
+                                                  total_spill_size, "spill");
+                vc5->prog.spill_size_per_thread =
+                        shader->prog_data.base->spill_size;
+        }
+
+        return shader;
+}
+
+/**
+ * Fills in the parts of the shader key shared between the VS and FS:
+ * per-texture return size/channels and swizzle, shadow-compare and clamp
+ * state, plus the user clip plane enables.
+ */
+static void
+vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key,
+                     struct vc5_texture_stateobj *texstate)
+{
+        const struct v3d_device_info *devinfo = &vc5->screen->devinfo;
+
+        for (int i = 0; i < texstate->num_textures; i++) {
+                struct pipe_sampler_view *sampler = texstate->textures[i];
+                struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler);
+                struct pipe_sampler_state *sampler_state =
+                        texstate->samplers[i];
+
+                if (!sampler)
+                        continue;
+
+                key->tex[i].return_size =
+                        vc5_get_tex_return_size(devinfo,
+                                                sampler->format,
+                                                sampler_state->compare_mode);
+
+                /* For 16-bit, we set up the sampler to always return 2
+                 * channels (meaning no recompiles for most statechanges),
+                 * while for 32 we actually scale the returns with channels.
+                 */
+                if (key->tex[i].return_size == 16) {
+                        key->tex[i].return_channels = 2;
+                } else if (devinfo->ver > 40) {
+                        key->tex[i].return_channels = 4;
+                } else {
+                        key->tex[i].return_channels =
+                                vc5_get_tex_return_channels(devinfo,
+                                                            sampler->format);
+                }
+
+                if (key->tex[i].return_size == 32 && devinfo->ver < 40) {
+                        memcpy(key->tex[i].swizzle,
+                               vc5_sampler->swizzle,
+                               sizeof(vc5_sampler->swizzle));
+                } else {
+                        /* For 16-bit returns, we let the sampler state handle
+                         * the swizzle.
+                         */
+                        key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
+                        key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
+                        key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
+                        key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
+                }
+
+                /* NOTE(review): sampler is already known non-NULL here (we
+                 * continue above), so this check looks redundant.
+                 */
+                if (sampler) {
+                        key->tex[i].compare_mode = sampler_state->compare_mode;
+                        key->tex[i].compare_func = sampler_state->compare_func;
+                        key->tex[i].clamp_s =
+                                sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP;
+                        key->tex[i].clamp_t =
+                                sampler_state->wrap_t == PIPE_TEX_WRAP_CLAMP;
+                        key->tex[i].clamp_r =
+                                sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP;
+                }
+        }
+
+        key->ucp_enables = vc5->rasterizer->base.clip_plane_enable;
+}
+
+/**
+ * Rebuilds the compiled FS variant if any of the FS-key-affecting dirty
+ * state changed, and raises the derived dirty flags (flat shade, centroid,
+ * FS inputs) when the new variant differs from the old one.
+ */
+static void
+vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct vc5_job *job = vc5->job;
+        struct v3d_fs_key local_key;
+        struct v3d_fs_key *key = &local_key;
+
+        /* Nothing that feeds the key changed: keep the current variant. */
+        if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                            VC5_DIRTY_BLEND |
+                            VC5_DIRTY_FRAMEBUFFER |
+                            VC5_DIRTY_ZSA |
+                            VC5_DIRTY_RASTERIZER |
+                            VC5_DIRTY_SAMPLE_MASK |
+                            VC5_DIRTY_FRAGTEX |
+                            VC5_DIRTY_UNCOMPILED_FS))) {
+                return;
+        }
+
+        memset(key, 0, sizeof(*key));
+        vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex);
+        key->base.shader_state = vc5->prog.bind_fs;
+        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
+        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
+                         prim_mode <= PIPE_PRIM_LINE_STRIP);
+        key->clamp_color = vc5->rasterizer->base.clamp_fragment_color;
+        if (vc5->blend->logicop_enable) {
+                key->logicop_func = vc5->blend->logicop_func;
+        } else {
+                key->logicop_func = PIPE_LOGICOP_COPY;
+        }
+        if (job->msaa) {
+                key->msaa = vc5->rasterizer->base.multisample;
+                key->sample_coverage = (vc5->rasterizer->base.multisample &&
+                                        vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1);
+                key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage;
+                key->sample_alpha_to_one = vc5->blend->alpha_to_one;
+        }
+
+        key->depth_enabled = (vc5->zsa->base.depth.enabled ||
+                              vc5->zsa->base.stencil[0].enabled);
+        if (vc5->zsa->base.alpha.enabled) {
+                key->alpha_test = true;
+                key->alpha_test_func = vc5->zsa->base.alpha.func;
+        }
+
+        /* gl_FragColor's propagation to however many bound color buffers
+         * there are means that the buffer count needs to be in the key.
+         */
+        key->nr_cbufs = vc5->framebuffer.nr_cbufs;
+        key->swap_color_rb = vc5->swap_color_rb;
+
+        for (int i = 0; i < key->nr_cbufs; i++) {
+                struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i];
+                if (!cbuf)
+                        continue;
+
+                const struct util_format_description *desc =
+                        util_format_description(cbuf->format);
+
+                /* Track which RTs are 32F so the shader can be specialized
+                 * for them.
+                 */
+                if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
+                    desc->channel[0].size == 32) {
+                        key->f32_color_rb |= 1 << i;
+                }
+
+                /* Integer RT handling is only keyed for TGSI-sourced
+                 * shaders (see vc5_shader_state_create()).
+                 */
+                if (vc5->prog.bind_fs->was_tgsi) {
+                        if (util_format_is_pure_uint(cbuf->format))
+                                key->uint_color_rb |= 1 << i;
+                        else if (util_format_is_pure_sint(cbuf->format))
+                                key->int_color_rb |= 1 << i;
+                }
+        }
+
+        if (key->is_points) {
+                key->point_sprite_mask =
+                        vc5->rasterizer->base.sprite_coord_enable;
+                key->point_coord_upper_left =
+                        (vc5->rasterizer->base.sprite_coord_mode ==
+                         PIPE_SPRITE_COORD_UPPER_LEFT);
+        }
+
+        key->light_twoside = vc5->rasterizer->base.light_twoside;
+        key->shade_model_flat = vc5->rasterizer->base.flatshade;
+
+        struct vc5_compiled_shader *old_fs = vc5->prog.fs;
+        vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base);
+        if (vc5->prog.fs == old_fs)
+                return;
+
+        vc5->dirty |= VC5_DIRTY_COMPILED_FS;
+
+        /* Only the prog_data fields that feed other emitted state need to
+         * raise additional dirty flags.
+         */
+        if (old_fs) {
+                if (vc5->prog.fs->prog_data.fs->flat_shade_flags !=
+                    old_fs->prog_data.fs->flat_shade_flags) {
+                        vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+                }
+
+                if (vc5->prog.fs->prog_data.fs->centroid_flags !=
+                    old_fs->prog_data.fs->centroid_flags) {
+                        vc5->dirty |= VC5_DIRTY_CENTROID_FLAGS;
+                }
+        }
+
+        if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots,
+                             old_fs->prog_data.fs->input_slots,
+                             sizeof(vc5->prog.fs->prog_data.fs->input_slots))) {
+                vc5->dirty |= VC5_DIRTY_FS_INPUTS;
+        }
+}
+
+/**
+ * Rebuilds the compiled vertex shader variants if VS-key-affecting state
+ * changed.  Two variants are produced from the same key: the normal VS
+ * (outputs the FS inputs) and the coordinate shader (is_coord = true,
+ * outputs only the transform feedback varyings).
+ */
+static void
+vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct v3d_vs_key local_key;
+        struct v3d_vs_key *key = &local_key;
+
+        if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                            VC5_DIRTY_RASTERIZER |
+                            VC5_DIRTY_VERTTEX |
+                            VC5_DIRTY_VTXSTATE |
+                            VC5_DIRTY_UNCOMPILED_VS |
+                            VC5_DIRTY_FS_INPUTS))) {
+                return;
+        }
+
+        memset(key, 0, sizeof(*key));
+        vc5_setup_shared_key(vc5, &key->base, &vc5->verttex);
+        key->base.shader_state = vc5->prog.bind_vs;
+        /* The VS must output exactly what the current FS consumes, so the
+         * FS input slots are part of the VS key (hence VC5_DIRTY_FS_INPUTS
+         * above).
+         */
+        key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs;
+        STATIC_ASSERT(sizeof(key->fs_inputs) ==
+                      sizeof(vc5->prog.fs->prog_data.fs->input_slots));
+        memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots,
+               sizeof(key->fs_inputs));
+        key->clamp_color = vc5->rasterizer->base.clamp_vertex_color;
+
+        key->per_vertex_point_size =
+                (prim_mode == PIPE_PRIM_POINTS &&
+                 vc5->rasterizer->base.point_size_per_vertex);
+
+        struct vc5_compiled_shader *vs =
+                vc5_get_compiled_shader(vc5, &key->base);
+        if (vs != vc5->prog.vs) {
+                vc5->prog.vs = vs;
+                vc5->dirty |= VC5_DIRTY_COMPILED_VS;
+        }
+
+        key->is_coord = true;
+        /* Coord shaders only output varyings used by transform feedback. */
+        struct vc5_uncompiled_shader *shader_state = key->base.shader_state;
+        memcpy(key->fs_inputs, shader_state->tf_outputs,
+               sizeof(*key->fs_inputs) * shader_state->num_tf_outputs);
+        if (shader_state->num_tf_outputs < key->num_fs_inputs) {
+                /* Zero the tail so unused slots don't make otherwise-equal
+                 * keys hash differently.
+                 */
+                memset(&key->fs_inputs[shader_state->num_tf_outputs],
+                       0,
+                       sizeof(*key->fs_inputs) * (key->num_fs_inputs -
+                                                  shader_state->num_tf_outputs));
+        }
+        key->num_fs_inputs = shader_state->num_tf_outputs;
+
+        struct vc5_compiled_shader *cs =
+                vc5_get_compiled_shader(vc5, &key->base);
+        if (cs != vc5->prog.cs) {
+                vc5->prog.cs = cs;
+                vc5->dirty |= VC5_DIRTY_COMPILED_CS;
+        }
+}
+
+/**
+ * Updates all compiled shader variants for the current state.
+ *
+ * The FS is updated first because the VS key embeds the FS's input slots
+ * (see vc5_update_compiled_vs()).
+ */
+void
+vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        vc5_update_compiled_fs(vc5, prim_mode);
+        vc5_update_compiled_vs(vc5, prim_mode);
+}
+
+/* Hash callback for the FS variant cache: hash the whole key struct. */
+static uint32_t
+fs_cache_hash(const void *key)
+{
+        const size_t key_len = sizeof(struct v3d_fs_key);
+
+        return _mesa_hash_data(key, key_len);
+}
+
+/* Hash callback for the VS variant cache: hash the whole key struct. */
+static uint32_t
+vs_cache_hash(const void *key)
+{
+        const size_t key_len = sizeof(struct v3d_vs_key);
+
+        return _mesa_hash_data(key, key_len);
+}
+
+/* Equality callback for the FS variant cache. */
+static bool
+fs_cache_compare(const void *key1, const void *key2)
+{
+        return !memcmp(key1, key2, sizeof(struct v3d_fs_key));
+}
+
+/* Equality callback for the VS variant cache. */
+static bool
+vs_cache_compare(const void *key1, const void *key2)
+{
+        return !memcmp(key1, key2, sizeof(struct v3d_vs_key));
+}
+
+/**
+ * Frees one cached variant if it was compiled from the uncompiled shader
+ * being deleted, clearing the context's last-bound pointer if it pointed
+ * at that variant.
+ */
+static void
+delete_from_cache_if_matches(struct hash_table *ht,
+                             struct vc5_compiled_shader **last_compile,
+                             struct hash_entry *entry,
+                             struct vc5_uncompiled_shader *so)
+{
+        const struct v3d_key *key = entry->key;
+
+        if (key->shader_state == so) {
+                struct vc5_compiled_shader *shader = entry->data;
+                _mesa_hash_table_remove(ht, entry);
+                vc5_bo_unreference(&shader->bo);
+
+                if (shader == *last_compile)
+                        *last_compile = NULL;
+
+                /* The duplicated key and prog_data are ralloc children of
+                 * the shader, so this frees them too.
+                 */
+                ralloc_free(shader);
+        }
+}
+
+/**
+ * Deletes an uncompiled shader CSO, evicting every compiled variant built
+ * from it out of both per-stage caches.
+ */
+static void
+vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_uncompiled_shader *so = hwcso;
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->fs_cache, entry) {
+                delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs,
+                                             entry, so);
+        }
+        hash_table_foreach(vc5->vs_cache, entry) {
+                delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs,
+                                             entry, so);
+        }
+
+        /* so->base.ir.nir is always set by vc5_shader_state_create(), and
+         * the TF output array is a ralloc child of it.
+         */
+        ralloc_free(so->base.ir.nir);
+        free(so);
+}
+
+/* Binds the fragment shader CSO and flags it for recompile checks. */
+static void
+vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        vc5->prog.bind_fs = hwcso;
+        vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS;
+}
+
+/* Binds the vertex shader CSO and flags it for recompile checks. */
+static void
+vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        vc5->prog.bind_vs = hwcso;
+        vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS;
+}
+
+/**
+ * Hooks up the pipe_context shader CSO entrypoints and creates the
+ * per-stage variant caches.
+ */
+void
+vc5_program_init(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        /* VS and FS share the same create/delete paths; the stage is
+         * determined from the NIR at compile time.
+         */
+        pctx->create_vs_state = vc5_shader_state_create;
+        pctx->delete_vs_state = vc5_shader_state_delete;
+
+        pctx->create_fs_state = vc5_shader_state_create;
+        pctx->delete_fs_state = vc5_shader_state_delete;
+
+        pctx->bind_fs_state = vc5_fp_state_bind;
+        pctx->bind_vs_state = vc5_vp_state_bind;
+
+        vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
+                                                fs_cache_compare);
+        vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
+                                                vs_cache_compare);
+}
+
+/**
+ * Frees every remaining compiled shader variant at context teardown.
+ */
+void
+vc5_program_fini(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->fs_cache, entry) {
+                struct vc5_compiled_shader *shader = entry->data;
+                vc5_bo_unreference(&shader->bo);
+                ralloc_free(shader);
+                _mesa_hash_table_remove(vc5->fs_cache, entry);
+        }
+
+        hash_table_foreach(vc5->vs_cache, entry) {
+                struct vc5_compiled_shader *shader = entry->data;
+                vc5_bo_unreference(&shader->bo);
+                ralloc_free(shader);
+                _mesa_hash_table_remove(vc5->vs_cache, entry);
+        }
+}
diff --git a/src/gallium/drivers/v3d/v3d_query.c b/src/gallium/drivers/v3d/v3d_query.c
new file mode 100644
index 00000000000..f645544bedf
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_query.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Gallium query object support.
+ *
+ * The HW has native support for occlusion queries, with the query result
+ * being loaded and stored by the TLB unit. From a SW perspective, we have to
+ * be careful to make sure that the jobs that need to be tracking queries are
+ * bracketed by the start and end of counting, even across FBO transitions.
+ *
+ * For the transform feedback PRIMITIVES_GENERATED/WRITTEN queries, we have to
+ * do the calculations in software at draw time.
+ */
+
+#include "v3d_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+struct vc5_query
+{
+        enum pipe_query_type type;
+        /* BO holding the TLB-written occlusion counter; NULL for the
+         * software-counted primitive queries.
+         */
+        struct vc5_bo *bo;
+
+        /* Software counter snapshots taken at begin/end for the
+         * PRIMITIVES_GENERATED/EMITTED query types.
+         */
+        uint32_t start, end;
+};
+
+/* Allocates a query object; the BO (if any) is allocated at begin time. */
+static struct pipe_query *
+vc5_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index)
+{
+        struct vc5_query *q = calloc(1, sizeof(*q));
+
+        q->type = query_type;
+
+        /* Note that struct pipe_query isn't actually defined anywhere. */
+        return (struct pipe_query *)q;
+}
+
+/* Frees a query and drops its occlusion counter BO reference, if any. */
+static void
+vc5_destroy_query(struct pipe_context *pctx, struct pipe_query *query)
+{
+        struct vc5_query *q = (struct vc5_query *)query;
+
+        vc5_bo_unreference(&q->bo);
+        free(q);
+}
+
+/**
+ * Starts a query: primitive queries just snapshot the software counters,
+ * while occlusion-style queries allocate and zero a counter BO and make it
+ * the job's current occlusion query target.
+ */
+static boolean
+vc5_begin_query(struct pipe_context *pctx, struct pipe_query *query)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_query *q = (struct vc5_query *)query;
+
+        switch (q->type) {
+        case PIPE_QUERY_PRIMITIVES_GENERATED:
+                q->start = vc5->prims_generated;
+                break;
+        case PIPE_QUERY_PRIMITIVES_EMITTED:
+                q->start = vc5->tf_prims_generated;
+                break;
+        default:
+                q->bo = vc5_bo_alloc(vc5->screen, 4096, "query");
+
+                /* Zero the counter; the TLB accumulates into it. */
+                uint32_t *map = vc5_bo_map(q->bo);
+                *map = 0;
+                vc5->current_oq = q->bo;
+                vc5->dirty |= VC5_DIRTY_OQ;
+                break;
+        }
+
+        return true;
+}
+
+/**
+ * Ends a query: snapshots the software counters for primitive queries, or
+ * detaches the occlusion counter BO from subsequent rendering.
+ */
+static bool
+vc5_end_query(struct pipe_context *pctx, struct pipe_query *query)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_query *q = (struct vc5_query *)query;
+
+        switch (q->type) {
+        case PIPE_QUERY_PRIMITIVES_GENERATED:
+                q->end = vc5->prims_generated;
+                break;
+        case PIPE_QUERY_PRIMITIVES_EMITTED:
+                q->end = vc5->tf_prims_generated;
+                break;
+        default:
+                vc5->current_oq = NULL;
+                vc5->dirty |= VC5_DIRTY_OQ;
+                break;
+        }
+
+        return true;
+}
+
+/**
+ * Reads back a query result.
+ *
+ * For occlusion-style queries this flushes the pending jobs, waits for (or
+ * polls) the counter BO, and reads the counter.  For the primitive queries
+ * the result is just the difference of the software counter snapshots.
+ *
+ * Returns false when a non-blocking request finds the result not yet
+ * available.
+ */
+static boolean
+vc5_get_query_result(struct pipe_context *pctx, struct pipe_query *query,
+                     boolean wait, union pipe_query_result *vresult)
+{
+        struct vc5_query *q = (struct vc5_query *)query;
+        uint32_t result = 0;
+
+        if (q->bo) {
+                /* XXX: Only flush the jobs using this BO. */
+                vc5_flush(pctx);
+
+                /* A blocking request waits indefinitely for the BO to go
+                 * idle; a non-blocking one polls with a zero timeout.  (The
+                 * previous code had these two timeouts swapped.)
+                 */
+                if (wait) {
+                        if (!vc5_bo_wait(q->bo, ~0ull, "query"))
+                                return false;
+                } else {
+                        if (!vc5_bo_wait(q->bo, 0, "query"))
+                                return false;
+                }
+
+                /* XXX: Sum up per-core values. */
+                uint32_t *map = vc5_bo_map(q->bo);
+                result = *map;
+
+                vc5_bo_unreference(&q->bo);
+        }
+
+        switch (q->type) {
+        case PIPE_QUERY_OCCLUSION_COUNTER:
+                vresult->u64 = result;
+                break;
+        case PIPE_QUERY_OCCLUSION_PREDICATE:
+        case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+                vresult->b = result != 0;
+                break;
+        case PIPE_QUERY_PRIMITIVES_GENERATED:
+        case PIPE_QUERY_PRIMITIVES_EMITTED:
+                vresult->u64 = q->end - q->start;
+                break;
+        default:
+                unreachable("unsupported query type");
+        }
+
+        return true;
+}
+
+/* Toggles whether queries accumulate (glBeginConditionalRender etc.). */
+static void
+vc5_set_active_query_state(struct pipe_context *pctx, boolean enable)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5->active_queries = enable;
+        /* Both the occlusion query and streamout emit paths depend on
+         * active_queries, so re-emit them.
+         */
+        vc5->dirty |= VC5_DIRTY_OQ;
+        vc5->dirty |= VC5_DIRTY_STREAMOUT;
+}
+
+/* Hooks up the pipe_context query entrypoints. */
+void
+vc5_query_init(struct pipe_context *pctx)
+{
+        pctx->create_query = vc5_create_query;
+        pctx->destroy_query = vc5_destroy_query;
+        pctx->begin_query = vc5_begin_query;
+        pctx->end_query = vc5_end_query;
+        pctx->get_query_result = vc5_get_query_result;
+        pctx->set_active_query_state = vc5_set_active_query_state;
+}
+
diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c
new file mode 100644
index 00000000000..1cd3f1949a2
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_resource.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_blit.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+#include "util/u_transfer_helper.h"
+#include "util/u_upload_mgr.h"
+#include "util/u_format_zs.h"
+
+#include "drm_fourcc.h"
+#include "v3d_screen.h"
+#include "v3d_context.h"
+#include "v3d_resource.h"
+#include "v3d_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Dumps the resource's miplevel layout (tiling mode, padded sizes, offsets)
+ * to stderr when V3D_DEBUG_SURFACE is set.
+ */
+static void
+vc5_debug_resource_layout(struct vc5_resource *rsc, const char *caller)
+{
+        if (!(V3D_DEBUG & V3D_DEBUG_SURFACE))
+                return;
+
+        struct pipe_resource *prsc = &rsc->base;
+
+        if (prsc->target == PIPE_BUFFER) {
+                fprintf(stderr,
+                        "rsc %s %p (format %s), %dx%d buffer @0x%08x-0x%08x\n",
+                        caller, rsc,
+                        util_format_short_name(prsc->format),
+                        prsc->width0, prsc->height0,
+                        rsc->bo->offset,
+                        rsc->bo->offset + rsc->bo->size - 1);
+                return;
+        }
+
+        static const char *const tiling_descriptions[] = {
+                [VC5_TILING_RASTER] = "R",
+                [VC5_TILING_LINEARTILE] = "LT",
+                [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1",
+                [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2",
+                [VC5_TILING_UIF_NO_XOR] = "UIF",
+                [VC5_TILING_UIF_XOR] = "UIF^",
+        };
+
+        for (int i = 0; i <= prsc->last_level; i++) {
+                struct vc5_resource_slice *slice = &rsc->slices[i];
+
+                /* Recover the padded level dimensions from the slice's
+                 * stride and padded height.
+                 */
+                int level_width = slice->stride / rsc->cpp;
+                int level_height = slice->padded_height;
+                int level_depth =
+                        u_minify(util_next_power_of_two(prsc->depth0), i);
+
+                fprintf(stderr,
+                        "rsc %s %p (format %s), %dx%d: "
+                        "level %d (%s) %dx%dx%d -> %dx%dx%d, stride %d@0x%08x\n",
+                        caller, rsc,
+                        util_format_short_name(prsc->format),
+                        prsc->width0, prsc->height0,
+                        i, tiling_descriptions[slice->tiling],
+                        u_minify(prsc->width0, i),
+                        u_minify(prsc->height0, i),
+                        u_minify(prsc->depth0, i),
+                        level_width,
+                        level_height,
+                        level_depth,
+                        slice->stride,
+                        rsc->bo->offset + slice->offset);
+        }
+}
+
+/**
+ * (Re)allocates the resource's backing BO, dropping the old one on
+ * success.  Used both at creation and to orphan the storage on
+ * DISCARD_WHOLE_RESOURCE maps.  Returns false on allocation failure,
+ * leaving the old BO in place.
+ */
+static bool
+vc5_resource_bo_alloc(struct vc5_resource *rsc)
+{
+        struct pipe_resource *prsc = &rsc->base;
+        struct pipe_screen *pscreen = prsc->screen;
+        struct vc5_bo *bo;
+
+        bo = vc5_bo_alloc(vc5_screen(pscreen), rsc->size, "resource");
+        if (bo) {
+                vc5_bo_unreference(&rsc->bo);
+                rsc->bo = bo;
+                vc5_debug_resource_layout(rsc, "alloc");
+                return true;
+        } else {
+                return false;
+        }
+}
+
+/**
+ * Finishes a transfer map.  For tiled resources (trans->map non-NULL) a
+ * write transfer stores the staging buffer back into the BO layer by
+ * layer, tiling as it goes; linear maps need no copy.
+ */
+static void
+vc5_resource_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *ptrans)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_transfer *trans = vc5_transfer(ptrans);
+
+        if (trans->map) {
+                struct vc5_resource *rsc = vc5_resource(ptrans->resource);
+                struct vc5_resource_slice *slice = &rsc->slices[ptrans->level];
+
+                if (ptrans->usage & PIPE_TRANSFER_WRITE) {
+                        for (int z = 0; z < ptrans->box.depth; z++) {
+                                void *dst = rsc->bo->map +
+                                        vc5_layer_offset(&rsc->base,
+                                                         ptrans->level,
+                                                         ptrans->box.z + z);
+                                vc5_store_tiled_image(dst,
+                                                      slice->stride,
+                                                      (trans->map +
+                                                       ptrans->stride *
+                                                       ptrans->box.height * z),
+                                                      ptrans->stride,
+                                                      slice->tiling, rsc->cpp,
+                                                      slice->padded_height,
+                                                      &ptrans->box);
+                        }
+                }
+                free(trans->map);
+        }
+
+        pipe_resource_reference(&ptrans->resource, NULL);
+        slab_free(&vc5->transfer_pool, ptrans);
+}
+
+/**
+ * Maps a resource for CPU access.
+ *
+ * Linear resources return a pointer straight into the BO; tiled resources
+ * are detiled into a malloc'd staging buffer (written back on unmap).
+ * Handles DISCARD_WHOLE_RESOURCE by orphaning the BO, and otherwise
+ * flushes jobs touching the resource unless UNSYNCHRONIZED.
+ */
+static void *
+vc5_resource_transfer_map(struct pipe_context *pctx,
+                          struct pipe_resource *prsc,
+                          unsigned level, unsigned usage,
+                          const struct pipe_box *box,
+                          struct pipe_transfer **pptrans)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        struct vc5_transfer *trans;
+        struct pipe_transfer *ptrans;
+        enum pipe_format format = prsc->format;
+        char *buf;
+
+        /* MSAA maps should have been handled by u_transfer_helper. */
+        assert(prsc->nr_samples <= 1);
+
+        /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
+         * being mapped.
+         */
+        if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+            !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) &&
+            prsc->last_level == 0 &&
+            prsc->width0 == box->width &&
+            prsc->height0 == box->height &&
+            prsc->depth0 == box->depth &&
+            prsc->array_size == 1 &&
+            rsc->bo->private) {
+                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+        }
+
+        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+                if (vc5_resource_bo_alloc(rsc)) {
+                        /* If it might be bound as one of our vertex buffers
+                         * or UBOs, make sure we re-emit vertex buffer state
+                         * or uniforms.
+                         */
+                        if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                                vc5->dirty |= VC5_DIRTY_VTXBUF;
+                        if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER)
+                                vc5->dirty |= VC5_DIRTY_CONSTBUF;
+                } else {
+                        /* If we failed to reallocate, flush users so that we
+                         * don't violate any syncing requirements.
+                         */
+                        vc5_flush_jobs_reading_resource(vc5, prsc);
+                }
+        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+                /* If we're writing and the buffer is being used by the CL, we
+                 * have to flush the CL first. If we're only reading, we need
+                 * to flush if the CL has written our buffer.
+                 */
+                if (usage & PIPE_TRANSFER_WRITE)
+                        vc5_flush_jobs_reading_resource(vc5, prsc);
+                else
+                        vc5_flush_jobs_writing_resource(vc5, prsc);
+        }
+
+        if (usage & PIPE_TRANSFER_WRITE) {
+                rsc->writes++;
+                rsc->initialized_buffers = ~0;
+        }
+
+        trans = slab_alloc(&vc5->transfer_pool);
+        if (!trans)
+                return NULL;
+
+        /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */
+
+        /* slab_alloc_st() doesn't zero: */
+        memset(trans, 0, sizeof(*trans));
+        ptrans = &trans->base;
+
+        pipe_resource_reference(&ptrans->resource, prsc);
+        ptrans->level = level;
+        ptrans->usage = usage;
+        ptrans->box = *box;
+
+        /* Note that the current kernel implementation is synchronous, so no
+         * need to do syncing stuff here yet.
+         */
+
+        if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+                buf = vc5_bo_map_unsynchronized(rsc->bo);
+        else
+                buf = vc5_bo_map(rsc->bo);
+        if (!buf) {
+                fprintf(stderr, "Failed to map bo\n");
+                goto fail;
+        }
+
+        *pptrans = ptrans;
+
+        /* Our load/store routines work on entire compressed blocks. */
+        ptrans->box.x /= util_format_get_blockwidth(format);
+        ptrans->box.y /= util_format_get_blockheight(format);
+        ptrans->box.width = DIV_ROUND_UP(ptrans->box.width,
+                                         util_format_get_blockwidth(format));
+        ptrans->box.height = DIV_ROUND_UP(ptrans->box.height,
+                                          util_format_get_blockheight(format));
+
+        struct vc5_resource_slice *slice = &rsc->slices[level];
+        if (rsc->tiled) {
+                /* No direct mappings of tiled, since we need to manually
+                 * tile/untile.
+                 *
+                 * NOTE(review): this early return leaks the transfer (and
+                 * its resource reference) — looks like it should goto fail;
+                 * confirm against callers before changing.
+                 */
+                if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+                        return NULL;
+
+                ptrans->stride = ptrans->box.width * rsc->cpp;
+                ptrans->layer_stride = ptrans->stride * ptrans->box.height;
+
+                trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
+
+                /* Detile each layer of the mapped box into the staging
+                 * buffer for READ maps.
+                 */
+                if (usage & PIPE_TRANSFER_READ) {
+                        for (int z = 0; z < ptrans->box.depth; z++) {
+                                void *src = rsc->bo->map +
+                                        vc5_layer_offset(&rsc->base,
+                                                         ptrans->level,
+                                                         ptrans->box.z + z);
+                                vc5_load_tiled_image((trans->map +
+                                                      ptrans->stride *
+                                                      ptrans->box.height * z),
+                                                     ptrans->stride,
+                                                     src,
+                                                     slice->stride,
+                                                     slice->tiling, rsc->cpp,
+                                                     slice->padded_height,
+                                                     &ptrans->box);
+                        }
+                }
+                return trans->map;
+        } else {
+                ptrans->stride = slice->stride;
+                ptrans->layer_stride = ptrans->stride;
+
+                return buf + slice->offset +
+                        ptrans->box.y * ptrans->stride +
+                        ptrans->box.x * rsc->cpp +
+                        ptrans->box.z * rsc->cube_map_stride;
+        }
+
+
+fail:
+        vc5_resource_transfer_unmap(pctx, ptrans);
+        return NULL;
+}
+
+/* Screen-level destructor for a resource: drops our reference on the
+ * backing BO and releases the CPU-side bookkeeping struct.
+ */
+static void
+vc5_resource_destroy(struct pipe_screen *pscreen,
+                     struct pipe_resource *prsc)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        vc5_bo_unreference(&rsc->bo);
+        free(rsc);
+}
+
+/**
+ * Exports a winsys handle (flink name, KMS handle, or dmabuf fd) for the
+ * resource's BO.
+ *
+ * Returns TRUE on success; unknown handle types fall through to FALSE.
+ */
+static boolean
+vc5_resource_get_handle(struct pipe_screen *pscreen,
+                        struct pipe_context *pctx,
+                        struct pipe_resource *prsc,
+                        struct winsys_handle *whandle,
+                        unsigned usage)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        struct vc5_bo *bo = rsc->bo;
+
+        whandle->stride = rsc->slices[0].stride;
+
+        /* If we're passing some reference to our BO out to some other part of
+         * the system, then we can't do any optimizations about only us being
+         * the ones seeing it (like BO caching).
+         */
+        bo->private = false;
+
+        switch (whandle->type) {
+        case DRM_API_HANDLE_TYPE_SHARED:
+                /* Legacy GEM flink global name. */
+                return vc5_bo_flink(bo, &whandle->handle);
+        case DRM_API_HANDLE_TYPE_KMS:
+                whandle->handle = bo->handle;
+                return TRUE;
+        case DRM_API_HANDLE_TYPE_FD:
+                /* vc5_bo_get_dmabuf() reports failure as -1. */
+                whandle->handle = vc5_bo_get_dmabuf(bo);
+                return whandle->handle != -1;
+        }
+
+        return FALSE;
+}
+
+/* Page-layout constants, expressed in units of UIF-block rows. */
+#define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE)
+#define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1)
+#define PAGE_CACHE_UB_ROWS (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE)
+#define PAGE_CACHE_MINUS_1_5_UB_ROWS (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5)
+
+/**
+ * Computes the HW's UIFblock padding for a given height/cpp.
+ *
+ * The goal of the padding is to keep pages of the same color (bank number) at
+ * least half a page away from each other vertically when crossing between
+ * columns of UIF blocks.
+ *
+ * Returns the number of extra UIF-block rows to add (stored in
+ * slice->ub_pad by the caller).
+ */
+static uint32_t
+vc5_get_ub_pad(struct vc5_resource *rsc, uint32_t height)
+{
+        uint32_t utile_h = vc5_utile_height(rsc->cpp);
+        uint32_t uif_block_h = utile_h * 2;
+        uint32_t height_ub = height / uif_block_h;
+
+        /* Where the image's height lands within a page-cache period. */
+        uint32_t height_offset_in_pc = height_ub % PAGE_CACHE_UB_ROWS;
+
+        /* For the perfectly-aligned-for-UIF-XOR case, don't add any pad. */
+        if (height_offset_in_pc == 0)
+                return 0;
+
+        /* Try padding up to where we're offset by at least half a page. */
+        if (height_offset_in_pc < PAGE_UB_ROWS_TIMES_1_5) {
+                /* If we fit entirely in the page cache, don't pad. */
+                if (height_ub < PAGE_CACHE_UB_ROWS)
+                        return 0;
+                else
+                        return PAGE_UB_ROWS_TIMES_1_5 - height_offset_in_pc;
+        }
+
+        /* If we're close to being aligned to page cache size, then round up
+         * and rely on XOR.
+         */
+        if (height_offset_in_pc > PAGE_CACHE_MINUS_1_5_UB_ROWS)
+                return PAGE_CACHE_UB_ROWS - height_offset_in_pc;
+
+        /* Otherwise, we're far enough away (top and bottom) to not need any
+         * padding.
+         */
+        return 0;
+}
+
+/**
+ * Computes the in-memory layout of a resource: per-miplevel offset, stride,
+ * tiling mode, and padded height, plus the total BO size and the
+ * array/cube/3D layer stride.
+ *
+ * Levels are walked from smallest (last_level) down to 0, so smaller
+ * levels land at lower offsets and "offset" accumulates into rsc->size.
+ */
+static void
+vc5_setup_slices(struct vc5_resource *rsc)
+{
+        struct pipe_resource *prsc = &rsc->base;
+        uint32_t width = prsc->width0;
+        uint32_t height = prsc->height0;
+        uint32_t depth = prsc->depth0;
+        /* Note that power-of-two padding is based on level 1.  These are not
+         * equivalent to just util_next_power_of_two(dimension), because at a
+         * level 0 dimension of 9, the level 1 power-of-two padded value is 4,
+         * not 8.
+         */
+        uint32_t pot_width = 2 * util_next_power_of_two(u_minify(width, 1));
+        uint32_t pot_height = 2 * util_next_power_of_two(u_minify(height, 1));
+        uint32_t pot_depth = 2 * util_next_power_of_two(u_minify(depth, 1));
+        uint32_t offset = 0;
+        uint32_t utile_w = vc5_utile_width(rsc->cpp);
+        uint32_t utile_h = vc5_utile_height(rsc->cpp);
+        uint32_t uif_block_w = utile_w * 2;
+        uint32_t uif_block_h = utile_h * 2;
+        uint32_t block_width = util_format_get_blockwidth(prsc->format);
+        uint32_t block_height = util_format_get_blockheight(prsc->format);
+        bool msaa = prsc->nr_samples > 1;
+        /* MSAA textures/renderbuffers are always laid out as single-level
+         * UIF.
+         */
+        bool uif_top = msaa;
+
+        for (int i = prsc->last_level; i >= 0; i--) {
+                struct vc5_resource_slice *slice = &rsc->slices[i];
+
+                uint32_t level_width, level_height, level_depth;
+                /* Levels 0 and 1 use the true minified size; deeper levels
+                 * are minified from the power-of-two base above.
+                 */
+                if (i < 2) {
+                        level_width = u_minify(width, i);
+                        level_height = u_minify(height, i);
+                } else {
+                        level_width = u_minify(pot_width, i);
+                        level_height = u_minify(pot_height, i);
+                }
+                if (i < 1)
+                        level_depth = u_minify(depth, i);
+                else
+                        level_depth = u_minify(pot_depth, i);
+
+                /* MSAA surfaces are allocated at twice the width/height. */
+                if (msaa) {
+                        level_width *= 2;
+                        level_height *= 2;
+                }
+
+                /* Convert from pixels to compressed-format blocks. */
+                level_width = DIV_ROUND_UP(level_width, block_width);
+                level_height = DIV_ROUND_UP(level_height, block_height);
+
+                if (!rsc->tiled) {
+                        slice->tiling = VC5_TILING_RASTER;
+                        if (prsc->target == PIPE_TEXTURE_1D)
+                                level_width = align(level_width, 64 / rsc->cpp);
+                } else {
+                        /* Pick the smallest tiling mode the level fits in.
+                         * Level 0 of an MSAA surface (uif_top) skips the
+                         * LT/UBLINEAR cases and always lands in UIF.
+                         */
+                        if ((i != 0 || !uif_top) &&
+                            (level_width <= utile_w ||
+                             level_height <= utile_h)) {
+                                slice->tiling = VC5_TILING_LINEARTILE;
+                                level_width = align(level_width, utile_w);
+                                level_height = align(level_height, utile_h);
+                        } else if ((i != 0 || !uif_top) &&
+                                   level_width <= uif_block_w) {
+                                slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN;
+                                level_width = align(level_width, uif_block_w);
+                                level_height = align(level_height, uif_block_h);
+                        } else if ((i != 0 || !uif_top) &&
+                                   level_width <= 2 * uif_block_w) {
+                                slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN;
+                                level_width = align(level_width, 2 * uif_block_w);
+                                level_height = align(level_height, uif_block_h);
+                        } else {
+                                /* We align the width to a 4-block column of
+                                 * UIF blocks, but we only align height to UIF
+                                 * blocks.
+                                 */
+                                level_width = align(level_width,
+                                                    4 * uif_block_w);
+                                level_height = align(level_height,
+                                                     uif_block_h);
+
+                                slice->ub_pad = vc5_get_ub_pad(rsc,
+                                                               level_height);
+                                level_height += slice->ub_pad * uif_block_h;
+
+                                /* If the padding set us to be aligned to
+                                 * the page cache size, then the HW will use
+                                 * the XOR bit on odd columns to get us
+                                 * perfectly misaligned.
+                                 */
+                                if ((level_height / uif_block_h) %
+                                    (VC5_PAGE_CACHE_SIZE /
+                                     VC5_UIFBLOCK_ROW_SIZE) == 0) {
+                                        slice->tiling = VC5_TILING_UIF_XOR;
+                                } else {
+                                        slice->tiling = VC5_TILING_UIF_NO_XOR;
+                                }
+                        }
+                }
+
+                slice->offset = offset;
+                slice->stride = level_width * rsc->cpp;
+                slice->padded_height = level_height;
+                slice->size = level_height * slice->stride;
+
+                /* 3D textures store level_depth panes per level. */
+                uint32_t slice_total_size = slice->size * level_depth;
+
+                /* The HW aligns level 1's base to a page if any of level 1 or
+                 * below could be UIF XOR.  The lower levels then inherit the
+                 * alignment for as long as necessary, thanks to being power of
+                 * two aligned.
+                 */
+                if (i == 1 &&
+                    level_width > 4 * uif_block_w &&
+                    level_height > PAGE_CACHE_MINUS_1_5_UB_ROWS * uif_block_h) {
+                        slice_total_size = align(slice_total_size,
+                                                 VC5_UIFCFG_PAGE_SIZE);
+                }
+
+                offset += slice_total_size;
+
+        }
+        rsc->size = offset;
+
+        /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only
+         * needs to be aligned to utile boundaries.  Since tiles are laid out
+         * from small to big in memory, we need to align the later UIF slices
+         * to UIF blocks, if they were preceded by non-UIF-block-aligned LT
+         * slices.
+         *
+         * We additionally align to 4k, which improves UIF XOR performance.
+         */
+        uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) -
+                                      rsc->slices[0].offset);
+        if (page_align_offset) {
+                rsc->size += page_align_offset;
+                for (int i = 0; i <= prsc->last_level; i++)
+                        rsc->slices[i].offset += page_align_offset;
+        }
+
+        /* Arrays and cube textures have a stride which is the distance from
+         * one full mipmap tree to the next (64b aligned).  For 3D textures,
+         * we need to program the stride between slices of miplevel 0.
+         */
+        if (prsc->target != PIPE_TEXTURE_3D) {
+                rsc->cube_map_stride = align(rsc->slices[0].offset +
+                                             rsc->slices[0].size, 64);
+                rsc->size += rsc->cube_map_stride * (prsc->array_size - 1);
+        } else {
+                rsc->cube_map_stride = rsc->slices[0].size;
+        }
+}
+
+/**
+ * Returns the byte offset of the given (level, layer) image within the
+ * resource's BO.
+ *
+ * 3D textures pack the depth panes of a miplevel contiguously (pane
+ * stride == slice->size), while arrays/cube maps repeat the whole mipmap
+ * tree every cube_map_stride bytes.
+ */
+uint32_t
+vc5_layer_offset(struct pipe_resource *prsc, uint32_t level, uint32_t layer)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        struct vc5_resource_slice *slice = &rsc->slices[level];
+        uint32_t layer_stride = (prsc->target == PIPE_TEXTURE_3D ?
+                                 slice->size : rsc->cube_map_stride);
+
+        return slice->offset + layer * layer_stride;
+}
+
+/**
+ * Common allocation and initialization of a vc5_resource from a template.
+ *
+ * Copies the template into the embedded pipe_resource and picks rsc->cpp.
+ * The caller is responsible for choosing the tiling layout, computing
+ * slices, and allocating the BO.  Returns NULL on allocation failure.
+ */
+static struct vc5_resource *
+vc5_resource_setup(struct pipe_screen *pscreen,
+                   const struct pipe_resource *tmpl)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+        struct vc5_resource *rsc = CALLOC_STRUCT(vc5_resource);
+        if (!rsc)
+                return NULL;
+        struct pipe_resource *prsc = &rsc->base;
+
+        *prsc = *tmpl;
+
+        pipe_reference_init(&prsc->reference, 1);
+        prsc->screen = pscreen;
+
+        if (prsc->nr_samples <= 1 ||
+            screen->devinfo.ver >= 40 ||
+            util_format_is_depth_or_stencil(prsc->format)) {
+                rsc->cpp = util_format_get_blocksize(prsc->format);
+                /* Pre-4.x MSAA depth/stencil stores every sample, so scale
+                 * cpp by the sample count.
+                 */
+                if (screen->devinfo.ver < 40 && prsc->nr_samples > 1)
+                        rsc->cpp *= prsc->nr_samples;
+        } else {
+                /* Pre-4.x MSAA color: cpp comes from the tile buffer's
+                 * internal bpp for the render-target format, not from the
+                 * API format's block size.
+                 */
+                assert(vc5_rt_format_supported(&screen->devinfo, prsc->format));
+                uint32_t output_image_format =
+                        vc5_get_rt_format(&screen->devinfo, prsc->format);
+                uint32_t internal_type;
+                uint32_t internal_bpp;
+                vc5_get_internal_type_bpp_for_output_format(&screen->devinfo,
+                                                            output_image_format,
+                                                            &internal_type,
+                                                            &internal_bpp);
+                switch (internal_bpp) {
+                case V3D_INTERNAL_BPP_32:
+                        rsc->cpp = 4;
+                        break;
+                case V3D_INTERNAL_BPP_64:
+                        rsc->cpp = 8;
+                        break;
+                case V3D_INTERNAL_BPP_128:
+                        rsc->cpp = 16;
+                        break;
+                }
+        }
+
+        /* One of the branches above must have set cpp. */
+        assert(rsc->cpp);
+
+        return rsc;
+}
+
+/* Returns whether "needle" appears in the count-entry modifier list. */
+static bool
+find_modifier(uint64_t needle, const uint64_t *haystack, int count)
+{
+        for (int i = 0; i < count; i++) {
+                if (haystack[i] == needle)
+                        return true;
+        }
+
+        return false;
+}
+
+/**
+ * Creates a resource honoring the modifier list the state tracker passed.
+ *
+ * Chooses tiled vs. linear layout, computes the miptree layout, and
+ * allocates the backing BO.  Returns NULL on failure without leaking the
+ * partially-constructed resource.
+ */
+static struct pipe_resource *
+vc5_resource_create_with_modifiers(struct pipe_screen *pscreen,
+                                   const struct pipe_resource *tmpl,
+                                   const uint64_t *modifiers,
+                                   int count)
+{
+        bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+        struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+        struct pipe_resource *prsc;
+        /* Use a tiled layout if we can, for better 3D performance. */
+        bool should_tile = true;
+
+        /* vc5_resource_setup() returns NULL on CALLOC failure; check before
+         * touching rsc.
+         */
+        if (!rsc)
+                return NULL;
+        prsc = &rsc->base;
+
+        /* VBOs/PBOs are untiled (and 1 height). */
+        if (tmpl->target == PIPE_BUFFER)
+                should_tile = false;
+
+        /* Cursors are always linear, and the user can request linear as well.
+         */
+        if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR))
+                should_tile = false;
+
+        /* 1D and 1D_ARRAY textures are always raster-order. */
+        if (tmpl->target == PIPE_TEXTURE_1D ||
+            tmpl->target == PIPE_TEXTURE_1D_ARRAY)
+                should_tile = false;
+
+        /* Scanout BOs for simulator need to be linear for interaction with
+         * i965.
+         */
+        if (using_vc5_simulator &&
+            tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
+                should_tile = false;
+
+        /* No user-specified modifier; determine our own. */
+        if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) {
+                linear_ok = true;
+                rsc->tiled = should_tile;
+        } else if (should_tile &&
+                   find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+                                 modifiers, count)) {
+                rsc->tiled = true;
+        } else if (linear_ok) {
+                rsc->tiled = false;
+        } else {
+                fprintf(stderr, "Unsupported modifier requested\n");
+                /* Previously "return NULL" here, which leaked rsc. */
+                goto fail;
+        }
+
+        rsc->internal_format = prsc->format;
+
+        vc5_setup_slices(rsc);
+        if (!vc5_resource_bo_alloc(rsc))
+                goto fail;
+
+        return prsc;
+fail:
+        vc5_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+/* Plain resource creation: no modifier list from the caller, so hand a
+ * single DRM_FORMAT_MOD_INVALID entry down and let the driver pick the
+ * layout.
+ */
+struct pipe_resource *
+vc5_resource_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *tmpl)
+{
+        static const uint64_t mod = DRM_FORMAT_MOD_INVALID;
+
+        return vc5_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
+}
+
+/**
+ * Imports a resource from a winsys handle (flink name or dmabuf fd).
+ *
+ * Only linear layouts can be imported so far, and the stride the winsys
+ * hands us must match the stride our own layout code computes.
+ */
+static struct pipe_resource *
+vc5_resource_from_handle(struct pipe_screen *pscreen,
+                         const struct pipe_resource *tmpl,
+                         struct winsys_handle *whandle,
+                         unsigned usage)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+        struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+        struct pipe_resource *prsc;
+        struct vc5_resource_slice *slice;
+
+        /* Check for setup failure before computing member addresses (the
+         * old code derived prsc/slice from rsc before the NULL check).
+         */
+        if (!rsc)
+                return NULL;
+        prsc = &rsc->base;
+        slice = &rsc->slices[0];
+
+        switch (whandle->modifier) {
+        case DRM_FORMAT_MOD_LINEAR:
+        case DRM_FORMAT_MOD_INVALID:
+                rsc->tiled = false;
+                break;
+        /* XXX: UIF */
+        default:
+                fprintf(stderr,
+                        "Attempt to import unsupported modifier 0x%llx\n",
+                        (unsigned long long)whandle->modifier);
+                goto fail;
+        }
+
+        if (whandle->offset != 0) {
+                fprintf(stderr,
+                        "Attempt to import unsupported winsys offset %u\n",
+                        whandle->offset);
+                goto fail;
+        }
+
+        switch (whandle->type) {
+        case DRM_API_HANDLE_TYPE_SHARED:
+                rsc->bo = vc5_bo_open_name(screen,
+                                           whandle->handle, whandle->stride);
+                break;
+        case DRM_API_HANDLE_TYPE_FD:
+                rsc->bo = vc5_bo_open_dmabuf(screen,
+                                             whandle->handle, whandle->stride);
+                break;
+        default:
+                fprintf(stderr,
+                        "Attempt to import unsupported handle type %d\n",
+                        whandle->type);
+                goto fail;
+        }
+
+        if (!rsc->bo)
+                goto fail;
+
+        rsc->internal_format = prsc->format;
+
+        vc5_setup_slices(rsc);
+        vc5_debug_resource_layout(rsc, "import");
+
+        /* We can't (yet) relayout imports, so reject mismatched strides. */
+        if (whandle->stride != slice->stride) {
+                static bool warned = false;
+                if (!warned) {
+                        warned = true;
+                        fprintf(stderr,
+                                "Attempting to import %dx%d %s with "
+                                "unsupported stride %d instead of %d\n",
+                                prsc->width0, prsc->height0,
+                                util_format_short_name(prsc->format),
+                                whandle->stride,
+                                slice->stride);
+                }
+                goto fail;
+        }
+
+        return prsc;
+
+fail:
+        vc5_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+/**
+ * Creates a pipe_surface view of one miplevel/layer of a resource,
+ * precomputing the values the TILE_RENDERING_MODE_CONFIGURATION emission
+ * will need (offset, tiling, output format, internal type/bpp).
+ */
+static struct pipe_surface *
+vc5_create_surface(struct pipe_context *pctx,
+                   struct pipe_resource *ptex,
+                   const struct pipe_surface *surf_tmpl)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_screen *screen = vc5->screen;
+        struct vc5_surface *surface = CALLOC_STRUCT(vc5_surface);
+        struct vc5_resource *rsc = vc5_resource(ptex);
+
+        if (!surface)
+                return NULL;
+
+        /* Only single-layer surfaces are supported. */
+        assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
+        struct pipe_surface *psurf = &surface->base;
+        unsigned level = surf_tmpl->u.tex.level;
+        struct vc5_resource_slice *slice = &rsc->slices[level];
+
+        pipe_reference_init(&psurf->reference, 1);
+        pipe_resource_reference(&psurf->texture, ptex);
+
+        psurf->context = pctx;
+        psurf->format = surf_tmpl->format;
+        psurf->width = u_minify(ptex->width0, level);
+        psurf->height = u_minify(ptex->height0, level);
+        psurf->u.tex.level = level;
+        psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+        psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+
+        surface->offset = vc5_layer_offset(ptex, level,
+                                           psurf->u.tex.first_layer);
+        surface->tiling = slice->tiling;
+
+        surface->format = vc5_get_rt_format(&screen->devinfo, psurf->format);
+
+        if (util_format_is_depth_or_stencil(psurf->format)) {
+                switch (psurf->format) {
+                case PIPE_FORMAT_Z16_UNORM:
+                        surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_16;
+                        break;
+                case PIPE_FORMAT_Z32_FLOAT:
+                case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+                        surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_32F;
+                        break;
+                default:
+                        surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_24;
+                }
+        } else {
+                uint32_t bpp, type;
+                vc5_get_internal_type_bpp_for_output_format(&screen->devinfo,
+                                                            surface->format,
+                                                            &type, &bpp);
+                surface->internal_type = type;
+                surface->internal_bpp = bpp;
+        }
+
+        if (surface->tiling == VC5_TILING_UIF_NO_XOR ||
+            surface->tiling == VC5_TILING_UIF_XOR) {
+                /* Padded height in UIF blocks (a UIF block is 2 utiles
+                 * tall).
+                 */
+                surface->padded_height_of_output_image_in_uif_blocks =
+                        (slice->padded_height /
+                         (2 * vc5_utile_height(rsc->cpp)));
+        }
+
+        if (rsc->separate_stencil) {
+                /* Recursively wrap the separate S8 resource so Z32F_S8
+                 * rendering can reach both planes.
+                 */
+                surface->separate_stencil =
+                        vc5_create_surface(pctx, &rsc->separate_stencil->base,
+                                           surf_tmpl);
+        }
+
+        return &surface->base;
+}
+
+/* Tears down a surface: drop the recursively-created separate-stencil
+ * surface (if any), release the texture reference, and free the struct.
+ */
+static void
+vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
+{
+        struct vc5_surface *surface = vc5_surface(psurf);
+
+        if (surface->separate_stencil)
+                pipe_surface_reference(&surface->separate_stencil, NULL);
+
+        pipe_resource_reference(&psurf->texture, NULL);
+        FREE(psurf);
+}
+
+/* Intentionally a no-op: every flush_resource call is followed by a
+ * context flush, which does the real work.
+ */
+static void
+vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource)
+{
+}
+
+/* u_transfer_helper callback: report the format the resource actually
+ * stores (recorded at creation/import time).
+ */
+static enum pipe_format
+vc5_resource_get_internal_format(struct pipe_resource *prsc)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        return rsc->internal_format;
+}
+
+/* u_transfer_helper callback: attach the separately-allocated S8
+ * resource to its depth resource.
+ */
+static void
+vc5_resource_set_stencil(struct pipe_resource *prsc,
+                         struct pipe_resource *stencil)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        rsc->separate_stencil = vc5_resource(stencil);
+}
+
+/* u_transfer_helper callback: hand back the separate stencil resource
+ * previously attached by vc5_resource_set_stencil().
+ */
+static struct pipe_resource *
+vc5_resource_get_stencil(struct pipe_resource *prsc)
+{
+        return &vc5_resource(prsc)->separate_stencil->base;
+}
+
+/* Callbacks handed to u_transfer_helper; the helper entrypoints installed
+ * in vc5_resource_screen_init()/vc5_resource_context_init() wrap these.
+ */
+static const struct u_transfer_vtbl transfer_vtbl = {
+        .resource_create = vc5_resource_create,
+        .resource_destroy = vc5_resource_destroy,
+        .transfer_map = vc5_resource_transfer_map,
+        .transfer_unmap = vc5_resource_transfer_unmap,
+        .transfer_flush_region = u_default_transfer_flush_region,
+        .get_internal_format = vc5_resource_get_internal_format,
+        .set_stencil = vc5_resource_set_stencil,
+        .get_stencil = vc5_resource_get_stencil,
+};
+
+/**
+ * Installs the resource-related pipe_screen hooks.
+ *
+ * resource_create/destroy are routed through u_transfer_helper, which
+ * wraps our implementations via transfer_vtbl.  NOTE(review): the three
+ * boolean args configure the helper's optional layers — confirm their
+ * meaning against u_transfer_helper_create()'s prototype.
+ */
+void
+vc5_resource_screen_init(struct pipe_screen *pscreen)
+{
+        pscreen->resource_create_with_modifiers =
+                vc5_resource_create_with_modifiers;
+        pscreen->resource_create = u_transfer_helper_resource_create;
+        pscreen->resource_from_handle = vc5_resource_from_handle;
+        pscreen->resource_get_handle = vc5_resource_get_handle;
+        pscreen->resource_destroy = u_transfer_helper_resource_destroy;
+        pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
+                                                            true, true, true);
+}
+
+/**
+ * Installs the resource/transfer-related pipe_context hooks.
+ *
+ * Transfer entrypoints go through u_transfer_helper, which dispatches
+ * back into transfer_vtbl; subdata and copy_region use the gallium
+ * defaults on top of those.
+ */
+void
+vc5_resource_context_init(struct pipe_context *pctx)
+{
+        pctx->transfer_map = u_transfer_helper_transfer_map;
+        pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
+        pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
+        pctx->buffer_subdata = u_default_buffer_subdata;
+        pctx->texture_subdata = u_default_texture_subdata;
+        pctx->create_surface = vc5_create_surface;
+        pctx->surface_destroy = vc5_surface_destroy;
+        pctx->resource_copy_region = util_resource_copy_region;
+        pctx->blit = vc5_blit;
+        pctx->flush_resource = vc5_flush_resource;
+}
diff --git a/src/gallium/drivers/v3d/v3d_resource.h b/src/gallium/drivers/v3d/v3d_resource.h
new file mode 100644
index 00000000000..dc68f803e90
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_resource.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_RESOURCE_H
+#define VC5_RESOURCE_H
+
+#include "v3d_screen.h"
+#include "util/u_transfer.h"
+
+/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These
+ * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB
+ * page. Those pages are then arranged left-to-right, top-to-bottom, to cover
+ * an image.
+ *
+ * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte
+ * utiles. Utiles may be 8x8 (8bpp), 8x4 (16bpp) or 4x4 (32bpp).
+ */
+
+/**
+ * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory
+ * Format field of render target and Z/Stencil config.
+ */
+enum vc5_tiling_mode {
+        /* Untiled resources. Not valid as texture inputs. */
+        VC5_TILING_RASTER,
+
+        /* Single line of u-tiles. */
+        VC5_TILING_LINEARTILE,
+
+        /* Departure from standard 4-UIF block column format: a single
+         * column of UIF blocks.
+         */
+        VC5_TILING_UBLINEAR_1_COLUMN,
+
+        /* Departure from standard 4-UIF block column format: two columns
+         * of UIF blocks.
+         */
+        VC5_TILING_UBLINEAR_2_COLUMN,
+
+        /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+         * split 2x2 into utiles.
+         */
+        VC5_TILING_UIF_NO_XOR,
+
+        /* Like UIF_NO_XOR, but with the HW's address XOR applied on odd
+         * columns when the padded height aligns to the page cache size
+         * (see vc5_get_ub_pad()/vc5_setup_slices()).
+         */
+        VC5_TILING_UIF_XOR,
+};
+
+struct vc5_transfer {
+        struct pipe_transfer base;
+        /* Malloc'd staging buffer used for tiled resources, which are
+         * manually tiled/untiled at map/unmap time; NULL for direct maps.
+         */
+        void *map;
+};
+
+/* Per-miplevel layout, computed by vc5_setup_slices(). */
+struct vc5_resource_slice {
+        /* Byte offset of this miplevel from the start of the BO. */
+        uint32_t offset;
+        /* Bytes per row of the padded level. */
+        uint32_t stride;
+        /* Height in rows after tiling alignment and UIF padding. */
+        uint32_t padded_height;
+        /* Size of a single pane of the slice. For 3D textures, there will be
+         * a number of panes equal to the minified, power-of-two-aligned
+         * depth.
+         */
+        uint32_t size;
+        /* Extra UIF-block rows of padding from vc5_get_ub_pad(). */
+        uint8_t ub_pad;
+        enum vc5_tiling_mode tiling;
+};
+
+struct vc5_surface {
+        struct pipe_surface base;
+        /* Byte offset of the bound (level, first_layer) image in the BO. */
+        uint32_t offset;
+        /* Tiling mode of the bound miplevel. */
+        enum vc5_tiling_mode tiling;
+        /**
+         * Output image format for TILE_RENDERING_MODE_CONFIGURATION
+         */
+        uint8_t format;
+
+        /**
+         * Internal format of the tile buffer for
+         * TILE_RENDERING_MODE_CONFIGURATION.
+         */
+        uint8_t internal_type;
+
+        /**
+         * internal bpp value (0=32bpp, 2=128bpp) for color buffers in
+         * TILE_RENDERING_MODE_CONFIGURATION.
+         */
+        uint8_t internal_bpp;
+
+        /* Only set for UIF tiling modes; see vc5_create_surface(). */
+        uint32_t padded_height_of_output_image_in_uif_blocks;
+
+        /* If the resource being referenced is separate stencil, then this is
+         * the surface to use when reading/writing stencil.
+         */
+        struct pipe_surface *separate_stencil;
+};
+
+struct vc5_resource {
+        struct pipe_resource base;
+        /* Backing buffer object (may be replaced on a whole-resource
+         * discard map).
+         */
+        struct vc5_bo *bo;
+        struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS];
+        /* Byte distance between array slices/cube faces, or between the
+         * depth panes of miplevel 0 for 3D textures.
+         */
+        uint32_t cube_map_stride;
+        /* Total BO size computed by vc5_setup_slices(). */
+        uint32_t size;
+        /* Bytes per pixel (or per compressed block) as stored in memory. */
+        int cpp;
+        /* False means raster-order (linear) layout. */
+        bool tiled;
+
+        /**
+         * Number of times the resource has been written to.
+         *
+         * This is used to track whether we need to load the surface on first
+         * rendering.
+         */
+        uint64_t writes;
+
+        /**
+         * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL
+         * for which parts of the resource are defined.
+         *
+         * Used for avoiding fallback to quad clears for clearing just depth,
+         * when the stencil contents have never been initialized. Note that
+         * we're lazy and fields not present in the buffer (DEPTH in a color
+         * buffer) may get marked.
+         */
+        uint32_t initialized_buffers;
+
+        /* Format the data is actually stored as; see
+         * vc5_resource_get_internal_format().
+         */
+        enum pipe_format internal_format;
+
+        /* Resource storing the S8 part of a Z32F_S8 resource, or NULL. */
+        struct vc5_resource *separate_stencil;
+};
+
+/* Downcast helpers from the gallium base structs to the vc5 subclasses
+ * (valid because each vc5 struct embeds its base struct first).
+ */
+static inline struct vc5_resource *
+vc5_resource(struct pipe_resource *prsc)
+{
+        return (struct vc5_resource *)prsc;
+}
+
+static inline struct vc5_surface *
+vc5_surface(struct pipe_surface *psurf)
+{
+        return (struct vc5_surface *)psurf;
+}
+
+static inline struct vc5_transfer *
+vc5_transfer(struct pipe_transfer *ptrans)
+{
+        return (struct vc5_transfer *)ptrans;
+}
+
+void vc5_resource_screen_init(struct pipe_screen *pscreen);
+void vc5_resource_context_init(struct pipe_context *pctx);
+struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmpl);
+uint32_t vc5_layer_offset(struct pipe_resource *prsc, uint32_t level,
+ uint32_t layer);
+
+
+#endif /* VC5_RESOURCE_H */
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
new file mode 100644
index 00000000000..95e6a6907f4
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -0,0 +1,648 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "os/os_misc.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_hash_table.h"
+#include "util/ralloc.h"
+
+#include <xf86drm.h>
+#include "v3d_screen.h"
+#include "v3d_context.h"
+#include "v3d_resource.h"
+#include "compiler/v3d_compiler.h"
+
+/**
+ * Returns the renderer string, e.g. "VC5 V3D 4.1" for devinfo.ver == 41.
+ *
+ * Built lazily on first call and cached; ralloc'd against the screen, so
+ * it is freed along with it.
+ */
+static const char *
+vc5_screen_get_name(struct pipe_screen *pscreen)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+
+        if (!screen->name) {
+                screen->name = ralloc_asprintf(screen,
+                                               "VC5 V3D %d.%d",
+                                               screen->devinfo.ver / 10,
+                                               screen->devinfo.ver % 10);
+        }
+
+        return screen->name;
+}
+
+/* Vendor string reported through pipe_screen::get_vendor. */
+static const char *
+vc5_screen_get_vendor(struct pipe_screen *pscreen)
+{
+        return "Broadcom";
+}
+
+/**
+ * Frees the screen and everything hanging off it: the BO-handle table,
+ * the bufmgr/BO cache, the transfer slab parent, the simulator state
+ * (if in use), the compiler, and finally the DRM fd.
+ */
+static void
+vc5_screen_destroy(struct pipe_screen *pscreen)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+
+        util_hash_table_destroy(screen->bo_handles);
+        vc5_bufmgr_destroy(pscreen);
+        slab_destroy_parent(&screen->transfer_pool);
+
+        if (using_vc5_simulator)
+                vc5_simulator_destroy(screen);
+
+        v3d_compiler_free(screen->compiler);
+
+        close(screen->fd);
+        ralloc_free(pscreen);
+}
+
+static int
+vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+
+ switch (param) {
+ /* Supported features (boolean caps). */
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_START_INSTANCE:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_SM3:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+ case PIPE_CAP_COMPUTE:
+ case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+ return 1;
+
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return screen->devinfo.ver >= 40;
+
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 256;
+
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return 4;
+
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 400;
+
+ case PIPE_CAP_MAX_VIEWPORTS:
+ return 1;
+
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ return 0;
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ if (screen->devinfo.ver >= 40)
+ return 0;
+ else
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ if (screen->devinfo.ver >= 40)
+ return 1;
+ else
+ return 0;
+
+ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+ return 1;
+
+
+ /* Stream output. */
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return 4;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return 64;
+
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+ return 7;
+
+ /* Unsupported features. */
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_TGSI_TEXCOORD:
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+ case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ case PIPE_CAP_TEXTURE_GATHER_SM5:
+ case PIPE_CAP_FAKE_SW_MSAA:
+ case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+ case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_CLIP_HALFZ:
+ case PIPE_CAP_VERTEXID_NOBASE:
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
+ case PIPE_CAP_PCI_GROUP:
+ case PIPE_CAP_PCI_BUS:
+ case PIPE_CAP_PCI_DEVICE:
+ case PIPE_CAP_PCI_FUNCTION:
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+ case PIPE_CAP_CULL_DISTANCE:
+ case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+ case PIPE_CAP_TGSI_VOTE:
+ case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+ case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+ case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+ case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+ case PIPE_CAP_TGSI_FS_FBFETCH:
+ case PIPE_CAP_INT64:
+ case PIPE_CAP_INT64_DIVMOD:
+ case PIPE_CAP_DOUBLES:
+ case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_POST_DEPTH_COVERAGE:
+ case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
+ case PIPE_CAP_TGSI_BALLOT:
+ case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
+ case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
+ case PIPE_CAP_TGSI_CLOCK:
+ case PIPE_CAP_TGSI_TEX_TXF_LZ:
+ case PIPE_CAP_NATIVE_FENCE_FD:
+ case PIPE_CAP_FENCE_SIGNAL:
+ case PIPE_CAP_TGSI_MUL_ZERO_WINS:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TILE_RASTER_ORDER:
+ case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
+ case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+ case PIPE_CAP_CONTEXT_PRIORITY_MASK:
+ case PIPE_CAP_CONSTBUF0_FLAGS:
+ case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
+ case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
+ case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
+ case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
+ case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
+ case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS:
+ case PIPE_CAP_PACKED_UNIFORMS:
+ return 0;
+
+ /* Geometry shader output, unsupported. */
+ case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+ return 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return VC5_MAX_MIP_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return 2048;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 4;
+
+ /* Queries. */
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ return 0;
+
+ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+ return 2048;
+
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_LITTLE;
+
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return 64;
+
+ case PIPE_CAP_VENDOR_ID:
+ return 0x14E4;
+ case PIPE_CAP_DEVICE_ID:
+ return 0xFFFFFFFF;
+ case PIPE_CAP_ACCELERATED:
+ return 1;
+ case PIPE_CAP_VIDEO_MEMORY: {
+ uint64_t system_memory;
+
+ if (!os_get_total_physical_memory(&system_memory))
+ return 0;
+
+ return (int)(system_memory >> 20);
+ }
+ case PIPE_CAP_UMA:
+ return 1;
+
+ default:
+ fprintf(stderr, "unknown param %d\n", param);
+ return 0;
+ }
+}
+
+/* Reports float-valued pipe caps (line/point width limits, LOD bias, etc.)
+ * to the gallium frontend. Unknown caps are logged and report 0.
+ */
+static float
+vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+ switch (param) {
+ case PIPE_CAPF_MAX_LINE_WIDTH:
+ case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+ return 32;
+
+ case PIPE_CAPF_MAX_POINT_WIDTH:
+ case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ return 512.0f;
+
+ /* 0.0 here means anisotropic filtering is not exposed. */
+ case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+ return 0.0f;
+ case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+ return 16.0f;
+
+ case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
+ case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
+ case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
+ return 0.0f;
+ default:
+ fprintf(stderr, "unknown paramf %d\n", param);
+ return 0;
+ }
+}
+
+/* Reports per-shader-stage capabilities. Only VS and FS are supported;
+ * every other stage reports 0 for all caps.
+ */
+static int
+vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+ enum pipe_shader_cap param)
+{
+ if (shader != PIPE_SHADER_VERTEX &&
+ shader != PIPE_SHADER_FRAGMENT) {
+ return 0;
+ }
+
+ /* this is probably not totally correct.. but it's a start: */
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return UINT_MAX;
+
+ /* FS inputs mirror VS outputs: caps are expressed in vec4 slots,
+ * hence the divide by 4.
+ */
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_FRAGMENT)
+ return VC5_MAX_FS_INPUTS / 4;
+ else
+ return 16;
+ case PIPE_SHADER_CAP_MAX_OUTPUTS:
+ if (shader == PIPE_SHADER_FRAGMENT)
+ return 4;
+ else
+ return VC5_MAX_FS_INPUTS / 4;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+ return 16 * 1024 * sizeof(float);
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return 16;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 0;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ return 0;
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 0;
+ case PIPE_SHADER_CAP_INTEGERS:
+ return 1;
+ case PIPE_SHADER_CAP_FP16:
+ case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+ case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+ case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return VC5_MAX_TEXTURE_SAMPLERS;
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ return PIPE_SHADER_IR_NIR;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
+ case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+ case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+ return 0;
+ default:
+ fprintf(stderr, "unknown shader param %d\n", param);
+ return 0;
+ }
+ /* NOTE(review): unreachable — every switch path above returns. */
+ return 0;
+}
+
+/* Answers the gallium "can I use this format for this binding?" query.
+ * A format is accepted only if it passes every check for the requested
+ * usage bits (vertex buffer, render target, sampler view, Z/S, index).
+ */
+static boolean
+vc5_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned usage)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+
+ /* Only single-sampled or exactly VC5_MAX_SAMPLES is supported. */
+ if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES)
+ return FALSE;
+
+ if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+ !util_format_is_supported(format, usage)) {
+ return FALSE;
+ }
+
+ /* Vertex fetch supports 8/16/32-bit and 10:10:10:2 channel layouts
+ * in (s)norm/(u/s)scaled/float variants; anything else is rejected.
+ */
+ if (usage & PIPE_BIND_VERTEX_BUFFER) {
+ switch (format) {
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ case PIPE_FORMAT_R32G32_SNORM:
+ case PIPE_FORMAT_R32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ case PIPE_FORMAT_R32G32_SSCALED:
+ case PIPE_FORMAT_R32_SSCALED:
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ case PIPE_FORMAT_R16G16_UNORM:
+ case PIPE_FORMAT_R16_UNORM:
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ case PIPE_FORMAT_R16G16_SNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ case PIPE_FORMAT_R16G16_USCALED:
+ case PIPE_FORMAT_R16_USCALED:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16_SSCALED:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ case PIPE_FORMAT_R8_SNORM:
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ case PIPE_FORMAT_R8G8_USCALED:
+ case PIPE_FORMAT_R8_USCALED:
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ case PIPE_FORMAT_R8G8_SSCALED:
+ case PIPE_FORMAT_R8_SSCALED:
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ case PIPE_FORMAT_R10G10B10A2_SNORM:
+ case PIPE_FORMAT_B10G10R10A2_SNORM:
+ case PIPE_FORMAT_R10G10B10A2_USCALED:
+ case PIPE_FORMAT_B10G10R10A2_USCALED:
+ case PIPE_FORMAT_R10G10B10A2_SSCALED:
+ case PIPE_FORMAT_B10G10R10A2_SSCALED:
+ break;
+ default:
+ return FALSE;
+ }
+ }
+
+ /* RT and texture support are per-hardware-generation tables. */
+ if ((usage & PIPE_BIND_RENDER_TARGET) &&
+ !vc5_rt_format_supported(&screen->devinfo, format)) {
+ return FALSE;
+ }
+
+ if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ !vc5_tex_format_supported(&screen->devinfo, format)) {
+ return FALSE;
+ }
+
+ if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+ !(format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
+ format == PIPE_FORMAT_X8Z24_UNORM ||
+ format == PIPE_FORMAT_Z16_UNORM ||
+ format == PIPE_FORMAT_Z32_FLOAT ||
+ format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+ return FALSE;
+ }
+
+ /* Index buffers: 8/16/32-bit indices only. */
+ if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+ !(format == PIPE_FORMAT_I8_UINT ||
+ format == PIPE_FORMAT_I16_UINT ||
+ format == PIPE_FORMAT_I32_UINT)) {
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+/* Hash callback for the screen's bo_handles table: the GEM handle is
+ * stored as the pointer key, so the truncated pointer value is the hash.
+ */
+static unsigned handle_hash(void *key)
+{
+        return PTR_TO_UINT(key);
+}
+
+/* Equality callback for bo_handles; returns 0 when the keys match,
+ * per util_hash_table's comparison convention.
+ */
+static int handle_compare(void *key1, void *key2)
+{
+        return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
+}
+
+/* Queries core 0's IDENT registers via DRM_IOCTL_V3D_GET_PARAM and fills
+ * in screen->devinfo.ver as major*10+minor (e.g. 4.1 -> 41). Returns
+ * false (with a message on stderr) on ioctl failure or an unsupported
+ * hardware revision.
+ */
+static bool
+vc5_get_device_info(struct vc5_screen *screen)
+{
+ struct drm_v3d_get_param ident0 = {
+ .param = DRM_V3D_PARAM_V3D_CORE0_IDENT0,
+ };
+ struct drm_v3d_get_param ident1 = {
+ .param = DRM_V3D_PARAM_V3D_CORE0_IDENT1,
+ };
+ int ret;
+
+ ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &ident0);
+ if (ret != 0) {
+ fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n",
+ strerror(errno));
+ return false;
+ }
+ ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &ident1);
+ if (ret != 0) {
+ fprintf(stderr, "Couldn't get V3D core IDENT1: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ /* Major version lives in IDENT0 bits 31:24, minor in IDENT1 bits 3:0. */
+ uint32_t major = (ident0.value >> 24) & 0xff;
+ uint32_t minor = (ident1.value >> 0) & 0xf;
+ screen->devinfo.ver = major * 10 + minor;
+
+ /* Whitelist of hardware revisions this driver build knows about. */
+ switch (screen->devinfo.ver) {
+ case 33:
+ case 41:
+ case 42:
+ break;
+ default:
+ fprintf(stderr,
+ "V3D %d.%d not supported by this version of Mesa.\n",
+ screen->devinfo.ver / 10,
+ screen->devinfo.ver % 10);
+ return false;
+ }
+
+ return true;
+}
+
+/* Hands the shared NIR compiler options to the state tracker; the same
+ * options are used regardless of IR kind or shader stage.
+ */
+static const void *
+vc5_screen_get_compiler_options(struct pipe_screen *pscreen,
+ enum pipe_shader_ir ir, unsigned shader)
+{
+ return &v3d_nir_options;
+}
+
+/* Creates the gallium screen for a V3D DRM fd. Takes ownership of the
+ * fd: on failure the fd is closed and NULL is returned.
+ */
+struct pipe_screen *
+v3d_screen_create(int fd)
+{
+ struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen);
+ struct pipe_screen *pscreen;
+
+ pscreen = &screen->base;
+
+ pscreen->destroy = vc5_screen_destroy;
+ pscreen->get_param = vc5_screen_get_param;
+ pscreen->get_paramf = vc5_screen_get_paramf;
+ pscreen->get_shader_param = vc5_screen_get_shader_param;
+ pscreen->context_create = vc5_context_create;
+ pscreen->is_format_supported = vc5_screen_is_format_supported;
+
+ screen->fd = fd;
+ list_inithead(&screen->bo_cache.time_list);
+ (void)mtx_init(&screen->bo_handles_mutex, mtx_plain);
+ /* NOTE(review): bo_handles and bo_handles_mutex are not ralloc-owned,
+ * so the fail path below appears to leak them — confirm against
+ * vc5_screen_destroy's cleanup.
+ */
+ screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+
+#if defined(USE_V3D_SIMULATOR)
+ vc5_simulator_init(screen);
+#endif
+
+ if (!vc5_get_device_info(screen))
+ goto fail;
+
+ slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16);
+
+ vc5_fence_init(screen);
+
+ v3d_process_debug_variable();
+
+ vc5_resource_screen_init(pscreen);
+
+ /* The compiler is keyed on devinfo, so it can only be set up after
+ * vc5_get_device_info() succeeds.
+ */
+ screen->compiler = v3d_compiler_init(&screen->devinfo);
+
+ pscreen->get_name = vc5_screen_get_name;
+ pscreen->get_vendor = vc5_screen_get_vendor;
+ pscreen->get_device_vendor = vc5_screen_get_vendor;
+ pscreen->get_compiler_options = vc5_screen_get_compiler_options;
+
+ return pscreen;
+
+fail:
+ close(fd);
+ /* pscreen is the first member of *screen, so this frees the screen. */
+ ralloc_free(pscreen);
+ return NULL;
+}
diff --git a/src/gallium/drivers/v3d/v3d_screen.h b/src/gallium/drivers/v3d/v3d_screen.h
new file mode 100644
index 00000000000..975bfe01a75
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_screen.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_SCREEN_H
+#define VC5_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "os/os_thread.h"
+#include "state_tracker/drm_driver.h"
+#include "util/list.h"
+#include "util/slab.h"
+#include "broadcom/common/v3d_debug.h"
+#include "broadcom/common/v3d_device_info.h"
+
+struct vc5_bo;
+
+/* Driver-wide fixed limits advertised through the pipe caps. */
+#define VC5_MAX_MIP_LEVELS 12
+#define VC5_MAX_TEXTURE_SAMPLERS 32
+#define VC5_MAX_SAMPLES 4
+#define VC5_MAX_DRAW_BUFFERS 4
+#define VC5_MAX_ATTRIBUTES 16
+
+/* These are tunable parameters in the HW design, but all the V3D
+ * implementations agree.
+ */
+#define VC5_UIFCFG_BANKS 8
+#define VC5_UIFCFG_PAGE_SIZE 4096
+#define VC5_UIFCFG_XOR_VALUE (1 << 4)
+#define VC5_PAGE_CACHE_SIZE (VC5_UIFCFG_PAGE_SIZE * VC5_UIFCFG_BANKS)
+#define VC5_UBLOCK_SIZE 64
+#define VC5_UIFBLOCK_SIZE (4 * VC5_UBLOCK_SIZE)
+#define VC5_UIFBLOCK_ROW_SIZE (4 * VC5_UIFBLOCK_SIZE)
+
+struct vc5_simulator_file;
+
+/* Per-device state shared by all contexts created on this screen. */
+struct vc5_screen {
+ struct pipe_screen base;
+ /* DRM fd the screen was created on; owned by the screen. */
+ int fd;
+
+ struct v3d_device_info devinfo;
+
+ const char *name;
+
+ struct slab_parent_pool transfer_pool;
+
+ /* Cache of freed BOs kept for reuse, aged out over time. */
+ struct vc5_bo_cache {
+ /** List of struct vc5_bo freed, by age. */
+ struct list_head time_list;
+ /** List of struct vc5_bo freed, per size, by age. */
+ struct list_head *size_list;
+ uint32_t size_list_size;
+
+ mtx_t lock;
+
+ /* Totals currently held in the cache. */
+ uint32_t bo_size;
+ uint32_t bo_count;
+ } bo_cache;
+
+ const struct v3d_compiler *compiler;
+
+ /* Maps flink/prime handles to BOs, guarded by bo_handles_mutex. */
+ struct util_hash_table *bo_handles;
+ mtx_t bo_handles_mutex;
+
+ /* Totals for all live BOs on this screen. */
+ uint32_t bo_size;
+ uint32_t bo_count;
+
+ struct vc5_simulator_file *sim_file;
+};
+
+/* Downcast from the gallium base screen to the driver screen. */
+static inline struct vc5_screen *
+vc5_screen(struct pipe_screen *screen)
+{
+ return (struct vc5_screen *)screen;
+}
+
+struct pipe_screen *v3d_screen_create(int fd);
+
+void
+vc5_fence_init(struct vc5_screen *screen);
+
+#endif /* VC5_SCREEN_H */
diff --git a/src/gallium/drivers/v3d/v3d_simulator.c b/src/gallium/drivers/v3d/v3d_simulator.c
new file mode 100644
index 00000000000..86e4ed3be3d
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_simulator.c
@@ -0,0 +1,660 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_simulator.c
+ *
+ * Implements VC5 simulation on top of a non-VC5 GEM fd.
+ *
+ * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on
+ * top of the simpenrose software simulator. Generally, VC5 driver BOs have a
+ * GEM-side copy of their contents and a simulator-side memory area that the
+ * GEM contents get copied into during simulation. Once simulation is done,
+ * the simulator's data is copied back out to the GEM BOs, so that rendering
+ * appears on the screen as if actual hardware rendering had been done.
+ *
+ * One of the limitations of this code is that we shouldn't really need a
+ * GEM-side BO for non-window-system BOs. However, we do need unique BO
+ * handles for each of our GEM BOs so that this file can look up its state
+ * from the handle passed in at submit ioctl time (also, a couple of places
+ * outside of this file still call ioctls directly on the fd).
+ *
+ * Another limitation is that BO import doesn't work unless the underlying
+ * window system's BO size matches what VC5 is going to use, which of course
+ * doesn't work out in practice. This means that for now, only DRI3 (VC5
+ * makes the winsys BOs) is supported, not DRI2 (window system makes the
+ * winsys BOs).
+ */
+
+#ifdef USE_V3D_SIMULATOR
+
+#include <sys/mman.h>
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "util/u_memory.h"
+#include "util/u_mm.h"
+#include "v3d_simulator_wrapper.h"
+
+#include "v3d_screen.h"
+#include "v3d_context.h"
+
+/** Global (across GEM fds) state for the simulator */
+static struct vc5_simulator_state {
+ /* Guards all of the fields below plus the per-file bo_maps. */
+ mtx_t mutex;
+
+ struct v3d_hw *v3d;
+ int ver;
+
+ /* Base virtual address of the heap. */
+ void *mem;
+ /* Base hardware address of the heap. */
+ uint32_t mem_base;
+ /* Size of the heap. */
+ size_t mem_size;
+
+ struct mem_block *heap;
+ struct mem_block *overflow;
+
+ /** Mapping from GEM fd (+1, so 0 stays unused) to
+ * struct vc5_simulator_file *
+ */
+ struct hash_table *fd_map;
+
+ /* Number of screens sharing this global state. */
+ int refcount;
+} sim_state = {
+ .mutex = _MTX_INITIALIZER_NP,
+};
+
+/** Per-GEM-fd state for the simulator. */
+struct vc5_simulator_file {
+ int fd;
+
+ /** Mapping from GEM handle to struct vc5_simulator_bo * */
+ struct hash_table *bo_map;
+
+ /* This file's GMP (memory-protection table) block and its CPU view. */
+ struct mem_block *gmp;
+ void *gmp_vaddr;
+};
+
+/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */
+struct vc5_simulator_bo {
+ struct vc5_simulator_file *file;
+
+ /** Area for this BO within sim_state->mem */
+ struct mem_block *block;
+ uint32_t size;
+ /* CPU view of the BO's area in simulator memory. */
+ void *vaddr;
+
+ /* Set only for window-system BOs: dumb-BO mmap and its stride. */
+ void *winsys_map;
+ uint32_t winsys_stride;
+
+ /* GEM handle on the host fd; 0 for internal allocations. */
+ int handle;
+};
+
+/* Packs an int into a pointer for use as a hash-table key. */
+static void *
+int_to_key(int key)
+{
+ return (void *)(uintptr_t)key;
+}
+
+/* Looks up the per-fd simulator state registered by vc5_simulator_init().
+ * Keys are fd + 1 so that fd 0 doesn't collide with the NULL key.
+ */
+static struct vc5_simulator_file *
+vc5_get_simulator_file_for_fd(int fd)
+{
+ struct hash_entry *entry = _mesa_hash_table_search(sim_state.fd_map,
+ int_to_key(fd + 1));
+ return entry ? entry->data : NULL;
+}
+
+/* A marker placed just after each BO, then checked after rendering to make
+ * sure it's still there.
+ */
+#define BO_SENTINEL 0xfedcba98
+
+/* 128kb */
+#define GMP_ALIGN2 17
+
+/**
+ * Sets the range of GPU virtual address space to have the given GMP
+ * permissions (bit 0 = read, bit 1 = write, write-only forbidden).
+ */
+static void
+set_gmp_flags(struct vc5_simulator_file *file,
+ uint32_t offset, uint32_t size, uint32_t flag)
+{
+ /* offset must be aligned to the 128kb GMP page granularity. */
+ assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0);
+ int gmp_offset = offset >> GMP_ALIGN2;
+ int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2;
+ uint32_t *gmp = file->gmp_vaddr;
+
+ assert(flag <= 0x3);
+
+ /* Each GMP page gets a 2-bit field; 16 fields pack into each
+ * 32-bit word of the table.
+ */
+ for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) {
+ int32_t bitshift = (i % 16) * 2;
+ gmp[i / 16] &= ~(0x3 << bitshift);
+ gmp[i / 16] |= flag << bitshift;
+ }
+}
+
+/**
+ * Allocates space in simulator memory and returns a tracking struct for it
+ * that also contains the drm_gem_cma_object struct.
+ */
+static struct vc5_simulator_bo *
+vc5_create_simulator_bo(int fd, int handle, unsigned size)
+{
+ struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+ struct vc5_simulator_bo *sim_bo = rzalloc(file,
+ struct vc5_simulator_bo);
+ size = align(size, 4096);
+
+ sim_bo->file = file;
+ sim_bo->handle = handle;
+
+ /* +4 leaves room for the BO_SENTINEL word written past the end. */
+ mtx_lock(&sim_state.mutex);
+ sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0);
+ mtx_unlock(&sim_state.mutex);
+ assert(sim_bo->block);
+
+ /* Grant read+write GMP permission over the new range. */
+ set_gmp_flags(file, sim_bo->block->ofs, size, 0x3);
+
+ sim_bo->size = size;
+ sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base;
+ /* Poison fill so use of uninitialized contents is visible. */
+ memset(sim_bo->vaddr, 0xd0, size);
+
+ *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL;
+
+ /* A handle of 0 is used for vc5_gem.c internal allocations that
+ * don't need to go in the lookup table.
+ */
+ if (handle != 0) {
+ mtx_lock(&sim_state.mutex);
+ _mesa_hash_table_insert(file->bo_map, int_to_key(handle),
+ sim_bo);
+ mtx_unlock(&sim_state.mutex);
+ }
+
+ return sim_bo;
+}
+
+/* Tears down a simulator BO: unmaps any winsys mapping, revokes its GMP
+ * permissions, returns its heap block, and drops it from the fd's
+ * handle->BO table.
+ */
+static void
+vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo)
+{
+ struct vc5_simulator_file *sim_file = sim_bo->file;
+
+ if (sim_bo->winsys_map)
+ munmap(sim_bo->winsys_map, sim_bo->size);
+
+ set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0);
+
+ mtx_lock(&sim_state.mutex);
+ u_mmFreeMem(sim_bo->block);
+ /* handle 0 BOs were never inserted into bo_map (see create). */
+ if (sim_bo->handle) {
+ struct hash_entry *entry =
+ _mesa_hash_table_search(sim_file->bo_map,
+ int_to_key(sim_bo->handle));
+ _mesa_hash_table_remove(sim_file->bo_map, entry);
+ }
+ mtx_unlock(&sim_state.mutex);
+ ralloc_free(sim_bo);
+}
+
+/* Looks up the simulator BO wrapping a GEM handle on this fd, or NULL. */
+static struct vc5_simulator_bo *
+vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle)
+{
+ mtx_lock(&sim_state.mutex);
+ struct hash_entry *entry =
+ _mesa_hash_table_search(file->bo_map, int_to_key(gem_handle));
+ mtx_unlock(&sim_state.mutex);
+
+ return entry ? entry->data : NULL;
+}
+
+/* Copies the CPU-side contents of every BO referenced by the job into its
+ * simulator memory area before the simulated hardware runs.
+ */
+static int
+vc5_simulator_pin_bos(int fd, struct vc5_job *job)
+{
+ struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+ struct set_entry *entry;
+
+ set_foreach(job->bos, entry) {
+ struct vc5_bo *bo = (struct vc5_bo *)entry->key;
+ struct vc5_simulator_bo *sim_bo =
+ vc5_get_simulator_bo(file, bo->handle);
+
+ vc5_bo_map(bo);
+ memcpy(sim_bo->vaddr, bo->map, bo->size);
+ }
+
+ return 0;
+}
+
+/* Copies simulator memory back out to the CPU-side BOs after the job
+ * finishes, checking the past-the-end sentinel for overruns.
+ */
+static int
+vc5_simulator_unpin_bos(int fd, struct vc5_job *job)
+{
+ struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+ struct set_entry *entry;
+
+ set_foreach(job->bos, entry) {
+ struct vc5_bo *bo = (struct vc5_bo *)entry->key;
+ struct vc5_simulator_bo *sim_bo =
+ vc5_get_simulator_bo(file, bo->handle);
+
+ /* Overflow detection only warns; the copy-back still happens. */
+ if (*(uint32_t *)(sim_bo->vaddr +
+ sim_bo->size) != BO_SENTINEL) {
+ fprintf(stderr, "Buffer overflow in %s\n", bo->name);
+ }
+
+ vc5_bo_map(bo);
+ memcpy(bo->map, sim_bo->vaddr, bo->size);
+ }
+
+ return 0;
+}
+
+#if 0
+static void
+vc5_dump_to_file(struct vc5_exec_info *exec)
+{
+ static int dumpno = 0;
+ struct drm_vc5_get_hang_state *state;
+ struct drm_vc5_get_hang_state_bo *bo_state;
+ unsigned int dump_version = 0;
+
+ if (!(vc5_debug & VC5_DEBUG_DUMP))
+ return;
+
+ state = calloc(1, sizeof(*state));
+
+ int unref_count = 0;
+ list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+ unref_head) {
+ unref_count++;
+ }
+
+ /* Add one more for the overflow area that isn't wrapped in a BO. */
+ state->bo_count = exec->bo_count + unref_count + 1;
+ bo_state = calloc(state->bo_count, sizeof(*bo_state));
+
+ char *filename = NULL;
+ asprintf(&filename, "vc5-dri-%d.dump", dumpno++);
+ FILE *f = fopen(filename, "w+");
+ if (!f) {
+ fprintf(stderr, "Couldn't open %s: %s", filename,
+ strerror(errno));
+ return;
+ }
+
+ fwrite(&dump_version, sizeof(dump_version), 1, f);
+
+ state->ct0ca = exec->ct0ca;
+ state->ct0ea = exec->ct0ea;
+ state->ct1ca = exec->ct1ca;
+ state->ct1ea = exec->ct1ea;
+ state->start_bin = exec->ct0ca;
+ state->start_render = exec->ct1ca;
+ fwrite(state, sizeof(*state), 1, f);
+
+ int i;
+ for (i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_cma_object *cma_bo = exec->bo[i];
+ bo_state[i].handle = i; /* Not used by the parser. */
+ bo_state[i].paddr = cma_bo->paddr;
+ bo_state[i].size = cma_bo->base.size;
+ }
+
+ list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+ unref_head) {
+ struct drm_gem_cma_object *cma_bo = &bo->base;
+ bo_state[i].handle = 0;
+ bo_state[i].paddr = cma_bo->paddr;
+ bo_state[i].size = cma_bo->base.size;
+ i++;
+ }
+
+ /* Add the static overflow memory area. */
+ bo_state[i].handle = exec->bo_count;
+ bo_state[i].paddr = sim_state.overflow->ofs;
+ bo_state[i].size = sim_state.overflow->size;
+ i++;
+
+ fwrite(bo_state, sizeof(*bo_state), state->bo_count, f);
+
+ for (int i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_cma_object *cma_bo = exec->bo[i];
+ fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+ }
+
+ list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+ unref_head) {
+ struct drm_gem_cma_object *cma_bo = &bo->base;
+ fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+ }
+
+ void *overflow = calloc(1, sim_state.overflow->size);
+ fwrite(overflow, 1, sim_state.overflow->size, f);
+ free(overflow);
+
+ free(state);
+ free(bo_state);
+ fclose(f);
+}
+#endif
+
+/* Runs one submitted CL job on the software simulator. Mirrors what the
+ * kernel would do: copy BO contents in, execute the binner/renderer, copy
+ * results back out. For a window-system color buffer the real scanout BO
+ * is also copied in before and out after, so native rendering shows up.
+ */
+int
+vc5_simulator_flush(struct vc5_context *vc5,
+ struct drm_v3d_submit_cl *submit, struct vc5_job *job)
+{
+ struct vc5_screen *screen = vc5->screen;
+ int fd = screen->fd;
+ struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+ struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]);
+ struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL;
+ struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL;
+ uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0;
+ uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
+ /* Strides may differ between winsys and simulator layouts; copy the
+ * shorter row length to stay in bounds on both sides.
+ */
+ uint32_t row_len = MIN2(sim_stride, winsys_stride);
+ int ret;
+
+ if (ctex && csim_bo->winsys_map) {
+#if 0
+ fprintf(stderr, "%dx%d %d %d %d\n",
+ ctex->base.b.width0, ctex->base.b.height0,
+ winsys_stride,
+ sim_stride,
+ ctex->bo->size);
+#endif
+
+ for (int y = 0; y < ctex->base.height0; y++) {
+ memcpy(ctex->bo->map + y * sim_stride,
+ csim_bo->winsys_map + y * winsys_stride,
+ row_len);
+ }
+ }
+
+ ret = vc5_simulator_pin_bos(fd, job);
+ if (ret)
+ return ret;
+
+ //vc5_dump_to_file(&exec);
+
+ /* Dispatch to the per-generation simulator entry point. */
+ if (sim_state.ver >= 41)
+ v3d41_simulator_flush(sim_state.v3d, submit, file->gmp->ofs);
+ else
+ v3d33_simulator_flush(sim_state.v3d, submit, file->gmp->ofs);
+
+ ret = vc5_simulator_unpin_bos(fd, job);
+ if (ret)
+ return ret;
+
+ if (ctex && csim_bo->winsys_map) {
+ for (int y = 0; y < ctex->base.height0; y++) {
+ memcpy(csim_bo->winsys_map + y * winsys_stride,
+ ctex->bo->map + y * sim_stride,
+ row_len);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Map the underlying GEM object from the real hardware GEM handle.
+ */
+static void *
+vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo)
+{
+ int ret;
+ void *map;
+
+ /* The host BO is a dumb buffer, so the dumb mmap ioctl gives us the
+ * fake offset to mmap through the DRM fd.
+ */
+ struct drm_mode_map_dumb map_dumb = {
+ .handle = sim_bo->handle,
+ };
+ ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb);
+ if (ret != 0) {
+ fprintf(stderr, "map ioctl failure\n");
+ abort();
+ }
+
+ map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd, map_dumb.offset);
+ if (map == MAP_FAILED) {
+ fprintf(stderr,
+ "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
+ sim_bo->handle, (long long)map_dumb.offset,
+ (int)sim_bo->size);
+ abort();
+ }
+
+ return map;
+}
+
+/**
+ * Do fixups after a BO has been opened from a handle.
+ *
+ * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE
+ * time, but we're still using drmPrimeFDToHandle() so we have this helper to
+ * be called afterward instead.
+ */
+/* Registers an externally-created (winsys) BO with the simulator: wraps
+ * the handle in a simulator BO and keeps a persistent CPU mapping of the
+ * real buffer so flush can mirror contents in and out.
+ */
+void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride,
+ int handle, uint32_t size)
+{
+ struct vc5_simulator_bo *sim_bo =
+ vc5_create_simulator_bo(fd, handle, size);
+
+ sim_bo->winsys_stride = winsys_stride;
+ sim_bo->winsys_map = vc5_simulator_map_winsys_bo(fd, sim_bo);
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation.
+ *
+ * Making a VC5 BO is just a matter of making a corresponding BO on the host.
+ */
+static int
+vc5_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args)
+{
+ int ret;
+ /* Back the V3D BO with a dumb buffer on the host fd; the 128-wide,
+ * 8bpp shape is arbitrary, just enough to cover args->size bytes.
+ */
+ struct drm_mode_create_dumb create = {
+ .width = 128,
+ .bpp = 8,
+ .height = (args->size + 127) / 128,
+ };
+
+ ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+ /* Bail out on ioctl failure instead of wrapping a garbage handle in
+ * a simulator BO — the assert below disappears in release builds.
+ */
+ if (ret != 0)
+ return ret;
+ assert(create.size >= args->size);
+
+ args->handle = create.handle;
+
+ struct vc5_simulator_bo *sim_bo =
+ vc5_create_simulator_bo(fd, create.handle, args->size);
+
+ /* Report the simulator-memory offset as the BO's GPU address. */
+ args->offset = sim_bo->block->ofs;
+
+ return 0;
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation.
+ *
+ * We just pass this straight through to dumb mmap.
+ */
+static int
+vc5_simulator_mmap_bo_ioctl(int fd, struct drm_v3d_mmap_bo *args)
+{
+ int ret;
+ struct drm_mode_map_dumb map = {
+ .handle = args->handle,
+ };
+
+ /* On failure ret is propagated to the caller; map.offset is only
+ * meaningful when the ioctl succeeded.
+ */
+ ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map);
+ args->offset = map.offset;
+
+ return ret;
+}
+
+/* Simulated DRM_IOCTL_V3D_GET_BO_OFFSET: reports the BO's address within
+ * simulator memory. Assumes the handle was created through this file
+ * (sim_bo lookup is not NULL-checked).
+ */
+static int
+vc5_simulator_get_bo_offset_ioctl(int fd, struct drm_v3d_get_bo_offset *args)
+{
+ struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+ struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file,
+ args->handle);
+
+ args->offset = sim_bo->block->ofs;
+
+ return 0;
+}
+
+/* Intercepts GEM close: releases the simulator-side tracking first, then
+ * forwards the close to the real host fd.
+ */
+static int
+vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args)
+{
+ /* Free the simulator's internal tracking. */
+ struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+ struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file,
+ args->handle);
+
+ vc5_free_simulator_bo(sim_bo);
+
+ /* Pass the call on down. */
+ return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args);
+}
+
+/* Simulated DRM_IOCTL_V3D_GET_PARAM, dispatched by simulated HW version. */
+static int
+vc5_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args)
+{
+ if (sim_state.ver >= 41)
+ return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
+ else
+ return v3d33_simulator_get_param_ioctl(sim_state.v3d, args);
+}
+
+/* Central ioctl dispatcher for the simulator: V3D ioctls are emulated
+ * here, generic GEM ioctls pass through to the host fd, and anything
+ * unrecognized aborts.
+ */
+int
+vc5_simulator_ioctl(int fd, unsigned long request, void *args)
+{
+ switch (request) {
+ case DRM_IOCTL_V3D_CREATE_BO:
+ return vc5_simulator_create_bo_ioctl(fd, args);
+ case DRM_IOCTL_V3D_MMAP_BO:
+ return vc5_simulator_mmap_bo_ioctl(fd, args);
+ case DRM_IOCTL_V3D_GET_BO_OFFSET:
+ return vc5_simulator_get_bo_offset_ioctl(fd, args);
+
+ case DRM_IOCTL_V3D_WAIT_BO:
+ /* We do all of the vc5 rendering synchronously, so we just
+ * return immediately on the wait ioctls. This ignores any
+ * native rendering to the host BO, so it does mean we race on
+ * front buffer rendering.
+ */
+ return 0;
+
+ case DRM_IOCTL_V3D_GET_PARAM:
+ return vc5_simulator_get_param_ioctl(fd, args);
+
+ case DRM_IOCTL_GEM_CLOSE:
+ return vc5_simulator_gem_close_ioctl(fd, args);
+
+ case DRM_IOCTL_GEM_OPEN:
+ case DRM_IOCTL_GEM_FLINK:
+ return drmIoctl(fd, request, args);
+ default:
+ fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request);
+ abort();
+ }
+}
+
+/* One-time (refcounted) initialization of the global simulator state:
+ * instantiates the HW model, sets up the memory heap, and creates the
+ * fd->file map. Subsequent callers just bump the refcount.
+ */
+static void
+vc5_simulator_init_global(const struct v3d_device_info *devinfo)
+{
+ mtx_lock(&sim_state.mutex);
+ if (sim_state.refcount++) {
+ mtx_unlock(&sim_state.mutex);
+ return;
+ }
+
+ sim_state.v3d = v3d_hw_auto_new(NULL);
+ v3d_hw_alloc_mem(sim_state.v3d, 1024 * 1024 * 1024);
+ sim_state.mem_base =
+ v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size,
+ &sim_state.mem);
+
+ /* Allocate from anywhere from 4096 up. We don't allocate at 0,
+ * because for OQs and some other addresses in the HW, 0 means
+ * disabled.
+ */
+ sim_state.heap = u_mmInit(4096, sim_state.mem_size - 4096);
+
+ /* Make a block of 0xd0 at address 0 to make sure we don't screw up
+ * and land there.
+ */
+ struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0);
+ memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096);
+
+ sim_state.ver = v3d_hw_get_version(sim_state.v3d);
+
+ /* Finish ALL one-time setup before dropping the mutex: a second
+ * screen that observes refcount != 0 returns immediately and then
+ * dereferences fd_map, so it must be valid by the time we unlock.
+ * (Previously fd_map creation and register init happened after the
+ * unlock, racing with concurrent screen creation.)
+ */
+ sim_state.fd_map =
+ _mesa_hash_table_create(NULL,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ if (sim_state.ver >= 41)
+ v3d41_simulator_init_regs(sim_state.v3d);
+ else
+ v3d33_simulator_init_regs(sim_state.v3d);
+
+ mtx_unlock(&sim_state.mutex);
+}
+
+/* Per-screen simulator setup: ensures global state exists, allocates the
+ * per-fd file struct and its GMP table, and registers it in fd_map.
+ */
+void
+vc5_simulator_init(struct vc5_screen *screen)
+{
+ vc5_simulator_init_global(&screen->devinfo);
+
+ screen->sim_file = rzalloc(screen, struct vc5_simulator_file);
+ struct vc5_simulator_file *sim_file = screen->sim_file;
+
+ screen->sim_file->bo_map =
+ _mesa_hash_table_create(screen->sim_file,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ mtx_lock(&sim_state.mutex);
+ _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1),
+ screen->sim_file);
+ mtx_unlock(&sim_state.mutex);
+
+ /* NOTE(review): 8096 looks like a typo for 8192 (a power-of-two GMP
+ * table size) — confirm against the GMP size the HW model expects.
+ */
+ sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0);
+ sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs -
+ sim_state.mem_base);
+}
+
+/* Drops one screen's reference on the global simulator state, tearing it
+ * down when the last screen goes away.
+ */
+void
+vc5_simulator_destroy(struct vc5_screen *screen)
+{
+ mtx_lock(&sim_state.mutex);
+ if (!--sim_state.refcount) {
+ _mesa_hash_table_destroy(sim_state.fd_map, NULL);
+ u_mmDestroy(sim_state.heap);
+ /* No memsetting the struct, because it contains the mutex. */
+ sim_state.mem = NULL;
+ }
+ mtx_unlock(&sim_state.mutex);
+}
+
+#endif /* USE_V3D_SIMULATOR */
diff --git a/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp b/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp
new file mode 100644
index 00000000000..7b04ded2b53
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file
+ *
+ * Wraps bits of the V3D simulator interface in a C interface for the
+ * v3d_simulator.c code to use.
+ */
+
+#ifdef USE_V3D_SIMULATOR
+
+#include "v3d_simulator_wrapper.h"
+
+#define V3D_TECH_VERSION 3
+#define V3D_REVISION 3
+#define V3D_SUB_REV 0
+#define V3D_HIDDEN_REV 0
+#define V3D_COMPAT_REV 0
+#include "v3d_hw_auto.h"
+
+extern "C" {
+
+struct v3d_hw *v3d_hw_auto_new(void *in_params)
+{
+ return v3d_hw_auto_make_unique().release();
+}
+
+
/* Thin C forwarder: returns the simulator memory's base (bus) address --
 * presumably; confirm against the simulator headers -- and reports its size
 * and CPU mapping through the out parameters.
 */
uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p)
{
        return hw->get_mem(size, p);
}
+
/* Asks the simulator to back at least min_size bytes of memory; returns
 * true on success.
 */
bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size)
{
        return hw->alloc_mem(min_size) == V3D_HW_ALLOC_SUCCESS;
}
+
/* Returns whether the simulated hardware exposes a GCA (cache) unit. */
bool v3d_hw_has_gca(struct v3d_hw *hw)
{
        return hw->has_gca();
}
+
/* Reads a 32-bit register at the given offset from the simulated HW. */
uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg)
{
        return hw->read_reg(reg);
}
+
/* Writes a 32-bit register at the given offset in the simulated HW. */
void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val)
{
        hw->write_reg(reg, val);
}
+
+void v3d_hw_tick(struct v3d_hw *hw)
+{
+ return hw->tick();
+}
+
+int v3d_hw_get_version(struct v3d_hw *hw)
+{
+ const V3D_HUB_IDENT_T *ident = hw->get_hub_ident();
+
+ return ident->tech_version * 10 + ident->revision;
+}
+
+}
+
+#endif /* USE_V3D_SIMULATOR */
diff --git a/src/gallium/drivers/v3d/v3d_simulator_wrapper.h b/src/gallium/drivers/v3d/v3d_simulator_wrapper.h
new file mode 100644
index 00000000000..8b5dca15ed9
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_simulator_wrapper.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+struct v3d_hw;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct v3d_hw *v3d_hw_auto_new(void *params);
+uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p);
+bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size);
+bool v3d_hw_has_gca(struct v3d_hw *hw);
+uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg);
+void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val);
+void v3d_hw_tick(struct v3d_hw *hw);
+int v3d_hw_get_version(struct v3d_hw *hw);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/gallium/drivers/v3d/v3d_tiling.c b/src/gallium/drivers/v3d/v3d_tiling.c
new file mode 100644
index 00000000000..f9c4a342184
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_tiling.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_tiling.c
+ *
+ * Handles information about the VC5 tiling formats, and loading and storing
+ * from them.
+ */
+
+#include <stdint.h>
+#include "v3d_screen.h"
+#include "v3d_context.h"
+#include "v3d_tiling.h"
+
/** Return the width in pixels of a 64-byte microtile.
 *
 * A utile always holds 64 bytes, so narrower pixels pack more columns.
 */
uint32_t
vc5_utile_width(int cpp)
{
        if (cpp == 1 || cpp == 2)
                return 8;
        if (cpp == 4 || cpp == 8)
                return 4;
        if (cpp == 16)
                return 2;
        unreachable("unknown cpp");
}
+
/** Return the height in pixels of a 64-byte microtile.
 *
 * Together with vc5_utile_width() this keeps width * height * cpp == 64.
 */
uint32_t
vc5_utile_height(int cpp)
{
        if (cpp == 1)
                return 8;
        if (cpp == 2 || cpp == 4)
                return 4;
        if (cpp == 8 || cpp == 16)
                return 2;
        unreachable("unknown cpp");
}
+
/**
 * Returns the byte address for a given pixel within a utile.
 *
 * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4
 * arrangement.
 */
static inline uint32_t
vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
{
        const uint32_t w = vc5_utile_width(cpp);

        assert(x < w && y < vc5_utile_height(cpp));

        /* Raster order inside the utile: rows of "w" pixels of cpp bytes. */
        return (y * w + x) * cpp;
}
+
/**
 * Returns the byte offset for a given pixel in a LINEARTILE layout.
 *
 * LINEARTILE is a single line of utiles in either the X or Y direction.
 */
static inline uint32_t
vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
{
        uint32_t w = vc5_utile_width(cpp);
        uint32_t h = vc5_utile_height(cpp);
        uint32_t ux = x / w;
        uint32_t uy = y / h;

        /* Being a single row or column of utiles, one index must be 0. */
        assert(ux == 0 || uy == 0);

        /* 64 bytes per utile along the line, then the pixel within it. */
        return 64 * (ux + uy) +
               vc5_get_utile_pixel_offset(cpp, x % w, y % h);
}
+
/**
 * Returns the byte offset for a given pixel in a UBLINEAR layout.
 *
 * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2
 * utiles), and the UIF blocks are in 1 or 2 columns in raster order.
 *
 * Fix: the original had a stray doubled "+" before the utile offset term;
 * it parsed as a harmless unary plus but was clearly a typo.
 */
static inline uint32_t
vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
                              int ublinear_number)
{
        uint32_t utile_w = vc5_utile_width(cpp);
        uint32_t utile_h = vc5_utile_height(cpp);
        uint32_t ub_w = utile_w * 2;
        uint32_t ub_h = utile_h * 2;
        uint32_t ub_x = x / ub_w;
        uint32_t ub_y = y / ub_h;

        /* 256 bytes per UIF block: select the block (raster order across
         * "ublinear_number" columns), then the utile within it (right half
         * is +64, bottom half +128), then the pixel within the utile.
         */
        return (256 * (ub_y * ublinear_number +
                       ub_x) +
                ((x & utile_w) ? 64 : 0) +
                ((y & utile_h) ? 128 : 0) +
                vc5_get_utile_pixel_offset(cpp,
                                           x & (utile_w - 1),
                                           y & (utile_h - 1)));
}
+
/* UBLINEAR with two columns of UIF blocks; image_h is unused here but kept
 * to match the get_pixel_offset callback signature.
 */
static inline uint32_t
vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        return vc5_get_ublinear_pixel_offset(cpp, x, y, 2);
}
+
/* UBLINEAR with a single column of UIF blocks; image_h is unused here but
 * kept to match the get_pixel_offset callback signature.
 */
static inline uint32_t
vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        return vc5_get_ublinear_pixel_offset(cpp, x, y, 1);
}
+
/**
 * Returns the byte offset for a given pixel in a UIF layout.
 *
 * UIF is the general VC5 tiling layout shared across 3D, media, and scanout.
 * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in
 * 4x4 groups, and those 4x4 groups are then stored in raster order.
 */
static inline uint32_t
vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
                         bool do_xor)
{
        uint32_t utile_w = vc5_utile_width(cpp);
        uint32_t utile_h = vc5_utile_height(cpp);
        /* A "macroblock" here is one 256-byte UIF block (2x2 utiles). */
        uint32_t mb_width = utile_w * 2;
        uint32_t mb_height = utile_h * 2;
        uint32_t log2_mb_width = ffs(mb_width) - 1;
        uint32_t log2_mb_height = ffs(mb_height) - 1;

        /* Macroblock X, y */
        uint32_t mb_x = x >> log2_mb_width;
        uint32_t mb_y = y >> log2_mb_height;
        /* X, y within the macroblock */
        uint32_t mb_pixel_x = x - (mb_x << log2_mb_width);
        uint32_t mb_pixel_y = y - (mb_y << log2_mb_height);

        /* XOR mode: odd groups-of-4 block columns get bit 4 of their row
         * flipped.  NOTE(review): presumably a SDRAM bank-spreading swizzle
         * -- confirm the 0x10 constant against the UIF documentation.
         */
        if (do_xor && (mb_x / 4) & 1)
                mb_y ^= 0x10;

        /* Height of the image in macroblock rows, rounded up. */
        uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height;
        /* Linear index of the macroblock: 4-block-wide columns, stored
         * column after column.
         */
        uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4;

        uint32_t mb_base_addr = mb_id * 256;

        bool top = mb_pixel_y < utile_h;
        bool left = mb_pixel_x < utile_w;

        /* Docs have this in pixels, we do bytes here. */
        uint32_t mb_tile_offset = (!top * 128 + !left * 64);

        uint32_t utile_x = mb_pixel_x & (utile_w - 1);
        uint32_t utile_y = mb_pixel_y & (utile_h - 1);

        uint32_t mb_pixel_address = (mb_base_addr +
                                     mb_tile_offset +
                                     vc5_get_utile_pixel_offset(cpp,
                                                                utile_x,
                                                                utile_y));

        return mb_pixel_address;
}
+
/* UIF layout with the bank-swizzling XOR applied. */
static inline uint32_t
vc5_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                             uint32_t x, uint32_t y)
{
        return vc5_get_uif_pixel_offset(cpp, image_h, x, y, true);
}
+
/* UIF layout without the bank-swizzling XOR. */
static inline uint32_t
vc5_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                                uint32_t x, uint32_t y)
{
        return vc5_get_uif_pixel_offset(cpp, image_h, x, y, false);
}
+
+static inline void
+vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, uint32_t image_h,
+ const struct pipe_box *box,
+ uint32_t (*get_pixel_offset)(uint32_t cpp,
+ uint32_t image_h,
+ uint32_t x, uint32_t y),
+ bool is_load)
+{
+ for (uint32_t y = 0; y < box->height; y++) {
+ void *cpu_row = cpu + y * cpu_stride;
+
+ for (int x = 0; x < box->width; x++) {
+ uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
+ box->x + x,
+ box->y + y);
+
+ if (false) {
+ fprintf(stderr, "%3d,%3d -> %d\n",
+ box->x + x, box->y + y,
+ pixel_offset);
+ }
+
+ if (is_load) {
+ memcpy(cpu_row + x * cpp,
+ gpu + pixel_offset,
+ cpp);
+ } else {
+ memcpy(gpu + pixel_offset,
+ cpu_row + x * cpp,
+ cpp);
+ }
+ }
+ }
+}
+
/* Dispatches to vc5_move_pixels_general_percpp() with cpp as a literal
 * constant for each supported size, so the inlined memcpy and
 * get_pixel_offset calls can be specialized by the compiler for each bpp.
 */
static inline void
vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
                        void *cpu, uint32_t cpu_stride,
                        int cpp, uint32_t image_h,
                        const struct pipe_box *box,
                        uint32_t (*get_pixel_offset)(uint32_t cpp,
                                                     uint32_t image_h,
                                                     uint32_t x, uint32_t y),
                        bool is_load)
{
        switch (cpp) {
        case 1:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               1, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 2:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               2, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 4:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               4, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 8:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               8, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 16:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               16, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        }
}
+
+static inline void
+vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ enum vc5_tiling_mode tiling_format,
+ int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box,
+ bool is_load)
+{
+ switch (tiling_format) {
+ case VC5_TILING_UIF_XOR:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_uif_xor_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_UIF_NO_XOR:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_uif_no_xor_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_UBLINEAR_2_COLUMN:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_ublinear_2_column_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_UBLINEAR_1_COLUMN:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_ublinear_1_column_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_LINEARTILE:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_lt_pixel_offset,
+ is_load);
+ break;
+ default:
+ unreachable("Unsupported tiling format");
+ break;
+ }
+}
+
+/**
+ * Loads pixel data from the start (microtile-aligned) box in \p src to the
+ * start of \p dst according to the given tiling format.
+ */
+void
+vc5_load_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum vc5_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box)
+{
+ vc5_move_tiled_image(src, src_stride,
+ dst, dst_stride,
+ tiling_format,
+ cpp,
+ image_h,
+ box,
+ true);
+}
+
+/**
+ * Stores pixel data from the start of \p src into a (microtile-aligned) box in
+ * \p dst according to the given tiling format.
+ */
+void
+vc5_store_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum vc5_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box)
+{
+ vc5_move_tiled_image(dst, dst_stride,
+ src, src_stride,
+ tiling_format,
+ cpp,
+ image_h,
+ box,
+ false);
+}
diff --git a/src/gallium/drivers/v3d/v3d_tiling.h b/src/gallium/drivers/v3d/v3d_tiling.h
new file mode 100644
index 00000000000..d3cf48c4527
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_tiling.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
#ifndef VC5_TILING_H
#define VC5_TILING_H

/* NOTE(review): this header is not self-contained -- it relies on the
 * includer for uint32_t, bool, ATTRIBUTE_CONST, enum vc5_tiling_mode and
 * struct pipe_box, so it must come after v3d_screen.h/v3d_context.h.
 */

uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST;
uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST;
bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp);
void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp);
/* Copy a box between a tiled image and a linear buffer; see v3d_tiling.c. */
void vc5_load_tiled_image(void *dst, uint32_t dst_stride,
                          void *src, uint32_t src_stride,
                          enum vc5_tiling_mode tiling_format, int cpp,
                          uint32_t image_h,
                          const struct pipe_box *box);
void vc5_store_tiled_image(void *dst, uint32_t dst_stride,
                           void *src, uint32_t src_stride,
                           enum vc5_tiling_mode tiling_format, int cpp,
                           uint32_t image_h,
                           const struct pipe_box *box);

#endif /* VC5_TILING_H */
diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c
new file mode 100644
index 00000000000..c7a39b50a74
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3d_uniforms.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_pack_color.h"
+#include "util/format_srgb.h"
+
+#include "v3d_context.h"
+#include "compiler/v3d_compiler.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#if 0
+
+#define SWIZ(x,y,z,w) { \
+ PIPE_SWIZZLE_##x, \
+ PIPE_SWIZZLE_##y, \
+ PIPE_SWIZZLE_##z, \
+ PIPE_SWIZZLE_##w \
+}
+
/* NOTE(review): dead code -- compiled out by the surrounding #if 0.  Kept
 * as a reference for packing the border color into the texture's storage
 * channel layout.
 */
static void
write_texture_border_color(struct vc5_job *job,
                           struct vc5_cl_out **uniforms,
                           struct vc5_texture_stateobj *texstate,
                           uint32_t unit)
{
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc5_resource *rsc = vc5_resource(texture->texture);
        union util_color uc;

        const struct util_format_description *tex_format_desc =
                util_format_description(texture->format);

        /* sRGB-encode the RGB channels (alpha stays linear). */
        float border_color[4];
        for (int i = 0; i < 4; i++)
                border_color[i] = sampler->border_color.f[i];
        if (util_format_is_srgb(texture->format)) {
                for (int i = 0; i < 3; i++)
                        border_color[i] =
                                util_format_linear_to_srgb_float(border_color[i]);
        }

        /* Turn the border color into the layout of channels that it would
         * have when stored as texture contents.
         */
        float storage_color[4];
        util_format_unswizzle_4f(storage_color,
                                 border_color,
                                 tex_format_desc->swizzle);

        /* Now, pack so that when the vc5_format-sampled texture contents are
         * replaced with our border color, the vc5_get_format_swizzle()
         * swizzling will get the right channels.
         */
        if (util_format_is_depth_or_stencil(texture->format)) {
                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                       sampler->border_color.f[0]) << 8;
        } else {
                switch (rsc->vc5_format) {
                default:
                case VC5_TEXTURE_TYPE_RGBA8888:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_RGBA4444:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_RGB565:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_ALPHA:
                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
                        break;
                case VC5_TEXTURE_TYPE_LUMALPHA:
                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
                                    (float_to_ubyte(storage_color[0]) << 0));
                        break;
                }
        }

        cl_aligned_u32(uniforms, uc.ui[0]);
}
+#endif
+
+static uint32_t
+get_texrect_scale(struct vc5_texture_stateobj *texstate,
+ enum quniform_contents contents,
+ uint32_t data)
+{
+ struct pipe_sampler_view *texture = texstate->textures[data];
+ uint32_t dim;
+
+ if (contents == QUNIFORM_TEXRECT_SCALE_X)
+ dim = texture->texture->width0;
+ else
+ dim = texture->texture->height0;
+
+ return fui(1.0f / dim);
+}
+
+static uint32_t
+get_texture_size(struct vc5_texture_stateobj *texstate,
+ enum quniform_contents contents,
+ uint32_t data)
+{
+ struct pipe_sampler_view *texture = texstate->textures[data];
+
+ switch (contents) {
+ case QUNIFORM_TEXTURE_WIDTH:
+ return u_minify(texture->texture->width0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_HEIGHT:
+ return u_minify(texture->texture->height0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_DEPTH:
+ return u_minify(texture->texture->depth0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_ARRAY_SIZE:
+ return texture->texture->array_size;
+ case QUNIFORM_TEXTURE_LEVELS:
+ return (texture->u.tex.last_level -
+ texture->u.tex.first_level) + 1;
+ default:
+ unreachable("Bad texture size field");
+ }
+}
+
/* Uploads the used ranges of gallium constant buffer 0 into a fresh BO laid
 * out the way the compiled shader expects, or returns NULL if the shader
 * uses no UBO data.  Caller owns the returned reference.
 */
static struct vc5_bo *
vc5_upload_ubo(struct vc5_context *vc5,
               struct vc5_compiled_shader *shader,
               const uint32_t *gallium_uniforms)
{
        if (!shader->prog_data.base->ubo_size)
                return NULL;

        struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen,
                                          shader->prog_data.base->ubo_size,
                                          "ubo");
        void *data = vc5_bo_map(ubo);
        /* Gather each compiler-recorded range from the user buffer into its
         * destination offset in the BO.
         */
        for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) {
                memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset,
                       ((const void *)gallium_uniforms +
                        shader->prog_data.base->ubo_ranges[i].src_offset),
                       shader->prog_data.base->ubo_ranges[i].size);
        }

        return ubo;
}
+
+/**
+ * Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter.
+ *
+ * Some bits of this field are dependent on the type of sample being done by
+ * the shader, while other bits are dependent on the sampler state. We OR the
+ * two together here.
+ */
+static void
+write_texture_p0(struct vc5_job *job,
+ struct vc5_cl_out **uniforms,
+ struct vc5_texture_stateobj *texstate,
+ uint32_t unit,
+ uint32_t shader_data)
+{
+ struct pipe_sampler_state *psampler = texstate->samplers[unit];
+ struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+
+ cl_aligned_u32(uniforms, shader_data | sampler->p0);
+}
+
/** Writes the V3D 3.x P1 (CFG_MODE=1) texture parameter.
 *
 * Combines the compiler's packed bits, the packed texture-state-record
 * address, and the sampler view's bits into one uniform word.
 */
static void
write_texture_p1(struct vc5_job *job,
                 struct vc5_cl_out **uniforms,
                 struct vc5_texture_stateobj *texstate,
                 uint32_t data)
{
        /* Extract the texture unit from the top bits, and the compiler's
         * packed p1 from the bottom.
         */
        uint32_t unit = data >> 5;
        uint32_t p1 = data & 0x1f;

        struct pipe_sampler_view *psview = texstate->textures[unit];
        struct vc5_sampler_view *sview = vc5_sampler_view(psview);

        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
                .texture_state_record_base_address = texstate->texture_state[unit],
        };

        /* Pack the state-record address through the generated packer, then
         * OR in the compiler and view bits.
         */
        uint32_t packed;
        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect,
                                                         (uint8_t *)&packed,
                                                         &unpacked);

        cl_aligned_u32(uniforms, p1 | packed | sview->p1);
}
+
+/** Writes the V3D 4.x TMU configuration parameter 0. */
+static void
+write_tmu_p0(struct vc5_job *job,
+ struct vc5_cl_out **uniforms,
+ struct vc5_texture_stateobj *texstate,
+ uint32_t data)
+{
+ /* Extract the texture unit from the top bits, and the compiler's
+ * packed p0 from the bottom.
+ */
+ uint32_t unit = data >> 24;
+ uint32_t p0 = data & 0x00ffffff;
+
+ struct pipe_sampler_view *psview = texstate->textures[unit];
+ struct vc5_sampler_view *sview = vc5_sampler_view(psview);
+ struct vc5_resource *rsc = vc5_resource(psview->texture);
+
+ cl_aligned_reloc(&job->indirect, uniforms, sview->bo, p0);
+ vc5_job_add_bo(job, rsc->bo);
+}
+
+/** Writes the V3D 4.x TMU configuration parameter 1. */
+static void
+write_tmu_p1(struct vc5_job *job,
+ struct vc5_cl_out **uniforms,
+ struct vc5_texture_stateobj *texstate,
+ uint32_t data)
+{
+ /* Extract the texture unit from the top bits, and the compiler's
+ * packed p1 from the bottom.
+ */
+ uint32_t unit = data >> 24;
+ uint32_t p0 = data & 0x00ffffff;
+
+ struct pipe_sampler_state *psampler = texstate->samplers[unit];
+ struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+
+ cl_aligned_reloc(&job->indirect, uniforms, sampler->bo, p0);
+}
+
/* Emits the shader's uniform stream into the job's indirect CL, resolving
 * each compiler-recorded uniform slot against current context state.
 * Returns a reloc (with a BO reference the caller owns) pointing at the
 * start of the stream.  The order of cl_aligned_* writes must match the
 * compiler's uniform list exactly.
 */
struct vc5_cl_reloc
vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader,
                   struct vc5_constbuf_stateobj *cb,
                   struct vc5_texture_stateobj *texstate)
{
        struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms;
        struct vc5_job *job = vc5->job;
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
        struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms);

        /* We always need to return some space for uniforms, because the HW
         * will be prefetching, even if we don't read any in the program.
         */
        vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4);

        struct vc5_cl_reloc uniform_stream = cl_get_address(&job->indirect);
        vc5_bo_reference(uniform_stream.bo);

        struct vc5_cl_out *uniforms =
                cl_start(&job->indirect);

        for (int i = 0; i < uinfo->count; i++) {

                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        cl_aligned_u32(&uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        cl_aligned_u32(&uniforms,
                                       gallium_uniforms[uinfo->data[i]]);
                        break;
                /* XY scales are premultiplied by 256 for the fixed-point
                 * viewport transform.
                 */
                case QUNIFORM_VIEWPORT_X_SCALE:
                        cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_aligned_f(&uniforms, vc5->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_aligned_f(&uniforms, vc5->viewport.scale[2]);
                        break;

                case QUNIFORM_USER_CLIP_PLANE:
                        /* data[i] encodes plane * 4 + component. */
                        cl_aligned_f(&uniforms,
                                     vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
                        break;

                case QUNIFORM_TMU_CONFIG_P0:
                        write_tmu_p0(job, &uniforms, texstate,
                                     uinfo->data[i]);
                        break;

                case QUNIFORM_TMU_CONFIG_P1:
                        write_tmu_p1(job, &uniforms, texstate,
                                     uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(job, &uniforms, texstate,
                                         uinfo->data[i]);
                        break;

#if 0
                case QUNIFORM_TEXTURE_FIRST_LEVEL:
                        write_texture_first_level(job, &uniforms, texstate,
                                                  uinfo->data[i]);
                        break;
#endif

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        cl_aligned_u32(&uniforms,
                                       get_texrect_scale(texstate,
                                                         uinfo->contents[i],
                                                         uinfo->data[i]));
                        break;

                case QUNIFORM_TEXTURE_WIDTH:
                case QUNIFORM_TEXTURE_HEIGHT:
                case QUNIFORM_TEXTURE_DEPTH:
                case QUNIFORM_TEXTURE_ARRAY_SIZE:
                case QUNIFORM_TEXTURE_LEVELS:
                        cl_aligned_u32(&uniforms,
                                       get_texture_size(texstate,
                                                        uinfo->contents[i],
                                                        uinfo->data[i]));
                        break;

                /* Front/back (data 0/1) get the ref value ORed in at bit 8;
                 * other data values carry no ref.
                 */
                case QUNIFORM_STENCIL:
                        cl_aligned_u32(&uniforms,
                                       vc5->zsa->stencil_uniforms[uinfo->data[i]] |
                                       (uinfo->data[i] <= 1 ?
                                        (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                        0));
                        break;

                case QUNIFORM_ALPHA_REF:
                        cl_aligned_f(&uniforms,
                                     vc5->zsa->base.alpha.ref_value);
                        break;

                case QUNIFORM_SAMPLE_MASK:
                        cl_aligned_u32(&uniforms, vc5->sample_mask);
                        break;

                /* Buffer 0 is the freshly-uploaded gathered UBO; other
                 * indices point at bound constant buffer resources.
                 */
                case QUNIFORM_UBO_ADDR:
                        if (uinfo->data[i] == 0) {
                                cl_aligned_reloc(&job->indirect, &uniforms,
                                                 ubo, 0);
                        } else {
                                int ubo_index = uinfo->data[i];
                                struct vc5_resource *rsc =
                                        vc5_resource(cb->cb[ubo_index].buffer);

                                cl_aligned_reloc(&job->indirect, &uniforms,
                                                 rsc->bo,
                                                 cb->cb[ubo_index].buffer_offset);
                        }
                        break;

                case QUNIFORM_TEXTURE_FIRST_LEVEL:
                        cl_aligned_f(&uniforms,
                                     texstate->textures[uinfo->data[i]]->u.tex.first_level);
                        break;

                case QUNIFORM_TEXTURE_BORDER_COLOR:
                        /* XXX */
                        break;

                case QUNIFORM_SPILL_OFFSET:
                        cl_aligned_reloc(&job->indirect, &uniforms,
                                         vc5->prog.spill_bo, 0);
                        break;

                case QUNIFORM_SPILL_SIZE_PER_THREAD:
                        cl_aligned_u32(&uniforms,
                                       vc5->prog.spill_size_per_thread);
                        break;

                default:
                        assert(quniform_contents_is_texture_p0(uinfo->contents[i]));

                        write_texture_p0(job, &uniforms, texstate,
                                         uinfo->contents[i] -
                                         QUNIFORM_TEXTURE_CONFIG_P0_0,
                                         uinfo->data[i]);
                        break;

                }
#if 0
                uint32_t written_val = *((uint32_t *)uniforms - 1);
                fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n",
                        shader, i, __gen_address_offset(&uniform_stream) + i * 4,
                        written_val, uif(written_val));
#endif
        }

        cl_end(&job->indirect, uniforms);

        /* Drop our temporary reference; the CL reloc holds its own. */
        vc5_bo_unreference(&ubo);

        return uniform_stream;
}
+
/* Precomputes which VC5_DIRTY_* state changes require re-emitting this
 * shader's uniform stream, by walking the same uniform list that
 * vc5_write_uniforms() consumes.  Must stay in sync with that switch.
 */
void
vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader)
{
        uint32_t dirty = 0;

        for (int i = 0; i < shader->prog_data.base->uniforms.count; i++) {
                switch (shader->prog_data.base->uniforms.contents[i]) {
                case QUNIFORM_CONSTANT:
                        /* Constants never depend on context state. */
                        break;
                case QUNIFORM_UNIFORM:
                case QUNIFORM_UBO_ADDR:
                        dirty |= VC5_DIRTY_CONSTBUF;
                        break;

                case QUNIFORM_VIEWPORT_X_SCALE:
                case QUNIFORM_VIEWPORT_Y_SCALE:
                case QUNIFORM_VIEWPORT_Z_OFFSET:
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        dirty |= VC5_DIRTY_VIEWPORT;
                        break;

                case QUNIFORM_USER_CLIP_PLANE:
                        dirty |= VC5_DIRTY_CLIP;
                        break;

                case QUNIFORM_TMU_CONFIG_P0:
                case QUNIFORM_TMU_CONFIG_P1:
                case QUNIFORM_TEXTURE_CONFIG_P1:
                case QUNIFORM_TEXTURE_BORDER_COLOR:
                case QUNIFORM_TEXTURE_FIRST_LEVEL:
                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                case QUNIFORM_TEXTURE_WIDTH:
                case QUNIFORM_TEXTURE_HEIGHT:
                case QUNIFORM_TEXTURE_DEPTH:
                case QUNIFORM_TEXTURE_ARRAY_SIZE:
                case QUNIFORM_TEXTURE_LEVELS:
                case QUNIFORM_SPILL_OFFSET:
                case QUNIFORM_SPILL_SIZE_PER_THREAD:
                        /* We could flag this on just the stage we're
                         * compiling for, but it's not passed in.
                         */
                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
                        break;

                case QUNIFORM_STENCIL:
                case QUNIFORM_ALPHA_REF:
                        dirty |= VC5_DIRTY_ZSA;
                        break;

                case QUNIFORM_SAMPLE_MASK:
                        dirty |= VC5_DIRTY_SAMPLE_MASK;
                        break;

                default:
                        /* Remaining slots must be per-unit texture P0s. */
                        assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
                        break;
                }
        }

        shader->uniform_dirty_bits = dirty;
}
diff --git a/src/gallium/drivers/v3d/v3dx_context.h b/src/gallium/drivers/v3d/v3dx_context.h
new file mode 100644
index 00000000000..faeda2c0fbb
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3dx_context.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* This file generates the per-v3d-version function prototypes. It must only
+ * be included from v3d_context.h.
+ */
+
+struct v3d_hw;
+struct vc5_format;
+
+void v3dX(emit_state)(struct pipe_context *pctx);
+void v3dX(emit_rcl)(struct vc5_job *job);
+void v3dX(draw_init)(struct pipe_context *pctx);
+void v3dX(state_init)(struct pipe_context *pctx);
+
+void v3dX(bcl_epilogue)(struct vc5_context *vc5, struct vc5_job *job);
+
+void v3dX(simulator_init_regs)(struct v3d_hw *v3d);
+int v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
+ struct drm_v3d_get_param *args);
+void v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit,
+ uint32_t gmp_ofs);
+const struct vc5_format *v3dX(get_format_desc)(enum pipe_format f);
+void v3dX(get_internal_type_bpp_for_output_format)(uint32_t format,
+ uint32_t *type,
+ uint32_t *bpp);
diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c
new file mode 100644
index 00000000000..03ee6b2b196
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3dx_draw.c
@@ -0,0 +1,714 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_blitter.h"
+#include "util/u_prim.h"
+#include "util/u_format.h"
+#include "util/u_pack_color.h"
+#include "util/u_prim_restart.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_primconvert.h"
+
+#include "v3d_context.h"
+#include "v3d_resource.h"
+#include "v3d_cl.h"
+#include "broadcom/compiler/v3d_compiler.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+
+/**
+ * Does the initial binning command list setup for drawing to a given FBO.
+ *
+ * Emitted once per job: allocates the tile allocation and tile state
+ * buffers, writes the Tile Binning Mode Configuration packets, and starts
+ * tile binning so the draw packets proper can follow.
+ */
+static void
+vc5_start_draw(struct vc5_context *vc5)
+{
+        struct vc5_job *job = vc5->job;
+
+        /* needs_flush is set at the end of this function, so this setup is
+         * only emitted once per job.
+         */
+        if (job->needs_flush)
+                return;
+
+        /* Get space to emit our BCL state, using a branch to jump to a new BO
+         * if necessary.
+         */
+        vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
+
+        job->submit.bcl_start = job->bcl.bo->offset;
+        vc5_job_add_bo(job, job->bcl.bo);
+
+        /* Fixed 1MB initial tile allocation; TSDA size per tile depends on
+         * the hardware version.
+         */
+        job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc");
+        uint32_t tsda_per_tile_size = vc5->screen->devinfo.ver >= 40 ? 256 : 64;
+        job->tile_state = vc5_bo_alloc(vc5->screen,
+                                       job->draw_tiles_y *
+                                       job->draw_tiles_x *
+                                       tsda_per_tile_size,
+                                       "TSDA");
+
+#if V3D_VERSION < 40
+        /* "Binning mode lists start with a Tile Binning Mode Configuration
+         * item (120)"
+         *
+         * Part1 signals the end of binning config setup.
+         */
+        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) {
+                config.tile_allocation_memory_address =
+                        cl_address(job->tile_alloc, 0);
+                config.tile_allocation_memory_size = job->tile_alloc->size;
+        }
+#endif
+
+        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) {
+#if V3D_VERSION >= 40
+                config.width_in_pixels_minus_1 = vc5->framebuffer.width - 1;
+                config.height_in_pixels_minus_1 = vc5->framebuffer.height - 1;
+                config.number_of_render_targets_minus_1 =
+                        MAX2(vc5->framebuffer.nr_cbufs, 1) - 1;
+#else /* V3D_VERSION < 40 */
+                config.tile_state_data_array_base_address =
+                        cl_address(job->tile_state, 0);
+
+                config.width_in_tiles = job->draw_tiles_x;
+                config.height_in_tiles = job->draw_tiles_y;
+                /* Must be >= 1 */
+                config.number_of_render_targets =
+                        MAX2(vc5->framebuffer.nr_cbufs, 1);
+#endif /* V3D_VERSION < 40 */
+
+                config.multisample_mode_4x = job->msaa;
+
+                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
+        }
+
+        /* There's definitely nothing in the VCD cache we want. */
+        cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
+
+        /* Disable any leftover OQ state from another job. */
+        cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
+
+        /* "Binning mode lists must have a Start Tile Binning item (6) after
+         * any prefix state data before the binning list proper starts."
+         */
+        cl_emit(&job->bcl, START_TILE_BINNING, bin);
+
+        job->needs_flush = true;
+        job->draw_width = vc5->framebuffer.width;
+        job->draw_height = vc5->framebuffer.height;
+}
+
+/* Flushes any jobs writing to the textures bound for a shader stage, so
+ * that this draw's sampling sees their results.
+ */
+static void
+vc5_predraw_check_textures(struct pipe_context *pctx,
+                           struct vc5_texture_stateobj *stage_tex)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        for (int unit = 0; unit < stage_tex->num_textures; unit++) {
+                struct pipe_sampler_view *view = stage_tex->textures[unit];
+
+                if (view)
+                        vc5_flush_jobs_writing_resource(vc5, view->texture);
+        }
+}
+
+/* Uploads the current program's uniform streams and writes the GL shader
+ * state record (plus one attribute record per vertex element) into the
+ * indirect CL, then emits a GL_SHADER_STATE packet in the BCL pointing at
+ * the record.
+ */
+static void
+vc5_emit_gl_shader_state(struct vc5_context *vc5,
+                         const struct pipe_draw_info *info)
+{
+        struct vc5_job *job = vc5->job;
+        /* VC5_DIRTY_VTXSTATE */
+        struct vc5_vertex_stateobj *vtx = vc5->vtx;
+        /* VC5_DIRTY_VTXBUF */
+        struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf;
+
+        /* Upload the uniforms to the indirect CL first */
+        struct vc5_cl_reloc fs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.fs,
+                                   &vc5->constbuf[PIPE_SHADER_FRAGMENT],
+                                   &vc5->fragtex);
+        struct vc5_cl_reloc vs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.vs,
+                                   &vc5->constbuf[PIPE_SHADER_VERTEX],
+                                   &vc5->verttex);
+        /* The coordinate shader (cs) uses the vertex stage's constant
+         * buffers and texture state, as shown by the arguments here.
+         */
+        struct vc5_cl_reloc cs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.cs,
+                                   &vc5->constbuf[PIPE_SHADER_VERTEX],
+                                   &vc5->verttex);
+
+        /* See GFXH-930 workaround below */
+        uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1);
+        uint32_t shader_rec_offset =
+                vc5_cl_ensure_space(&job->indirect,
+                                    cl_packet_length(GL_SHADER_STATE_RECORD) +
+                                    num_elements_to_emit *
+                                    cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
+                                    32);
+
+        cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
+                shader.enable_clipping = true;
+                /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
+                shader.point_size_in_shaded_vertex_data =
+                        (info->mode == PIPE_PRIM_POINTS &&
+                         vc5->rasterizer->base.point_size_per_vertex);
+
+                /* Must be set if the shader modifies Z, discards, or modifies
+                 * the sample mask. For any of these cases, the fragment
+                 * shader needs to write the Z value (even just discards).
+                 */
+                shader.fragment_shader_does_z_writes =
+                        (vc5->prog.fs->prog_data.fs->writes_z ||
+                         vc5->prog.fs->prog_data.fs->discard);
+
+                shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
+                        vc5->prog.fs->prog_data.fs->uses_centroid_and_center_w;
+
+                shader.number_of_varyings_in_fragment_shader =
+                        vc5->prog.fs->prog_data.base->num_inputs;
+
+                shader.propagate_nans = true;
+
+                shader.coordinate_shader_code_address =
+                        cl_address(vc5->prog.cs->bo, 0);
+                shader.vertex_shader_code_address =
+                        cl_address(vc5->prog.vs->bo, 0);
+                shader.fragment_shader_code_address =
+                        cl_address(vc5->prog.fs->bo, 0);
+
+                /* XXX: Use combined input/output size flag in the common
+                 * case.
+                 */
+                shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true;
+                shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true;
+                shader.coordinate_shader_input_vpm_segment_size =
+                        MAX2(vc5->prog.cs->prog_data.vs->vpm_input_size, 1);
+                shader.vertex_shader_input_vpm_segment_size =
+                        MAX2(vc5->prog.vs->prog_data.vs->vpm_input_size, 1);
+
+                shader.coordinate_shader_output_vpm_segment_size =
+                        vc5->prog.cs->prog_data.vs->vpm_output_size;
+                shader.vertex_shader_output_vpm_segment_size =
+                        vc5->prog.vs->prog_data.vs->vpm_output_size;
+
+                shader.coordinate_shader_uniforms_address = cs_uniforms;
+                shader.vertex_shader_uniforms_address = vs_uniforms;
+                shader.fragment_shader_uniforms_address = fs_uniforms;
+
+#if V3D_VERSION >= 41
+                shader.coordinate_shader_4_way_threadable =
+                        vc5->prog.cs->prog_data.vs->base.threads == 4;
+                shader.vertex_shader_4_way_threadable =
+                        vc5->prog.vs->prog_data.vs->base.threads == 4;
+                shader.fragment_shader_4_way_threadable =
+                        vc5->prog.fs->prog_data.fs->base.threads == 4;
+
+                shader.coordinate_shader_start_in_final_thread_section =
+                        vc5->prog.cs->prog_data.vs->base.single_seg;
+                shader.vertex_shader_start_in_final_thread_section =
+                        vc5->prog.vs->prog_data.vs->base.single_seg;
+                shader.fragment_shader_start_in_final_thread_section =
+                        vc5->prog.fs->prog_data.fs->base.single_seg;
+#else
+                shader.coordinate_shader_4_way_threadable =
+                        vc5->prog.cs->prog_data.vs->base.threads == 4;
+                shader.coordinate_shader_2_way_threadable =
+                        vc5->prog.cs->prog_data.vs->base.threads == 2;
+                shader.vertex_shader_4_way_threadable =
+                        vc5->prog.vs->prog_data.vs->base.threads == 4;
+                shader.vertex_shader_2_way_threadable =
+                        vc5->prog.vs->prog_data.vs->base.threads == 2;
+                shader.fragment_shader_4_way_threadable =
+                        vc5->prog.fs->prog_data.fs->base.threads == 4;
+                shader.fragment_shader_2_way_threadable =
+                        vc5->prog.fs->prog_data.fs->base.threads == 2;
+#endif
+
+                shader.vertex_id_read_by_coordinate_shader =
+                        vc5->prog.cs->prog_data.vs->uses_vid;
+                shader.instance_id_read_by_coordinate_shader =
+                        vc5->prog.cs->prog_data.vs->uses_iid;
+                shader.vertex_id_read_by_vertex_shader =
+                        vc5->prog.vs->prog_data.vs->uses_vid;
+                shader.instance_id_read_by_vertex_shader =
+                        vc5->prog.vs->prog_data.vs->uses_iid;
+
+                shader.address_of_default_attribute_values =
+                        cl_address(vtx->default_attribute_values, 0);
+        }
+
+        /* Emit one attribute record per vertex element, merging the
+         * prepacked per-element state from the vertex state object.
+         */
+        for (int i = 0; i < vtx->num_elements; i++) {
+                struct pipe_vertex_element *elem = &vtx->pipe[i];
+                struct pipe_vertex_buffer *vb =
+                        &vertexbuf->vb[elem->vertex_buffer_index];
+                struct vc5_resource *rsc = vc5_resource(vb->buffer.resource);
+
+                const uint32_t size =
+                        cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+                cl_emit_with_prepacked(&job->indirect,
+                                       GL_SHADER_STATE_ATTRIBUTE_RECORD,
+                                       &vtx->attrs[i * size], attr) {
+                        attr.stride = vb->stride;
+                        attr.address = cl_address(rsc->bo,
+                                                  vb->buffer_offset +
+                                                  elem->src_offset);
+                        attr.number_of_values_read_by_coordinate_shader =
+                                vc5->prog.cs->prog_data.vs->vattr_sizes[i];
+                        attr.number_of_values_read_by_vertex_shader =
+                                vc5->prog.vs->prog_data.vs->vattr_sizes[i];
+#if V3D_VERSION >= 41
+                        attr.maximum_index = 0xffffff;
+#endif
+                }
+        }
+
+        if (vtx->num_elements == 0) {
+                /* GFXH-930: At least one attribute must be enabled and read
+                 * by CS and VS.  If we have no attributes being consumed by
+                 * the shader, set up a dummy to be loaded into the VPM.
+                 */
+                cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+                        /* Valid address of data whose value will be unused. */
+                        attr.address = cl_address(job->indirect.bo, 0);
+
+                        attr.type = ATTRIBUTE_FLOAT;
+                        attr.stride = 0;
+                        attr.vec_size = 1;
+
+                        attr.number_of_values_read_by_coordinate_shader = 1;
+                        attr.number_of_values_read_by_vertex_shader = 1;
+                }
+        }
+
+        cl_emit(&job->bcl, GL_SHADER_STATE, state) {
+                state.address = cl_address(job->indirect.bo, shader_rec_offset);
+                state.number_of_attribute_arrays = num_elements_to_emit;
+        }
+
+        /* The relocs took their own references on the uniform BOs; drop
+         * ours.
+         */
+        vc5_bo_unreference(&cs_uniforms.bo);
+        vc5_bo_unreference(&vs_uniforms.bo);
+        vc5_bo_unreference(&fs_uniforms.bo);
+
+        job->shader_rec_count++;
+}
+
+/**
+ * Computes the various transform feedback statistics, since they can't be
+ * recorded by CL packets.
+ */
+static void
+vc5_tf_statistics_record(struct vc5_context *vc5,
+                         const struct pipe_draw_info *info,
+                         bool prim_tf)
+{
+        /* Only bother accumulating when queries are active. */
+        if (!vc5->active_queries)
+                return;
+
+        uint32_t generated = u_prims_for_vertices(info->mode, info->count);
+
+        vc5->prims_generated += generated;
+        if (prim_tf) {
+                /* XXX: Only count if we didn't overflow. */
+                vc5->tf_prims_generated += generated;
+        }
+}
+
+/* Tracks the job-wide early-Z state: each draw's Z/S state must agree on
+ * one EZ test direction (or EZ gets disabled for the rest of the job).
+ */
+static void
+vc5_update_job_ez(struct vc5_context *vc5, struct vc5_job *job)
+{
+        switch (vc5->zsa->ez_state) {
+        case VC5_EZ_UNDECIDED:
+                /* If the Z/S state didn't pick a direction but didn't
+                 * disable, then go along with the current EZ state. This
+                 * allows EZ optimization for Z func == EQUAL or NEVER.
+                 */
+                break;
+
+        case VC5_EZ_LT_LE:
+        case VC5_EZ_GT_GE:
+                /* If the Z/S state picked a direction, then it needs to match
+                 * the current direction if we've decided on one.
+                 */
+                if (job->ez_state == VC5_EZ_UNDECIDED)
+                        job->ez_state = vc5->zsa->ez_state;
+                else if (job->ez_state != vc5->zsa->ez_state)
+                        job->ez_state = VC5_EZ_DISABLED;
+                break;
+
+        case VC5_EZ_DISABLED:
+                /* If the current Z/S state disables EZ because of a bad Z
+                 * func or stencil operation, then we can't do any more EZ in
+                 * this frame.
+                 */
+                job->ez_state = VC5_EZ_DISABLED;
+                break;
+        }
+
+        /* If the FS affects the Z of the pixels, then it may update against
+         * the chosen EZ direction (though we could use
+         * ARB_conservative_depth's hints to avoid this)
+         */
+        if (vc5->prog.fs->prog_data.fs->writes_z) {
+                job->ez_state = VC5_EZ_DISABLED;
+        }
+
+        /* Latch the EZ state used by the job's very first draw; it is only
+         * written once.  NOTE(review): consumer not visible here — confirm
+         * how first_ez_state is used at RCL/submit time.
+         */
+        if (job->first_ez_state == VC5_EZ_UNDECIDED)
+                job->first_ez_state = job->ez_state;
+}
+
+/**
+ * The gallium draw_vbo entry point: handles primitive fallbacks, flushes
+ * jobs writing to textures we read, validates state, and emits the draw
+ * packets into the binning command list.
+ *
+ * Fixes vs. the original:
+ * - indexed draws incremented job->draw_calls_queued twice (once inside
+ *   the indexed branch and once after the if/else); now counted once.
+ * - the depth-enabled path assigned rsc->initialized_buffers with `=`,
+ *   clobbering a previously-set stencil bit on a combined Z/S resource;
+ *   now ORs it in, matching the stencil path.
+ */
+static void
+vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        if (!info->count_from_stream_output && !info->indirect &&
+            !info->primitive_restart &&
+            !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
+                return;
+
+        /* Fall back for weird desktop GL primitive restart values. */
+        if (info->primitive_restart &&
+            info->index_size) {
+                uint32_t mask = ~0;
+
+                switch (info->index_size) {
+                case 2:
+                        mask = 0xffff;
+                        break;
+                case 1:
+                        mask = 0xff;
+                        break;
+                }
+
+                if (info->restart_index != mask) {
+                        util_draw_vbo_without_prim_restart(pctx, info);
+                        return;
+                }
+        }
+
+        /* The HW primitive types only go up to triangle fans; convert the
+         * rest.
+         */
+        if (info->mode >= PIPE_PRIM_QUADS) {
+                util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base);
+                util_primconvert_draw_vbo(vc5->primconvert, info);
+                perf_debug("Fallback conversion for %d %s vertices\n",
+                           info->count, u_prim_name(info->mode));
+                return;
+        }
+
+        /* Before setting up the draw, flush anything writing to the textures
+         * that we read from.
+         */
+        vc5_predraw_check_textures(pctx, &vc5->verttex);
+        vc5_predraw_check_textures(pctx, &vc5->fragtex);
+
+        struct vc5_job *job = vc5_get_job_for_fbo(vc5);
+
+        /* Get space to emit our draw call into the BCL, using a branch to
+         * jump to a new BO if necessary.
+         */
+        vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
+
+        if (vc5->prim_mode != info->mode) {
+                vc5->prim_mode = info->mode;
+                vc5->dirty |= VC5_DIRTY_PRIM_MODE;
+        }
+
+        vc5_start_draw(vc5);
+        vc5_update_compiled_shaders(vc5, info->mode);
+        vc5_update_job_ez(vc5, job);
+
+#if V3D_VERSION >= 41
+        v3d41_emit_state(pctx);
+#else
+        v3d33_emit_state(pctx);
+#endif
+
+        if (vc5->dirty & (VC5_DIRTY_VTXBUF |
+                          VC5_DIRTY_VTXSTATE |
+                          VC5_DIRTY_PRIM_MODE |
+                          VC5_DIRTY_RASTERIZER |
+                          VC5_DIRTY_COMPILED_CS |
+                          VC5_DIRTY_COMPILED_VS |
+                          VC5_DIRTY_COMPILED_FS |
+                          vc5->prog.cs->uniform_dirty_bits |
+                          vc5->prog.vs->uniform_dirty_bits |
+                          vc5->prog.fs->uniform_dirty_bits)) {
+                vc5_emit_gl_shader_state(vc5, info);
+        }
+
+        vc5->dirty = 0;
+
+        /* The Base Vertex/Base Instance packet sets those values to nonzero
+         * for the next draw call only.
+         */
+        if (info->index_bias || info->start_instance) {
+                cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
+                        base.base_instance = info->start_instance;
+                        base.base_vertex = info->index_bias;
+                }
+        }
+
+        uint32_t prim_tf_enable = 0;
+#if V3D_VERSION < 40
+        /* V3D 3.x: The HW only processes transform feedback on primitives
+         * with the flag set.
+         */
+        if (vc5->streamout.num_targets)
+                prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS);
+#endif
+
+        vc5_tf_statistics_record(vc5, info, vc5->streamout.num_targets);
+
+        /* Note that the primitive type fields match with OpenGL/gallium
+         * definitions, up to but not including QUADS.
+         */
+        if (info->index_size) {
+                uint32_t index_size = info->index_size;
+                uint32_t offset = info->start * index_size;
+                struct pipe_resource *prsc;
+                if (info->has_user_indices) {
+                        prsc = NULL;
+                        u_upload_data(vc5->uploader, 0,
+                                      info->count * info->index_size, 4,
+                                      info->index.user,
+                                      &offset, &prsc);
+                } else {
+                        prsc = info->index.resource;
+                }
+                struct vc5_resource *rsc = vc5_resource(prsc);
+
+#if V3D_VERSION >= 40
+                cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
+                        ib.address = cl_address(rsc->bo, 0);
+                        ib.size = rsc->bo->size;
+                }
+#endif
+
+                if (info->instance_count > 1) {
+                        cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) {
+                                prim.index_type = ffs(info->index_size) - 1;
+#if V3D_VERSION >= 40
+                                prim.index_offset = offset;
+#else /* V3D_VERSION < 40 */
+                                prim.maximum_index = (1u << 31) - 1; /* XXX */
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+#endif /* V3D_VERSION < 40 */
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+
+                                prim.number_of_instances = info->instance_count;
+                                prim.instance_length = info->count;
+                        }
+                } else {
+                        cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) {
+                                prim.index_type = ffs(info->index_size) - 1;
+                                prim.length = info->count;
+#if V3D_VERSION >= 40
+                                prim.index_offset = offset;
+#else /* V3D_VERSION < 40 */
+                                prim.maximum_index = (1u << 31) - 1; /* XXX */
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+#endif /* V3D_VERSION < 40 */
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+                        }
+                }
+
+                if (info->has_user_indices)
+                        pipe_resource_reference(&prsc, NULL);
+        } else {
+                if (info->instance_count > 1) {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) {
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.index_of_first_vertex = info->start;
+                                prim.number_of_instances = info->instance_count;
+                                prim.instance_length = info->count;
+                        }
+                } else {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) {
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.length = info->count;
+                                prim.index_of_first_vertex = info->start;
+                        }
+                }
+        }
+
+        /* Count each draw exactly once, indexed or not. */
+        job->draw_calls_queued++;
+
+        if (vc5->zsa && job->zsbuf &&
+            (vc5->zsa->base.depth.enabled ||
+             vc5->zsa->base.stencil[0].enabled)) {
+                struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture);
+                vc5_job_add_bo(job, rsc->bo);
+
+                if (vc5->zsa->base.depth.enabled) {
+                        job->resolve |= PIPE_CLEAR_DEPTH;
+                        rsc->initialized_buffers |= PIPE_CLEAR_DEPTH;
+                }
+
+                if (vc5->zsa->base.stencil[0].enabled) {
+                        job->resolve |= PIPE_CLEAR_STENCIL;
+                        rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
+                }
+        }
+
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+
+                if (job->resolve & bit || !job->cbufs[i])
+                        continue;
+                struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture);
+
+                job->resolve |= bit;
+                vc5_job_add_bo(job, rsc->bo);
+        }
+
+        if (job->referenced_size > 768 * 1024 * 1024) {
+                perf_debug("Flushing job with %dkb to try to free up memory\n",
+                           job->referenced_size / 1024);
+                vc5_flush(pctx);
+        }
+
+        if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
+                vc5_flush(pctx);
+}
+
+/**
+ * Implements pipe_context::clear by recording the packed clear values in
+ * the job and flagging the buffers as cleared/resolved; no 3D drawing is
+ * done here.
+ *
+ * Fix vs. the original: the R/B-swapped clear color used a function-wide
+ * `static union pipe_color_union` and redirected the shared `color`
+ * pointer at it.  Once redirected, later RTs in the loop read from the
+ * buffer they were writing (self-aliased swap corrupting the values), and
+ * non-swapped later RTs wrongly used the swapped color.  Use a per-RT
+ * local copy and a separate read pointer instead.
+ */
+static void
+vc5_clear(struct pipe_context *pctx, unsigned buffers,
+          const union pipe_color_union *color, double depth, unsigned stencil)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_job *job = vc5_get_job_for_fbo(vc5);
+
+        /* We can't flag new buffers for clearing once we've queued draws.  We
+         * could avoid this by using the 3d engine to clear.
+         */
+        if (job->draw_calls_queued) {
+                perf_debug("Flushing rendering to process new clear.\n");
+                vc5_job_submit(vc5, job);
+                job = vc5_get_job_for_fbo(vc5);
+        }
+
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+                if (!(buffers & bit))
+                        continue;
+
+                struct pipe_surface *psurf = vc5->framebuffer.cbufs[i];
+                struct vc5_surface *surf = vc5_surface(psurf);
+                struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+                union util_color uc;
+                uint32_t internal_size = 4 << surf->internal_bpp;
+
+                /* Swap R and B for RTs stored with those channels swapped,
+                 * reading from the caller's color and writing a fresh local
+                 * each iteration.
+                 */
+                const union pipe_color_union *clear_color = color;
+                union pipe_color_union swapped_color;
+                if (vc5->swap_color_rb & (1 << i)) {
+                        swapped_color.f[0] = color->f[2];
+                        swapped_color.f[1] = color->f[1];
+                        swapped_color.f[2] = color->f[0];
+                        swapped_color.f[3] = color->f[3];
+                        clear_color = &swapped_color;
+                }
+
+                /* Pack the clear value into the RT's internal format. */
+                switch (surf->internal_type) {
+                case V3D_INTERNAL_TYPE_8:
+                        util_pack_color(clear_color->f,
+                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+                        memcpy(job->clear_color[i], uc.ui, internal_size);
+                        break;
+                case V3D_INTERNAL_TYPE_8I:
+                case V3D_INTERNAL_TYPE_8UI:
+                        job->clear_color[i][0] = ((clear_color->ui[0] & 0xff) |
+                                                  (clear_color->ui[1] & 0xff) << 8 |
+                                                  (clear_color->ui[2] & 0xff) << 16 |
+                                                  (clear_color->ui[3] & 0xff) << 24);
+                        break;
+                case V3D_INTERNAL_TYPE_16F:
+                        util_pack_color(clear_color->f,
+                                        PIPE_FORMAT_R16G16B16A16_FLOAT, &uc);
+                        memcpy(job->clear_color[i], uc.ui, internal_size);
+                        break;
+                case V3D_INTERNAL_TYPE_16I:
+                case V3D_INTERNAL_TYPE_16UI:
+                        job->clear_color[i][0] = ((clear_color->ui[0] & 0xffff) |
+                                                  clear_color->ui[1] << 16);
+                        job->clear_color[i][1] = ((clear_color->ui[2] & 0xffff) |
+                                                  clear_color->ui[3] << 16);
+                        break;
+                case V3D_INTERNAL_TYPE_32F:
+                case V3D_INTERNAL_TYPE_32I:
+                case V3D_INTERNAL_TYPE_32UI:
+                        memcpy(job->clear_color[i], clear_color->ui,
+                               internal_size);
+                        break;
+                }
+
+                rsc->initialized_buffers |= bit;
+        }
+
+        unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
+        if (zsclear) {
+                struct vc5_resource *rsc =
+                        vc5_resource(vc5->framebuffer.zsbuf->texture);
+
+                if (zsclear & PIPE_CLEAR_DEPTH)
+                        job->clear_z = depth;
+                if (zsclear & PIPE_CLEAR_STENCIL)
+                        job->clear_s = stencil;
+
+                rsc->initialized_buffers |= zsclear;
+        }
+
+        /* A clear covers the whole framebuffer. */
+        job->draw_min_x = 0;
+        job->draw_min_y = 0;
+        job->draw_max_x = vc5->framebuffer.width;
+        job->draw_max_y = vc5->framebuffer.height;
+        job->cleared |= buffers;
+        job->resolve |= buffers;
+
+        vc5_start_draw(vc5);
+}
+
+/* pipe_context::clear_render_target hook — partial RT clears are not
+ * implemented; this only logs.
+ */
+static void
+vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
+                        const union pipe_color_union *color,
+                        unsigned x, unsigned y, unsigned w, unsigned h,
+                        bool render_condition_enabled)
+{
+        fprintf(stderr, "unimpl: clear RT\n");
+}
+
+/* pipe_context::clear_depth_stencil hook — partial Z/S clears are not
+ * implemented; this only logs.
+ */
+static void
+vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
+                        unsigned buffers, double depth, unsigned stencil,
+                        unsigned x, unsigned y, unsigned w, unsigned h,
+                        bool render_condition_enabled)
+{
+        fprintf(stderr, "unimpl: clear DS\n");
+}
+
+/* Hooks this file's per-V3D-version draw and clear entrypoints into the
+ * pipe_context vtable.
+ */
+void
+v3dX(draw_init)(struct pipe_context *pctx)
+{
+        pctx->draw_vbo = vc5_draw_vbo;
+        pctx->clear = vc5_clear;
+        pctx->clear_render_target = vc5_clear_render_target;
+        pctx->clear_depth_stencil = vc5_clear_depth_stencil;
+}
diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c
new file mode 100644
index 00000000000..e2aba356de4
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3dx_emit.c
@@ -0,0 +1,722 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_half.h"
+#include "v3d_context.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+/* Translates a gallium blend factor to the hardware V3D_BLEND_FACTOR_*
+ * encoding.
+ *
+ * dst_alpha_one: destination alpha is treated as a constant 1.0 (so
+ * DST_ALPHA maps to ONE and INV_DST_ALPHA to ZERO) — presumably for RTs
+ * without a stored alpha channel; confirm at the call sites.
+ */
+static uint8_t
+vc5_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
+{
+        /* We may get a bad blendfactor when blending is disabled. */
+        if (factor == 0)
+                return V3D_BLEND_FACTOR_ZERO;
+
+        switch (factor) {
+        case PIPE_BLENDFACTOR_ZERO:
+                return V3D_BLEND_FACTOR_ZERO;
+        case PIPE_BLENDFACTOR_ONE:
+                return V3D_BLEND_FACTOR_ONE;
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return V3D_BLEND_FACTOR_SRC_COLOR;
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                return V3D_BLEND_FACTOR_INV_SRC_COLOR;
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return V3D_BLEND_FACTOR_DST_COLOR;
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return V3D_BLEND_FACTOR_INV_DST_COLOR;
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return V3D_BLEND_FACTOR_SRC_ALPHA;
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return (dst_alpha_one ?
+                        V3D_BLEND_FACTOR_ONE :
+                        V3D_BLEND_FACTOR_DST_ALPHA);
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return (dst_alpha_one ?
+                        V3D_BLEND_FACTOR_ZERO :
+                        V3D_BLEND_FACTOR_INV_DST_ALPHA);
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return V3D_BLEND_FACTOR_CONST_COLOR;
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return V3D_BLEND_FACTOR_INV_CONST_COLOR;
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return V3D_BLEND_FACTOR_CONST_ALPHA;
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
+        default:
+                unreachable("Bad blend factor");
+        }
+}
+
+/* Returns the half-float border color for one channel of a sampler,
+ * applying the format's swizzle when the hardware (16-bit return path)
+ * does the channel swizzling rather than the shader.
+ */
+static inline uint16_t
+swizzled_border_color(const struct v3d_device_info *devinfo,
+                      struct pipe_sampler_state *sampler,
+                      struct vc5_sampler_view *sview,
+                      int chan)
+{
+        const struct util_format_description *desc =
+                util_format_description(sview->base.format);
+        uint8_t swiz = chan;
+
+        /* If we're doing swizzling in the sampler, then only rearrange the
+         * border color for the mismatch between the VC5 texture format and
+         * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
+         * the sampler's swizzle.
+         *
+         * For swizzling in the shader, we don't do any pre-swizzling of the
+         * border color.
+         */
+        if (vc5_get_tex_return_size(devinfo, sview->base.format,
+                                    sampler->compare_mode) != 32)
+                swiz = desc->swizzle[swiz];
+
+        switch (swiz) {
+        case PIPE_SWIZZLE_0:
+                /* Constant 0.0 / 1.0 channels don't read the border color. */
+                return util_float_to_half(0.0);
+        case PIPE_SWIZZLE_1:
+                return util_float_to_half(1.0);
+        default:
+                return util_float_to_half(sampler->border_color.f[swiz]);
+        }
+}
+
+#if V3D_VERSION < 40
+/* Maps a PIPE_SWIZZLE_* selector to the hardware texture swizzle
+ * encoding: 0 = constant zero, 1 = constant one, 2..5 = R/G/B/A.
+ */
+static uint32_t
+translate_swizzle(unsigned char pipe_swizzle)
+{
+        switch (pipe_swizzle) {
+        case PIPE_SWIZZLE_X:
+        case PIPE_SWIZZLE_Y:
+        case PIPE_SWIZZLE_Z:
+        case PIPE_SWIZZLE_W:
+                /* Channel selectors sit after the two constants. */
+                return 2 + pipe_swizzle;
+        case PIPE_SWIZZLE_0:
+                return 0;
+        case PIPE_SWIZZLE_1:
+                return 1;
+        default:
+                unreachable("unknown swizzle");
+        }
+}
+
+/* Emits the TEXTURE_SHADER_STATE record for texture unit i into the
+ * indirect CL (V3D 3.x path), merging the prepacked sampler-view and
+ * sampler-state bits.
+ *
+ * Fix vs. the original: the return_size == 32 branch assigned
+ * mag_img_filter twice and never forced min_img_filter; the duplicate was
+ * clearly meant to force min filtering to nearest as well.
+ */
+static void
+emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex,
+                 int i)
+{
+        struct vc5_job *job = vc5->job;
+        struct pipe_sampler_state *psampler = stage_tex->samplers[i];
+        struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+        struct pipe_sampler_view *psview = stage_tex->textures[i];
+        struct vc5_sampler_view *sview = vc5_sampler_view(psview);
+        struct pipe_resource *prsc = psview->texture;
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        const struct v3d_device_info *devinfo = &vc5->screen->devinfo;
+
+        /* Reserve 32-byte-aligned space for the record and remember where
+         * it landed so the uniform stream can point at it.
+         */
+        stage_tex->texture_state[i].offset =
+                vc5_cl_ensure_space(&job->indirect,
+                                    cl_packet_length(TEXTURE_SHADER_STATE),
+                                    32);
+        vc5_bo_set_reference(&stage_tex->texture_state[i].bo,
+                             job->indirect.bo);
+
+        uint32_t return_size = vc5_get_tex_return_size(devinfo, psview->format,
+                                                       psampler->compare_mode);
+
+        struct V3D33_TEXTURE_SHADER_STATE unpacked = {
+                /* XXX */
+                .border_color_red = swizzled_border_color(devinfo, psampler,
+                                                          sview, 0),
+                .border_color_green = swizzled_border_color(devinfo, psampler,
+                                                            sview, 1),
+                .border_color_blue = swizzled_border_color(devinfo, psampler,
+                                                           sview, 2),
+                .border_color_alpha = swizzled_border_color(devinfo, psampler,
+                                                            sview, 3),
+
+                /* In the normal texturing path, the LOD gets clamped between
+                 * min/max, and the base_level field (set in the sampler view
+                 * from first_level) only decides where the min/mag switch
+                 * happens, so we need to use the LOD clamps to keep us
+                 * between min and max.
+                 *
+                 * For txf, the LOD clamp is still used, despite GL not
+                 * wanting that. We will need to have a separate
+                 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
+                 * support txf properly.
+                 */
+                .min_level_of_detail = MIN2(psview->u.tex.first_level +
+                                            MAX2(psampler->min_lod, 0),
+                                            psview->u.tex.last_level),
+                .max_level_of_detail = MIN2(psview->u.tex.first_level +
+                                            psampler->max_lod,
+                                            psview->u.tex.last_level),
+
+                .texture_base_pointer = cl_address(rsc->bo,
+                                                   rsc->slices[0].offset),
+
+                .output_32_bit = return_size == 32,
+        };
+
+        /* Set up the sampler swizzle if we're doing 16-bit sampling.  For
+         * 32-bit, we leave swizzling up to the shader compiler.
+         *
+         * Note: Contrary to the docs, the swizzle still applies even if the
+         * return size is 32.  It's just that you probably want to swizzle in
+         * the shader, because you need the Y/Z/W channels to be defined.
+         */
+        if (return_size == 32) {
+                unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
+                unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
+                unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
+                unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
+        } else {
+                unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]);
+                unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]);
+                unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]);
+                unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]);
+        }
+
+        int min_img_filter = psampler->min_img_filter;
+        int min_mip_filter = psampler->min_mip_filter;
+        int mag_img_filter = psampler->mag_img_filter;
+
+        if (return_size == 32) {
+                /* Force nearest filtering for 32-bit returns.  The original
+                 * set mag_img_filter twice and left min_img_filter alone.
+                 */
+                min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
+                min_img_filter = PIPE_TEX_FILTER_NEAREST;
+                mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+        }
+
+        /* The filter field encodes min/mip/mag together; build it up. */
+        bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
+        switch (min_mip_filter) {
+        case PIPE_TEX_MIPFILTER_NONE:
+                unpacked.filter += min_nearest ? 2 : 0;
+                break;
+        case PIPE_TEX_MIPFILTER_NEAREST:
+                unpacked.filter += min_nearest ? 4 : 8;
+                break;
+        case PIPE_TEX_MIPFILTER_LINEAR:
+                unpacked.filter += min_nearest ? 4 : 8;
+                unpacked.filter += 2;
+                break;
+        }
+
+        if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
+                unpacked.filter++;
+
+        /* Anisotropic filtering overrides the min/mag encoding entirely. */
+        if (psampler->max_anisotropy > 8)
+                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
+        else if (psampler->max_anisotropy > 4)
+                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
+        else if (psampler->max_anisotropy > 2)
+                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
+        else if (psampler->max_anisotropy)
+                unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
+
+        uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
+        cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
+
+        /* OR in the bits prepacked from the sampler view and sampler state.
+         * (Loop variable renamed so it doesn't shadow the unit index i.)
+         */
+        for (int b = 0; b < ARRAY_SIZE(packed); b++)
+                packed[b] |= sview->texture_shader_state[b] | sampler->texture_shader_state[b];
+
+        /* TMU indirect structs need to be 32b aligned. */
+        vc5_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
+        cl_emit_prepacked(&job->indirect, &packed);
+}
+
+static void
+emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex)
+{
+        /* Emit TEXTURE_SHADER_STATE for every bound texture unit of this
+         * shader stage, skipping units with nothing bound.
+         */
+        for (int unit = 0; unit < stage_tex->num_textures; unit++) {
+                if (!stage_tex->textures[unit])
+                        continue;
+
+                emit_one_texture(vc5, stage_tex, unit);
+        }
+}
+#endif /* V3D_VERSION < 40 */
+
+static uint32_t
+translate_colormask(struct vc5_context *vc5, uint32_t colormask, int rt)
+{
+        /* If this RT is stored with R and B swapped, move the R and B
+         * write-mask bits along with the channels.  G (bit 1) and A (bit 3)
+         * stay where they are.
+         */
+        if (vc5->swap_color_rb & (1 << rt)) {
+                uint32_t r_bit = colormask & 0x1;
+                uint32_t b_bit = (colormask >> 2) & 0x1;
+
+                colormask = (colormask & 0xa) | (r_bit << 2) | b_bit;
+        }
+
+        /* Gallium's mask is "channels enabled"; the value we program is the
+         * inverse.
+         */
+        return ~colormask & 0xf;
+}
+
+/* Emits the BLEND_CONFIG packet for render target 'rt'.  On V3D 4.x each RT
+ * carries its own config (selected by render_target_mask); on 3.x there is
+ * only one global blend config, so only rt == 0 is valid.
+ */
+static void
+emit_rt_blend(struct vc5_context *vc5, struct vc5_job *job,
+              struct pipe_blend_state *blend, int rt)
+{
+        cl_emit(&job->bcl, BLEND_CONFIG, config) {
+                struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
+
+#if V3D_VERSION >= 40
+                config.render_target_mask = 1 << rt;
+#else
+                assert(rt == 0);
+#endif
+
+                /* RGB and alpha blend equations/factors are translated
+                 * separately; vc5_factor() takes blend_dst_alpha_one so
+                 * DST_ALPHA-based factors can be fixed up for the bound
+                 * formats.
+                 */
+                config.colour_blend_mode = rtblend->rgb_func;
+                config.colour_blend_dst_factor =
+                        vc5_factor(rtblend->rgb_dst_factor,
+                                   vc5->blend_dst_alpha_one);
+                config.colour_blend_src_factor =
+                        vc5_factor(rtblend->rgb_src_factor,
+                                   vc5->blend_dst_alpha_one);
+
+                config.alpha_blend_mode = rtblend->alpha_func;
+                config.alpha_blend_dst_factor =
+                        vc5_factor(rtblend->alpha_dst_factor,
+                                   vc5->blend_dst_alpha_one);
+                config.alpha_blend_src_factor =
+                        vc5_factor(rtblend->alpha_src_factor,
+                                   vc5->blend_dst_alpha_one);
+        }
+}
+
+/* Flushes the dirty state bits in vc5->dirty out to the job's binner CL as
+ * hardware packets.  Each state group is only re-emitted when one of its
+ * dirty flags is set.
+ */
+void
+v3dX(emit_state)(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_job *job = vc5->job;
+        bool rasterizer_discard = vc5->rasterizer->base.rasterizer_discard;
+
+        if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
+                          VC5_DIRTY_RASTERIZER)) {
+                float *vpscale = vc5->viewport.scale;
+                float *vptranslate = vc5->viewport.translate;
+                float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
+                float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
+                float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
+                float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
+
+                /* Clip to the scissor if it's enabled, but still clip to the
+                 * drawable regardless since that controls where the binner
+                 * tries to put things.
+                 *
+                 * Additionally, always clip the rendering to the viewport,
+                 * since the hardware does guardband clipping, meaning
+                 * primitives would rasterize outside of the view volume.
+                 */
+                uint32_t minx, miny, maxx, maxy;
+                if (!vc5->rasterizer->base.scissor) {
+                        minx = MAX2(vp_minx, 0);
+                        miny = MAX2(vp_miny, 0);
+                        maxx = MIN2(vp_maxx, job->draw_width);
+                        maxy = MIN2(vp_maxy, job->draw_height);
+                } else {
+                        minx = MAX2(vp_minx, vc5->scissor.minx);
+                        miny = MAX2(vp_miny, vc5->scissor.miny);
+                        maxx = MIN2(vp_maxx, vc5->scissor.maxx);
+                        maxy = MIN2(vp_maxy, vc5->scissor.maxy);
+                }
+
+                cl_emit(&job->bcl, CLIP_WINDOW, clip) {
+                        clip.clip_window_left_pixel_coordinate = minx;
+                        clip.clip_window_bottom_pixel_coordinate = miny;
+                        clip.clip_window_width_in_pixels = maxx - minx;
+                        clip.clip_window_height_in_pixels = maxy - miny;
+
+#if V3D_VERSION < 41
+                        /* The HW won't entirely clip out when scissor w/h is
+                         * 0. Just treat it the same as rasterizer discard.
+                         */
+                        if (clip.clip_window_width_in_pixels == 0 ||
+                            clip.clip_window_height_in_pixels == 0) {
+                                rasterizer_discard = true;
+                                clip.clip_window_width_in_pixels = 1;
+                                clip.clip_window_height_in_pixels = 1;
+                        }
+#endif
+                }
+
+                /* Track the bounds actually drawn, so stores can be limited
+                 * to the touched area.
+                 */
+                job->draw_min_x = MIN2(job->draw_min_x, minx);
+                job->draw_min_y = MIN2(job->draw_min_y, miny);
+                job->draw_max_x = MAX2(job->draw_max_x, maxx);
+                job->draw_max_y = MAX2(job->draw_max_y, maxy);
+        }
+
+        if (vc5->dirty & (VC5_DIRTY_RASTERIZER |
+                          VC5_DIRTY_ZSA |
+                          VC5_DIRTY_BLEND |
+                          VC5_DIRTY_COMPILED_FS)) {
+                cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
+                        config.enable_forward_facing_primitive =
+                                !rasterizer_discard &&
+                                !(vc5->rasterizer->base.cull_face &
+                                  PIPE_FACE_FRONT);
+                        config.enable_reverse_facing_primitive =
+                                !rasterizer_discard &&
+                                !(vc5->rasterizer->base.cull_face &
+                                  PIPE_FACE_BACK);
+                        /* This seems backwards, but it's what gets the
+                         * clipflat test to pass.
+                         */
+                        config.clockwise_primitives =
+                                vc5->rasterizer->base.front_ccw;
+
+                        config.enable_depth_offset =
+                                vc5->rasterizer->base.offset_tri;
+
+                        config.rasterizer_oversample_mode =
+                                vc5->rasterizer->base.multisample;
+
+                        config.direct3d_provoking_vertex =
+                                vc5->rasterizer->base.flatshade_first;
+
+                        config.blend_enable = vc5->blend->rt[0].blend_enable;
+
+                        /* Note: EZ state may update based on the compiled FS,
+                         * along with ZSA
+                         */
+                        config.early_z_updates_enable =
+                                (job->ez_state != VC5_EZ_DISABLED);
+                        if (vc5->zsa->base.depth.enabled) {
+                                config.z_updates_enable =
+                                        vc5->zsa->base.depth.writemask;
+                                config.early_z_enable =
+                                        config.early_z_updates_enable;
+                                config.depth_test_function =
+                                        vc5->zsa->base.depth.func;
+                        } else {
+                                config.depth_test_function = PIPE_FUNC_ALWAYS;
+                        }
+
+                        config.stencil_enable =
+                                vc5->zsa->base.stencil[0].enabled;
+                }
+
+        }
+
+        if (vc5->dirty & VC5_DIRTY_RASTERIZER &&
+            vc5->rasterizer->base.offset_tri) {
+                cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
+                        depth.depth_offset_factor =
+                                vc5->rasterizer->offset_factor;
+                        depth.depth_offset_units =
+                                vc5->rasterizer->offset_units;
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_RASTERIZER) {
+                cl_emit(&job->bcl, POINT_SIZE, point_size) {
+                        point_size.point_size = vc5->rasterizer->point_size;
+                }
+
+                cl_emit(&job->bcl, LINE_WIDTH, line_width) {
+                        line_width.line_width = vc5->rasterizer->base.line_width;
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_VIEWPORT) {
+                cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
+                        clip.viewport_half_width_in_1_256th_of_pixel =
+                                vc5->viewport.scale[0] * 256.0f;
+                        clip.viewport_half_height_in_1_256th_of_pixel =
+                                vc5->viewport.scale[1] * 256.0f;
+                }
+
+                cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+                        clip.viewport_z_offset_zc_to_zs =
+                                vc5->viewport.translate[2];
+                        clip.viewport_z_scale_zc_to_zs =
+                                vc5->viewport.scale[2];
+                }
+                cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
+                        clip.minimum_zw = (vc5->viewport.translate[2] -
+                                           vc5->viewport.scale[2]);
+                        clip.maximum_zw = (vc5->viewport.translate[2] +
+                                           vc5->viewport.scale[2]);
+                }
+
+                cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
+                        vp.viewport_centre_x_coordinate =
+                                vc5->viewport.translate[0];
+                        vp.viewport_centre_y_coordinate =
+                                vc5->viewport.translate[1];
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_BLEND && vc5->blend->rt[0].blend_enable) {
+                struct pipe_blend_state *blend = vc5->blend;
+
+                if (blend->independent_blend_enable) {
+                        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++)
+                                emit_rt_blend(vc5, job, blend, i);
+                } else {
+                        emit_rt_blend(vc5, job, blend, 0);
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_BLEND) {
+                struct pipe_blend_state *blend = vc5->blend;
+
+                /* Without independent blend, RT 0's colormask applies to all
+                 * RTs (but each still gets its own R/B swap via the rt
+                 * argument of translate_colormask()).
+                 */
+                cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
+                        if (blend->independent_blend_enable) {
+                                mask.render_target_0_per_colour_component_write_masks =
+                                        translate_colormask(vc5, blend->rt[0].colormask, 0);
+                                mask.render_target_1_per_colour_component_write_masks =
+                                        translate_colormask(vc5, blend->rt[1].colormask, 1);
+                                mask.render_target_2_per_colour_component_write_masks =
+                                        translate_colormask(vc5, blend->rt[2].colormask, 2);
+                                mask.render_target_3_per_colour_component_write_masks =
+                                        translate_colormask(vc5, blend->rt[3].colormask, 3);
+                        } else {
+                                mask.render_target_0_per_colour_component_write_masks =
+                                        translate_colormask(vc5, blend->rt[0].colormask, 0);
+                                mask.render_target_1_per_colour_component_write_masks =
+                                        translate_colormask(vc5, blend->rt[0].colormask, 1);
+                                mask.render_target_2_per_colour_component_write_masks =
+                                        translate_colormask(vc5, blend->rt[0].colormask, 2);
+                                mask.render_target_3_per_colour_component_write_masks =
+                                        translate_colormask(vc5, blend->rt[0].colormask, 3);
+                        }
+                }
+        }
+
+        /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
+         * color.
+         */
+        if (vc5->dirty & VC5_DIRTY_BLEND_COLOR ||
+            (V3D_VERSION < 41 && (vc5->dirty & VC5_DIRTY_BLEND))) {
+                cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
+                        colour.red_f16 = (vc5->swap_color_rb ?
+                                          vc5->blend_color.hf[2] :
+                                          vc5->blend_color.hf[0]);
+                        colour.green_f16 = vc5->blend_color.hf[1];
+                        colour.blue_f16 = (vc5->swap_color_rb ?
+                                           vc5->blend_color.hf[0] :
+                                           vc5->blend_color.hf[2]);
+                        colour.alpha_f16 = vc5->blend_color.hf[3];
+                }
+        }
+
+        if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
+                struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0];
+                struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1];
+
+                if (front->enabled) {
+                        cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
+                                               vc5->zsa->stencil_front, config) {
+                                config.stencil_ref_value =
+                                        vc5->stencil_ref.ref_value[0];
+                        }
+                }
+
+                if (back->enabled) {
+                        cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
+                                               vc5->zsa->stencil_back, config) {
+                                config.stencil_ref_value =
+                                        vc5->stencil_ref.ref_value[1];
+                        }
+                }
+        }
+
+#if V3D_VERSION < 40
+        /* Pre-4.x, we have texture state that depends on both the sampler and
+         * the view, so we merge them together at draw time.
+         */
+        if (vc5->dirty & VC5_DIRTY_FRAGTEX)
+                emit_textures(vc5, &vc5->fragtex);
+
+        if (vc5->dirty & VC5_DIRTY_VERTTEX)
+                emit_textures(vc5, &vc5->verttex);
+#endif
+
+        if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
+                bool emitted_any = false;
+
+                for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->flat_shade_flags); i++) {
+                        if (!vc5->prog.fs->prog_data.fs->flat_shade_flags[i])
+                                continue;
+
+                        cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
+                                flags.varying_offset_v0 = i;
+
+                                /* The first packet emitted must zero the
+                                 * flags of all the varying words it doesn't
+                                 * cover; later packets leave previously
+                                 * emitted words unchanged.
+                                 */
+                                if (emitted_any) {
+                                        flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
+                                        flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
+                                } else {
+                                        flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
+                                                ((i == 0) ?
+                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED :
+                                                 V3D_VARYING_FLAGS_ACTION_ZEROED);
+
+                                        flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_ZEROED;
+                                }
+
+                                flags.flat_shade_flags_for_varyings_v024 =
+                                        vc5->prog.fs->prog_data.fs->flat_shade_flags[i];
+                        }
+
+                        emitted_any = true;
+                }
+
+                if (!emitted_any) {
+                        cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
+                }
+        }
+
+#if V3D_VERSION >= 40
+        if (vc5->dirty & VC5_DIRTY_CENTROID_FLAGS) {
+                bool emitted_any = false;
+
+                for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->centroid_flags); i++) {
+                        if (!vc5->prog.fs->prog_data.fs->centroid_flags[i])
+                                continue;
+
+                        cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
+                                flags.varying_offset_v0 = i;
+
+                                /* Same zero-then-leave-unchanged scheme as
+                                 * FLAT_SHADE_FLAGS above.
+                                 */
+                                if (emitted_any) {
+                                        flags.action_for_centroid_flags_of_lower_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
+                                        flags.action_for_centroid_flags_of_higher_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
+                                } else {
+                                        flags.action_for_centroid_flags_of_lower_numbered_varyings =
+                                                ((i == 0) ?
+                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED :
+                                                 V3D_VARYING_FLAGS_ACTION_ZEROED);
+
+                                        flags.action_for_centroid_flags_of_higher_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_ZEROED;
+                                }
+
+                                flags.centroid_flags_for_varyings_v024 =
+                                        vc5->prog.fs->prog_data.fs->centroid_flags[i];
+                        }
+
+                        emitted_any = true;
+                }
+
+                if (!emitted_any) {
+                        cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
+                }
+        }
+#endif
+
+        /* Set up the transform feedback data specs (which VPM entries to
+         * output to which buffers).
+         */
+        if (vc5->dirty & (VC5_DIRTY_STREAMOUT |
+                          VC5_DIRTY_RASTERIZER |
+                          VC5_DIRTY_PRIM_MODE)) {
+                struct vc5_streamout_stateobj *so = &vc5->streamout;
+
+                if (so->num_targets) {
+                        bool psiz_per_vertex = (vc5->prim_mode == PIPE_PRIM_POINTS &&
+                                                vc5->rasterizer->base.point_size_per_vertex);
+                        uint16_t *tf_specs = (psiz_per_vertex ?
+                                              vc5->prog.bind_vs->tf_specs_psiz :
+                                              vc5->prog.bind_vs->tf_specs);
+
+#if V3D_VERSION >= 40
+                        job->tf_enabled = (vc5->prog.bind_vs->num_tf_specs != 0 &&
+                                           vc5->active_queries);
+
+                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
+                                tfe.number_of_16_bit_output_data_specs_following =
+                                        vc5->prog.bind_vs->num_tf_specs;
+                                tfe.enable = job->tf_enabled;
+                        };
+#else /* V3D_VERSION < 40 */
+                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
+                                tfe.number_of_32_bit_output_buffer_address_following =
+                                        so->num_targets;
+                                tfe.number_of_16_bit_output_data_specs_following =
+                                        vc5->prog.bind_vs->num_tf_specs;
+                        };
+#endif /* V3D_VERSION < 40 */
+                        for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
+                                cl_emit_prepacked(&job->bcl, &tf_specs[i]);
+                        }
+                } else if (job->tf_enabled) {
+#if V3D_VERSION >= 40
+                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
+                                tfe.enable = false;
+                        };
+                        job->tf_enabled = false;
+#endif /* V3D_VERSION >= 40 */
+                }
+        }
+
+        /* Set up the transform feedback buffers. */
+        if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
+                struct vc5_streamout_stateobj *so = &vc5->streamout;
+                for (int i = 0; i < so->num_targets; i++) {
+                        const struct pipe_stream_output_target *target =
+                                so->targets[i];
+                        struct vc5_resource *rsc = target ?
+                                vc5_resource(target->buffer) : NULL;
+
+#if V3D_VERSION >= 40
+                        if (!target)
+                                continue;
+
+                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
+                                output.buffer_address =
+                                        cl_address(rsc->bo,
+                                                   target->buffer_offset);
+                                output.buffer_size_in_32_bit_words =
+                                        target->buffer_size >> 2;
+                                output.buffer_number = i;
+                        }
+#else /* V3D_VERSION < 40 */
+                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
+                                if (target) {
+                                        output.address =
+                                                cl_address(rsc->bo,
+                                                           target->buffer_offset);
+                                }
+                        };
+#endif /* V3D_VERSION < 40 */
+                        if (target) {
+                                vc5_job_add_write_resource(vc5->job,
+                                                           target->buffer);
+                        }
+                        /* XXX: buffer_size? */
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_OQ) {
+                cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
+                        job->oq_enabled = vc5->active_queries && vc5->current_oq;
+                        if (job->oq_enabled) {
+                                counter.address = cl_address(vc5->current_oq, 0);
+                        }
+                }
+        }
+}
diff --git a/src/gallium/drivers/v3d/v3dx_format_table.c b/src/gallium/drivers/v3d/v3dx_format_table.c
new file mode 100644
index 00000000000..458488119c7
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3dx_format_table.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright © 2014-2018 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+
+#include "v3d_context.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/common/v3d_macros.h"
+#include "v3d_format_table.h"
+
+#define SWIZ(x,y,z,w) { \
+        PIPE_SWIZZLE_##x, \
+        PIPE_SWIZZLE_##y, \
+        PIPE_SWIZZLE_##z, \
+        PIPE_SWIZZLE_##w \
+}
+
+/* Table entry layout:
+ *   FORMAT(pipe format, V3D_OUTPUT_IMAGE_FORMAT_* render-target format,
+ *          TEXTURE_DATA_FORMAT_* sampler format, sampler swizzle,
+ *          TMU return size (16 or 32 bits), return channel count)
+ * "NO" in the RT column marks formats that are texture-only.
+ */
+#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels) \
+        [PIPE_FORMAT_##pipe] = { \
+                true, \
+                V3D_OUTPUT_IMAGE_FORMAT_##rt, \
+                TEXTURE_DATA_FORMAT_##tex, \
+                swiz, \
+                return_size, \
+                return_channels, \
+        }
+
+#define SWIZ_X001	SWIZ(X, 0, 0, 1)
+#define SWIZ_XY01	SWIZ(X, Y, 0, 1)
+#define SWIZ_XYZ1	SWIZ(X, Y, Z, 1)
+#define SWIZ_XYZW	SWIZ(X, Y, Z, W)
+#define SWIZ_YZWX	SWIZ(Y, Z, W, X)
+#define SWIZ_YZW1	SWIZ(Y, Z, W, 1)
+#define SWIZ_ZYXW	SWIZ(Z, Y, X, W)
+#define SWIZ_ZYX1	SWIZ(Z, Y, X, 1)
+#define SWIZ_XXXY	SWIZ(X, X, X, Y)
+#define SWIZ_XXX1	SWIZ(X, X, X, 1)
+#define SWIZ_XXXX	SWIZ(X, X, X, X)
+#define SWIZ_000X	SWIZ(0, 0, 0, X)
+
+static const struct vc5_format format_table[] = {
+        FORMAT(B8G8R8A8_UNORM,    RGBA8,        RGBA8,       SWIZ_ZYXW, 16, 0),
+        FORMAT(B8G8R8X8_UNORM,    RGBA8,        RGBA8,       SWIZ_ZYX1, 16, 0),
+        FORMAT(B8G8R8A8_SRGB,     SRGB8_ALPHA8, RGBA8,       SWIZ_ZYXW, 16, 0),
+        FORMAT(B8G8R8X8_SRGB,     SRGB8_ALPHA8, RGBA8,       SWIZ_ZYX1, 16, 0),
+        FORMAT(R8G8B8A8_UNORM,    RGBA8,        RGBA8,       SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8X8_UNORM,    RGBA8,        RGBA8,       SWIZ_XYZ1, 16, 0),
+        FORMAT(R8G8B8A8_SNORM,    NO,           RGBA8_SNORM, SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8X8_SNORM,    NO,           RGBA8_SNORM, SWIZ_XYZ1, 16, 0),
+        FORMAT(R10G10B10A2_UNORM, RGB10_A2,     RGB10_A2,    SWIZ_XYZW, 16, 0),
+        FORMAT(R10G10B10A2_UINT,  RGB10_A2UI,   RGB10_A2UI,  SWIZ_XYZW, 16, 0),
+
+        FORMAT(A4B4G4R4_UNORM,    ABGR4444,     RGBA4,       SWIZ_XYZW, 16, 0),
+
+        FORMAT(A1B5G5R5_UNORM,    ABGR1555,     RGB5_A1,     SWIZ_XYZW, 16, 0),
+        FORMAT(X1B5G5R5_UNORM,    ABGR1555,     RGB5_A1,     SWIZ_XYZ1, 16, 0),
+        FORMAT(B5G6R5_UNORM,      BGR565,       RGB565,      SWIZ_XYZ1, 16, 0),
+
+        FORMAT(R8_UNORM,          R8,           R8,          SWIZ_X001, 16, 0),
+        FORMAT(R8_SNORM,          NO,           R8_SNORM,    SWIZ_X001, 16, 0),
+        FORMAT(R8G8_UNORM,        RG8,          RG8,         SWIZ_XY01, 16, 0),
+        FORMAT(R8G8_SNORM,        NO,           RG8_SNORM,   SWIZ_XY01, 16, 0),
+
+        FORMAT(R16_UNORM,         NO,           R16,         SWIZ_X001, 32, 1),
+        FORMAT(R16_SNORM,         NO,           R16_SNORM,   SWIZ_X001, 32, 1),
+        FORMAT(R16_FLOAT,         R16F,         R16F,        SWIZ_X001, 16, 0),
+        FORMAT(R32_FLOAT,         R32F,         R32F,        SWIZ_X001, 32, 1),
+
+        FORMAT(R16G16_UNORM,      NO,           RG16,        SWIZ_XY01, 32, 2),
+        FORMAT(R16G16_SNORM,      NO,           RG16_SNORM,  SWIZ_XY01, 32, 2),
+        FORMAT(R16G16_FLOAT,      RG16F,        RG16F,       SWIZ_XY01, 16, 0),
+        FORMAT(R32G32_FLOAT,      RG32F,        RG32F,       SWIZ_XY01, 32, 2),
+
+        FORMAT(R16G16B16A16_UNORM, NO,          RGBA16,      SWIZ_XYZW, 32, 4),
+        FORMAT(R16G16B16A16_SNORM, NO,          RGBA16_SNORM, SWIZ_XYZW, 32, 4),
+        FORMAT(R16G16B16A16_FLOAT, RGBA16F,     RGBA16F,     SWIZ_XYZW, 16, 0),
+        FORMAT(R32G32B32A32_FLOAT, RGBA32F,     RGBA32F,     SWIZ_XYZW, 32, 4),
+
+        /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. */
+        FORMAT(L16_UNORM,         NO,           R16,         SWIZ_XXX1, 32, 1),
+        FORMAT(L16_SNORM,         NO,           R16_SNORM,   SWIZ_XXX1, 32, 1),
+        FORMAT(I16_UNORM,         NO,           R16,         SWIZ_XXXX, 32, 1),
+        FORMAT(I16_SNORM,         NO,           R16_SNORM,   SWIZ_XXXX, 32, 1),
+        FORMAT(A16_UNORM,         NO,           R16,         SWIZ_000X, 32, 1),
+        FORMAT(A16_SNORM,         NO,           R16_SNORM,   SWIZ_000X, 32, 1),
+        FORMAT(L16A16_UNORM,      NO,           RG16,        SWIZ_XXXY, 32, 2),
+        FORMAT(L16A16_SNORM,      NO,           RG16_SNORM,  SWIZ_XXXY, 32, 2),
+
+        FORMAT(A8_UNORM,          NO,           R8,          SWIZ_000X, 16, 0),
+        FORMAT(L8_UNORM,          NO,           R8,          SWIZ_XXX1, 16, 0),
+        FORMAT(I8_UNORM,          NO,           R8,          SWIZ_XXXX, 16, 0),
+        FORMAT(L8A8_UNORM,        NO,           RG8,         SWIZ_XXXY, 16, 0),
+
+        FORMAT(R8_SINT,           R8I,          R8I,         SWIZ_X001, 16, 0),
+        FORMAT(R8_UINT,           R8UI,         R8UI,        SWIZ_X001, 16, 0),
+        FORMAT(R8G8_SINT,         RG8I,         RG8I,        SWIZ_XY01, 16, 0),
+        FORMAT(R8G8_UINT,         RG8UI,        RG8UI,       SWIZ_XY01, 16, 0),
+        FORMAT(R8G8B8A8_SINT,     RGBA8I,       RGBA8I,      SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8A8_UINT,     RGBA8UI,      RGBA8UI,     SWIZ_XYZW, 16, 0),
+
+        FORMAT(R16_SINT,          R16I,         R16I,        SWIZ_X001, 16, 0),
+        FORMAT(R16_UINT,          R16UI,        R16UI,       SWIZ_X001, 16, 0),
+        FORMAT(R16G16_SINT,       RG16I,        RG16I,       SWIZ_XY01, 16, 0),
+        FORMAT(R16G16_UINT,       RG16UI,       RG16UI,      SWIZ_XY01, 16, 0),
+        FORMAT(R16G16B16A16_SINT, RGBA16I,      RGBA16I,     SWIZ_XYZW, 16, 0),
+        FORMAT(R16G16B16A16_UINT, RGBA16UI,     RGBA16UI,    SWIZ_XYZW, 16, 0),
+
+        FORMAT(R32_SINT,          R32I,         R32I,        SWIZ_X001, 32, 1),
+        FORMAT(R32_UINT,          R32UI,        R32UI,       SWIZ_X001, 32, 1),
+        FORMAT(R32G32_SINT,       RG32I,        RG32I,       SWIZ_XY01, 32, 2),
+        FORMAT(R32G32_UINT,       RG32UI,       RG32UI,      SWIZ_XY01, 32, 2),
+        FORMAT(R32G32B32A32_SINT, RGBA32I,      RGBA32I,     SWIZ_XYZW, 32, 4),
+        FORMAT(R32G32B32A32_UINT, RGBA32UI,     RGBA32UI,    SWIZ_XYZW, 32, 4),
+
+        FORMAT(A8_SINT,           R8I,          R8I,         SWIZ_000X, 16, 0),
+        FORMAT(A8_UINT,           R8UI,         R8UI,        SWIZ_000X, 16, 0),
+        FORMAT(A16_SINT,          R16I,         R16I,        SWIZ_000X, 16, 0),
+        FORMAT(A16_UINT,          R16UI,        R16UI,       SWIZ_000X, 16, 0),
+        FORMAT(A32_SINT,          R32I,         R32I,        SWIZ_000X, 32, 1),
+        FORMAT(A32_UINT,          R32UI,        R32UI,       SWIZ_000X, 32, 1),
+
+        FORMAT(R11G11B10_FLOAT,   R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 16, 0),
+        FORMAT(R9G9B9E5_FLOAT,    NO,           RGB9_E5,     SWIZ_XYZW, 16, 0),
+
+#if V3D_VERSION >= 40
+        FORMAT(S8_UINT_Z24_UNORM, D24S8,        DEPTH24_X8,  SWIZ_XXXX, 32, 1),
+        FORMAT(X8Z24_UNORM,       D24S8,        DEPTH24_X8,  SWIZ_XXXX, 32, 1),
+        FORMAT(S8X24_UINT,        S8,           R32F,        SWIZ_XXXX, 32, 1),
+        FORMAT(Z32_FLOAT,         D32F,         R32F,        SWIZ_XXXX, 32, 1),
+        FORMAT(Z16_UNORM,         D16,          DEPTH_COMP16,SWIZ_XXXX, 32, 1),
+
+        /* Pretend we support this, but it'll be separate Z32F depth and S8. */
+        FORMAT(Z32_FLOAT_S8X24_UINT, D32F,      R32F,        SWIZ_XXXX, 32, 1),
+#else
+        FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1),
+        FORMAT(X8Z24_UNORM,       ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1),
+        FORMAT(S8X24_UINT,        NO,           R32F,        SWIZ_XXXX, 32, 1),
+        FORMAT(Z32_FLOAT,         ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1),
+        FORMAT(Z16_UNORM,         ZS_DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_XXXX, 32, 1),
+
+        /* Pretend we support this, but it'll be separate Z32F depth and S8. */
+        FORMAT(Z32_FLOAT_S8X24_UINT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1),
+#endif
+
+        FORMAT(ETC2_RGB8,         NO,           RGB8_ETC2,   SWIZ_XYZ1, 16, 0),
+        FORMAT(ETC2_SRGB8,        NO,           RGB8_ETC2,   SWIZ_XYZ1, 16, 0),
+        FORMAT(ETC2_RGB8A1,       NO,           RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_SRGB8A1,      NO,           RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_RGBA8,        NO,           RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_SRGBA8,       NO,           RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_R11_UNORM,    NO,           R11_EAC,     SWIZ_X001, 16, 0),
+        FORMAT(ETC2_R11_SNORM,    NO,           SIGNED_R11_EAC, SWIZ_X001, 16, 0),
+        FORMAT(ETC2_RG11_UNORM,   NO,           RG11_EAC,    SWIZ_XY01, 16, 0),
+        FORMAT(ETC2_RG11_SNORM,   NO,           SIGNED_RG11_EAC, SWIZ_XY01, 16, 0),
+
+        FORMAT(DXT1_RGB,          NO,           BC1,         SWIZ_XYZ1, 16, 0),
+        FORMAT(DXT3_RGBA,         NO,           BC2,         SWIZ_XYZ1, 16, 0),
+        FORMAT(DXT5_RGBA,         NO,           BC3,         SWIZ_XYZ1, 16, 0),
+};
+
+const struct vc5_format *
+v3dX(get_format_desc)(enum pipe_format f)
+{
+        /* Entries past the end of the table, or present == false, mean the
+         * pipe format is unsupported on this V3D version.
+         */
+        if (f >= ARRAY_SIZE(format_table) || !format_table[f].present)
+                return NULL;
+
+        return &format_table[f];
+}
+
+/* Maps a V3D_OUTPUT_IMAGE_FORMAT_* render-target format to the internal
+ * tile-buffer type and bpp used while rendering to it.  Unknown formats get
+ * 8-bit/32bpp defaults, since we can be called at renderbuffer creation time
+ * for formats we don't actually support rendering to.
+ */
+void
+v3dX(get_internal_type_bpp_for_output_format)(uint32_t format,
+                                              uint32_t *type,
+                                              uint32_t *bpp)
+{
+        switch (format) {
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA8:
+#if V3D_VERSION < 41
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBX8:
+#endif
+        case V3D_OUTPUT_IMAGE_FORMAT_RGB8:
+        case V3D_OUTPUT_IMAGE_FORMAT_RG8:
+        case V3D_OUTPUT_IMAGE_FORMAT_R8:
+        case V3D_OUTPUT_IMAGE_FORMAT_ABGR4444:
+        case V3D_OUTPUT_IMAGE_FORMAT_BGR565:
+        case V3D_OUTPUT_IMAGE_FORMAT_ABGR1555:
+                *type = V3D_INTERNAL_TYPE_8;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA8I:
+        case V3D_OUTPUT_IMAGE_FORMAT_RG8I:
+        case V3D_OUTPUT_IMAGE_FORMAT_R8I:
+                *type = V3D_INTERNAL_TYPE_8I;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI:
+        case V3D_OUTPUT_IMAGE_FORMAT_RG8UI:
+        case V3D_OUTPUT_IMAGE_FORMAT_R8UI:
+                *type = V3D_INTERNAL_TYPE_8UI;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8:
+        case V3D_OUTPUT_IMAGE_FORMAT_SRGB:
+        case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2:
+        case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F:
+#if V3D_VERSION < 41
+        case V3D_OUTPUT_IMAGE_FORMAT_SRGBX8:
+#endif
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F:
+                /* Note that sRGB RTs are stored in the tile buffer at 16F,
+                 * and the conversion to sRGB happens at tilebuffer
+                 * load/store.
+                 */
+                *type = V3D_INTERNAL_TYPE_16F;
+                *bpp = V3D_INTERNAL_BPP_64;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_RG16F:
+        case V3D_OUTPUT_IMAGE_FORMAT_R16F:
+                *type = V3D_INTERNAL_TYPE_16F;
+                /* Use 64bpp to make sure the TLB doesn't throw away the alpha
+                 * channel before alpha test happens.
+                 */
+                *bpp = V3D_INTERNAL_BPP_64;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA16I:
+                *type = V3D_INTERNAL_TYPE_16I;
+                *bpp = V3D_INTERNAL_BPP_64;
+                break;
+        case V3D_OUTPUT_IMAGE_FORMAT_RG16I:
+        case V3D_OUTPUT_IMAGE_FORMAT_R16I:
+                *type = V3D_INTERNAL_TYPE_16I;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2UI:
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA16UI:
+                *type = V3D_INTERNAL_TYPE_16UI;
+                *bpp = V3D_INTERNAL_BPP_64;
+                break;
+        case V3D_OUTPUT_IMAGE_FORMAT_RG16UI:
+        case V3D_OUTPUT_IMAGE_FORMAT_R16UI:
+                *type = V3D_INTERNAL_TYPE_16UI;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA32I:
+                *type = V3D_INTERNAL_TYPE_32I;
+                *bpp = V3D_INTERNAL_BPP_128;
+                break;
+        case V3D_OUTPUT_IMAGE_FORMAT_RG32I:
+                *type = V3D_INTERNAL_TYPE_32I;
+                *bpp = V3D_INTERNAL_BPP_64;
+                break;
+        case V3D_OUTPUT_IMAGE_FORMAT_R32I:
+                *type = V3D_INTERNAL_TYPE_32I;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA32UI:
+                *type = V3D_INTERNAL_TYPE_32UI;
+                *bpp = V3D_INTERNAL_BPP_128;
+                break;
+        case V3D_OUTPUT_IMAGE_FORMAT_RG32UI:
+                *type = V3D_INTERNAL_TYPE_32UI;
+                *bpp = V3D_INTERNAL_BPP_64;
+                break;
+        case V3D_OUTPUT_IMAGE_FORMAT_R32UI:
+                *type = V3D_INTERNAL_TYPE_32UI;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+
+        case V3D_OUTPUT_IMAGE_FORMAT_RGBA32F:
+                *type = V3D_INTERNAL_TYPE_32F;
+                *bpp = V3D_INTERNAL_BPP_128;
+                break;
+        case V3D_OUTPUT_IMAGE_FORMAT_RG32F:
+                *type = V3D_INTERNAL_TYPE_32F;
+                *bpp = V3D_INTERNAL_BPP_64;
+                break;
+        case V3D_OUTPUT_IMAGE_FORMAT_R32F:
+                *type = V3D_INTERNAL_TYPE_32F;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+
+        default:
+                /* Provide some default values, as we'll be called at RB
+                 * creation time, even if an RB with this format isn't
+                 * supported.
+                 */
+                *type = V3D_INTERNAL_TYPE_8;
+                *bpp = V3D_INTERNAL_BPP_32;
+                break;
+        }
+}
diff --git a/src/gallium/drivers/v3d/v3dx_job.c b/src/gallium/drivers/v3d/v3dx_job.c
new file mode 100644
index 00000000000..5e1a345b170
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3dx_job.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file v3dx_job.c
+ *
+ * V3D version-specific functions for submitting VC5 render jobs to the
+ * kernel.
+ */
+
+#include "v3d_context.h"
+#include "broadcom/cle/v3dx_pack.h"
+
+/* Terminates the binner CL for a job: resets OQ/TF state that would
+ * otherwise leak into the next job, then signals the render thread.
+ */
+void v3dX(bcl_epilogue)(struct vc5_context *vc5, struct vc5_job *job)
+{
+        /* Reserve worst-case space up front so the packets below can't
+         * trigger a BCL chain mid-epilogue.
+         */
+        vc5_cl_ensure_space_with_branch(&job->bcl,
+                                        cl_packet_length(OCCLUSION_QUERY_COUNTER) +
+#if V3D_VERSION >= 41
+                                        cl_packet_length(TRANSFORM_FEEDBACK_SPECS) +
+#endif
+                                        cl_packet_length(INCREMENT_SEMAPHORE) +
+                                        cl_packet_length(FLUSH_ALL_STATE));
+
+        if (job->oq_enabled) {
+                /* Disable the OQ at the end of the CL, so that the
+                 * draw calls at the start of the CL don't inherit the
+                 * OQ counter.
+                 */
+                cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
+        }
+
+        /* Disable TF at the end of the CL, so that the next job to be
+         * run doesn't start out trying to write TF primitives. On
+         * V3D 3.x, it's only the TF primitive mode that triggers TF
+         * writes.
+         */
+#if V3D_VERSION >= 41
+        if (job->tf_enabled) {
+                cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
+                        tfe.enable = false;
+                };
+        }
+#endif /* V3D_VERSION >= 41 */
+
+        /* Increment the semaphore indicating that binning is done and
+         * unblocking the render thread. Note that this doesn't act
+         * until the FLUSH completes.
+         */
+        cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
+
+        /* The FLUSH_ALL emits any unwritten state changes in each
+         * tile. We can use this to reset any state that needs to be
+         * present at the start of the next tile, as we do with
+         * OCCLUSION_QUERY_COUNTER above.
+         */
+        cl_emit(&job->bcl, FLUSH_ALL_STATE, flush);
+}
diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c
new file mode 100644
index 00000000000..3801d03ecee
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3dx_rcl.c
@@ -0,0 +1,782 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "v3d_context.h"
+#include "v3d_tiling.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+
+/* Mask of all four color-attachment clear bits.  (The original definition
+ * ended with a stray line continuation after the closing paren, which
+ * silently pulled the following blank line into the macro body.)
+ */
+#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \
+ PIPE_CLEAR_COLOR1 | \
+ PIPE_CLEAR_COLOR2 | \
+ PIPE_CLEAR_COLOR3)
+
+/* Bit index of the first color-buffer clear flag, for shifting per-RT masks. */
+#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)
+
+/* The HW queues up the load until the tile coordinates show up, but can only
+ * track one at a time. If we need to do more than one load, then we need to
+ * flush out the previous load by emitting the tile coordinates and doing a
+ * dummy store.
+ */
+static void
+flush_last_load(struct vc5_cl *cl)
+{
+ /* V3D 4.x sequences loads with END_OF_LOADS instead (see
+ * vc5_rcl_emit_loads()), so no flushing is needed there.
+ */
+ if (V3D_VERSION >= 40)
+ return;
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+ cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+}
+
+/* Emits a general tile-buffer load for one attachment and clears @pipe_bit
+ * from *loads_pending.  On V3D 3.x the queued load is flushed out if more
+ * loads still follow, since the HW only tracks one pending load at a time.
+ */
+static void
+load_general(struct vc5_cl *cl, struct pipe_surface *psurf, int buffer,
+ uint32_t pipe_bit, uint32_t *loads_pending)
+{
+ struct vc5_surface *surf = vc5_surface(psurf);
+ bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
+ if (separate_stencil) {
+ /* Redirect to the separate stencil surface/resource. */
+ psurf = surf->separate_stencil;
+ surf = vc5_surface(psurf);
+ }
+
+ struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+ cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
+ load.buffer_to_load = buffer;
+ load.address = cl_address(rsc->bo, surf->offset);
+
+#if V3D_VERSION >= 40
+ load.memory_format = surf->tiling;
+ if (separate_stencil)
+ load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
+ else
+ load.input_image_format = surf->format;
+
+ /* This packet field carries the UIF padded height for UIF
+ * tilings, or the stride for raster.
+ */
+ if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
+ surf->tiling == VC5_TILING_UIF_XOR) {
+ load.height_in_ub_or_stride =
+ surf->padded_height_of_output_image_in_uif_blocks;
+ } else if (surf->tiling == VC5_TILING_RASTER) {
+ struct vc5_resource_slice *slice =
+ &rsc->slices[psurf->u.tex.level];
+ load.height_in_ub_or_stride = slice->stride;
+ }
+
+ if (psurf->texture->nr_samples > 1)
+ load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
+ else
+ load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+
+#else /* V3D_VERSION < 40 */
+ /* Can't do raw ZSTENCIL loads -- need to load/store them to
+ * separate buffers for Z and stencil.
+ */
+ assert(buffer != ZSTENCIL);
+ load.raw_mode = true;
+ load.padded_height_of_output_image_in_uif_blocks =
+ surf->padded_height_of_output_image_in_uif_blocks;
+#endif /* V3D_VERSION < 40 */
+ }
+
+ *loads_pending &= ~pipe_bit;
+ if (*loads_pending)
+ flush_last_load(cl);
+}
+
+/* Emits a general tile-buffer store for one attachment and clears @pipe_bit
+ * from *stores_pending.  The clear-on-write flag handling follows the TLB
+ * clear strategy described in vc5_rcl_emit_stores().
+ */
+static void
+store_general(struct vc5_job *job,
+ struct vc5_cl *cl, struct pipe_surface *psurf, int buffer,
+ int pipe_bit, uint32_t *stores_pending, bool general_color_clear)
+{
+ struct vc5_surface *surf = vc5_surface(psurf);
+ bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
+ if (separate_stencil) {
+ /* Redirect to the separate stencil surface/resource. */
+ psurf = surf->separate_stencil;
+ surf = vc5_surface(psurf);
+ }
+
+ *stores_pending &= ~pipe_bit;
+ bool last_store = !(*stores_pending);
+
+ struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+ /* Record that this job writes the resource. */
+ rsc->writes++;
+
+ cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = buffer;
+ store.address = cl_address(rsc->bo, surf->offset);
+
+#if V3D_VERSION >= 40
+ /* Only clear here when a separate clear-all-RTs packet won't
+ * be emitted instead (see general_color_clear in the caller).
+ */
+ store.clear_buffer_being_stored =
+ ((job->cleared & pipe_bit) &&
+ (general_color_clear ||
+ !(pipe_bit & PIPE_CLEAR_COLOR_BUFFERS)));
+
+ if (separate_stencil)
+ store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
+ else
+ store.output_image_format = surf->format;
+
+ store.memory_format = surf->tiling;
+
+ /* This packet field carries the UIF padded height for UIF
+ * tilings, or the stride for raster.
+ */
+ if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
+ surf->tiling == VC5_TILING_UIF_XOR) {
+ store.height_in_ub_or_stride =
+ surf->padded_height_of_output_image_in_uif_blocks;
+ } else if (surf->tiling == VC5_TILING_RASTER) {
+ struct vc5_resource_slice *slice =
+ &rsc->slices[psurf->u.tex.level];
+ store.height_in_ub_or_stride = slice->stride;
+ }
+
+ if (psurf->texture->nr_samples > 1)
+ store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
+ else
+ store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+
+#else /* V3D_VERSION < 40 */
+ /* Can't do raw ZSTENCIL stores -- need to load/store them to
+ * separate buffers for Z and stencil.
+ */
+ assert(buffer != ZSTENCIL);
+ store.raw_mode = true;
+ if (!last_store) {
+ /* Defer all TLB clears to the final store. */
+ store.disable_colour_buffers_clear_on_write = true;
+ store.disable_z_buffer_clear_on_write = true;
+ store.disable_stencil_buffer_clear_on_write = true;
+ } else {
+ store.disable_colour_buffers_clear_on_write =
+ !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) &&
+ general_color_clear &&
+ (job->cleared & pipe_bit)));
+ store.disable_z_buffer_clear_on_write =
+ !(job->cleared & PIPE_CLEAR_DEPTH);
+ store.disable_stencil_buffer_clear_on_write =
+ !(job->cleared & PIPE_CLEAR_STENCIL);
+ }
+ store.padded_height_of_output_image_in_uif_blocks =
+ surf->padded_height_of_output_image_in_uif_blocks;
+#endif /* V3D_VERSION < 40 */
+ }
+
+ /* There must be a TILE_COORDINATES_IMPLICIT between each store. */
+ if (V3D_VERSION < 40 && !last_store) {
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+ }
+}
+
+/* Maps a set of PIPE_CLEAR_DEPTH/STENCIL bits to the matching tile-buffer
+ * selector for the general load/store packets.
+ */
+static int
+zs_buffer_from_pipe_bits(int pipe_clear_bits)
+{
+ int zs_bits = pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL;
+
+ if (zs_bits == PIPE_CLEAR_DEPTHSTENCIL)
+ return ZSTENCIL;
+ if (zs_bits == PIPE_CLEAR_DEPTH)
+ return Z;
+ if (zs_bits == PIPE_CLEAR_STENCIL)
+ return STENCIL;
+ return NONE;
+}
+
+/* Emits the tile-buffer loads at the start of the generic per-tile list.
+ * Only buffers that will be stored back (resolved) and weren't fully
+ * cleared this frame need their previous contents reloaded.
+ */
+static void
+vc5_rcl_emit_loads(struct vc5_job *job, struct vc5_cl *cl)
+{
+ uint32_t loads_pending = job->resolve & ~job->cleared;
+
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+ if (!(loads_pending & bit))
+ continue;
+
+ /* On V3D 3.x, general loads only handle MSAA surfaces;
+ * single-sampled loads go through the
+ * RELOAD_TILE_COLOUR_BUFFER packet below.
+ */
+ struct pipe_surface *psurf = job->cbufs[i];
+ if (!psurf || (V3D_VERSION < 40 &&
+ psurf->texture->nr_samples <= 1)) {
+ continue;
+ }
+
+ load_general(cl, psurf, RENDER_TARGET_0 + i,
+ bit, &loads_pending);
+ }
+
+ if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) &&
+ (V3D_VERSION >= 40 ||
+ (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) {
+ struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture);
+
+ /* With separate stencil, stencil gets its own general
+ * load; any depth load is handled by the combined path
+ * below.
+ */
+ if (rsc->separate_stencil &&
+ (loads_pending & PIPE_CLEAR_STENCIL)) {
+ load_general(cl, job->zsbuf,
+ STENCIL,
+ PIPE_CLEAR_STENCIL,
+ &loads_pending);
+ }
+
+ if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
+ load_general(cl, job->zsbuf,
+ zs_buffer_from_pipe_bits(loads_pending),
+ loads_pending & PIPE_CLEAR_DEPTHSTENCIL,
+ &loads_pending);
+ }
+ }
+
+#if V3D_VERSION < 40
+ /* The initial reload will be queued until we get the
+ * tile coordinates.
+ */
+ if (loads_pending) {
+ cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) {
+ load.disable_colour_buffer_load =
+ (~loads_pending &
+ PIPE_CLEAR_COLOR_BUFFERS) >>
+ PIPE_FIRST_COLOR_BUFFER_BIT;
+ load.enable_z_load =
+ loads_pending & PIPE_CLEAR_DEPTH;
+ load.enable_stencil_load =
+ loads_pending & PIPE_CLEAR_STENCIL;
+ }
+ }
+#else /* V3D_VERSION >= 40 */
+ assert(!loads_pending);
+ cl_emit(cl, END_OF_LOADS, end);
+#endif
+}
+
+/* Emits the tile-buffer stores at the end of the generic per-tile list,
+ * folding TLB clears into the stores per the strategy described below.
+ */
+static void
+vc5_rcl_emit_stores(struct vc5_job *job, struct vc5_cl *cl)
+{
+ MAYBE_UNUSED bool needs_color_clear = job->cleared & PIPE_CLEAR_COLOR_BUFFERS;
+ MAYBE_UNUSED bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH;
+ MAYBE_UNUSED bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL;
+
+ /* For clearing color in a TLB general on V3D 3.3:
+ *
+ * - NONE buffer store clears all TLB color buffers.
+ * - color buffer store clears just the TLB color buffer being stored.
+ * - Z/S buffers store may not clear the TLB color buffer.
+ *
+ * And on V3D 4.1, we only have one flag for "clear the buffer being
+ * stored" in the general packet, and a separate packet to clear all
+ * color TLB buffers.
+ *
+ * As a result, we only bother flagging TLB color clears in a general
+ * packet when we don't have to emit a separate packet to clear all
+ * TLB color buffers.
+ */
+ bool general_color_clear = (needs_color_clear &&
+ (job->cleared & PIPE_CLEAR_COLOR_BUFFERS) ==
+ (job->resolve & PIPE_CLEAR_COLOR_BUFFERS));
+
+ uint32_t stores_pending = job->resolve;
+
+ /* For V3D 4.1, use general stores for all TLB stores.
+ *
+ * For V3D 3.3, we only use general stores to do raw stores for any
+ * MSAA surfaces. These output UIF tiled images where each 4x MSAA
+ * pixel is a 2x2 quad, and the format will be that of the
+ * internal_type/internal_bpp, rather than the format from GL's
+ * perspective. Non-MSAA surfaces will use
+ * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
+ */
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+ if (!(job->resolve & bit))
+ continue;
+
+ struct pipe_surface *psurf = job->cbufs[i];
+ if (!psurf ||
+ (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) {
+ continue;
+ }
+
+ store_general(job, cl, psurf, RENDER_TARGET_0 + i, bit,
+ &stores_pending, general_color_clear);
+ }
+
+ if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
+ !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) {
+ struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture);
+ if (rsc->separate_stencil) {
+ if (job->resolve & PIPE_CLEAR_DEPTH) {
+ store_general(job, cl, job->zsbuf, Z,
+ PIPE_CLEAR_DEPTH,
+ &stores_pending,
+ general_color_clear);
+ }
+
+ if (job->resolve & PIPE_CLEAR_STENCIL) {
+ store_general(job, cl, job->zsbuf, STENCIL,
+ PIPE_CLEAR_STENCIL,
+ &stores_pending,
+ general_color_clear);
+ }
+ } else {
+ store_general(job, cl, job->zsbuf,
+ zs_buffer_from_pipe_bits(job->resolve),
+ job->resolve & PIPE_CLEAR_DEPTHSTENCIL,
+ &stores_pending, general_color_clear);
+ }
+ }
+
+ if (stores_pending) {
+#if V3D_VERSION < 40
+ /* Store any remaining (non-MSAA, on 3.x) buffers with the
+ * resolved-store packet, clearing the TLB as we go.
+ */
+ cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
+
+ store.disable_color_buffer_write =
+ (~stores_pending >>
+ PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf;
+ store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
+ store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
+
+ /* Note that when set this will clear all of the color
+ * buffers.
+ */
+ store.disable_colour_buffers_clear_on_write =
+ !needs_color_clear;
+ store.disable_z_buffer_clear_on_write =
+ !needs_z_clear;
+ store.disable_stencil_buffer_clear_on_write =
+ !needs_s_clear;
+ };
+#else /* V3D_VERSION >= 40 */
+ unreachable("All color buffers should have been stored.");
+#endif /* V3D_VERSION >= 40 */
+ } else if (needs_color_clear && !general_color_clear) {
+ /* If we didn't do our color clears in the general packet,
+ * then emit a packet to clear all the TLB color buffers now.
+ */
+#if V3D_VERSION < 40
+ cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+#else /* V3D_VERSION >= 40 */
+ cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
+ clear.clear_all_render_targets = true;
+ }
+#endif /* V3D_VERSION >= 40 */
+ }
+}
+
+/* Builds the generic tile list (loads, branch into the per-tile binned
+ * list, stores) in the job's indirect CL, then points the RCL at it.
+ *
+ * NOTE(review): the last_cbuf parameter is unused in this body -- confirm
+ * whether callers still need to pass it.
+ */
+static void
+vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job, int last_cbuf)
+{
+ /* Emit the generic list in our indirect state -- the rcl will just
+ * have pointers into it.
+ */
+ struct vc5_cl *cl = &job->indirect;
+ vc5_cl_ensure_space(cl, 200, 1);
+ struct vc5_cl_reloc tile_list_start = cl_get_address(cl);
+
+ if (V3D_VERSION >= 40) {
+ /* V3D 4.x only requires a single tile coordinates, and
+ * END_OF_LOADS switches us between loading and rendering.
+ */
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+ }
+
+ vc5_rcl_emit_loads(job, cl);
+
+ if (V3D_VERSION < 40) {
+ /* Tile Coordinates triggers the last reload and sets where
+ * the stores go. There must be one per store packet.
+ */
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+ }
+
+ /* The binner starts out writing tiles assuming that the initial mode
+ * is triangles, so make sure that's the case.
+ */
+ cl_emit(cl, PRIMITIVE_LIST_FORMAT, fmt) {
+ fmt.data_type = LIST_INDEXED;
+ fmt.primitive_type = LIST_TRIANGLES;
+ }
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ vc5_rcl_emit_stores(job, cl);
+
+#if V3D_VERSION >= 40
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+#endif
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ /* Point the RCL at the list we just built in the indirect CL. */
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = cl_get_address(cl);
+ }
+}
+
+#if V3D_VERSION >= 40
+/* Fills in one render target's internal bpp/type/clamp fields for the V3D
+ * 4.x RT config packet.  When no color buffer is bound at @cbuf, the
+ * outputs are left untouched (the caller's packet defaults remain).
+ */
+static void
+v3d_setup_render_target(struct vc5_job *job, int cbuf,
+ uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp)
+{
+ if (!job->cbufs[cbuf])
+ return;
+
+ struct vc5_surface *surf = vc5_surface(job->cbufs[cbuf]);
+ *rt_bpp = surf->internal_bpp;
+ *rt_type = surf->internal_type;
+ *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
+}
+
+#else /* V3D_VERSION < 40 */
+
+/* Emits the V3D 3.x Z/stencil rendering mode config packet, for either the
+ * main Z(/S) surface or (with is_separate_stencil) the separate stencil
+ * surface.  Also records the resource write if this job resolves Z/S.
+ */
+static void
+v3d_emit_z_stencil_config(struct vc5_job *job, struct vc5_surface *surf,
+ struct vc5_resource *rsc, bool is_separate_stencil)
+{
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) {
+ zs.address = cl_address(rsc->bo, surf->offset);
+
+ if (!is_separate_stencil) {
+ zs.internal_type = surf->internal_type;
+ zs.output_image_format = surf->format;
+ } else {
+ zs.z_stencil_id = 1; /* Separate stencil */
+ }
+
+ zs.padded_height_of_output_image_in_uif_blocks =
+ surf->padded_height_of_output_image_in_uif_blocks;
+
+ /* Z/S surfaces are always tiled, never raster. */
+ assert(surf->tiling != VC5_TILING_RASTER);
+ zs.memory_format = surf->tiling;
+ }
+
+ if (job->resolve & (is_separate_stencil ?
+ PIPE_CLEAR_STENCIL :
+ PIPE_CLEAR_DEPTHSTENCIL)) {
+ rsc->writes++;
+ }
+}
+#endif /* V3D_VERSION < 40 */
+
+/* Integer division rounding up.  Both arguments are fully parenthesized so
+ * the macro is safe with expression arguments (the original left the
+ * divisor unparenthesized, breaking e.g. div_round_up(x, y + 1)).
+ */
+#define div_round_up(a, b) (((a) + (b) - 1) / (b))
+
+/* Builds the job's complete render command list (RCL): rendering mode
+ * configuration, per-RT clear colors, Z/S config and clear values, tile
+ * list setup, the generic per-tile list, and the supertile coordinates
+ * that walk the frame.
+ */
+void
+v3dX(emit_rcl)(struct vc5_job *job)
+{
+ /* The RCL list should be empty. */
+ assert(!job->rcl.bo);
+
+ vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 *
+ cl_packet_length(SUPERTILE_COORDINATES));
+ job->submit.rcl_start = job->rcl.bo->offset;
+ vc5_job_add_bo(job, job->rcl.bo);
+
+ /* Highest bound color buffer index + 1. */
+ int nr_cbufs = 0;
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (job->cbufs[i])
+ nr_cbufs = i + 1;
+ }
+
+ /* Common config must be the first TILE_RENDERING_MODE_CONFIGURATION
+ * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are
+ * optional updates to the previous HW state.
+ */
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION,
+ config) {
+#if V3D_VERSION < 40
+ config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH;
+ config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL;
+#else /* V3D_VERSION >= 40 */
+ if (job->zsbuf) {
+ struct vc5_surface *surf = vc5_surface(job->zsbuf);
+ config.internal_depth_type = surf->internal_type;
+ }
+#endif /* V3D_VERSION >= 40 */
+
+ /* XXX: Early D/S clear */
+
+ switch (job->first_ez_state) {
+ case VC5_EZ_UNDECIDED:
+ case VC5_EZ_LT_LE:
+ config.early_z_disable = false;
+ config.early_z_test_and_update_direction =
+ EARLY_Z_DIRECTION_LT_LE;
+ break;
+ case VC5_EZ_GT_GE:
+ config.early_z_disable = false;
+ config.early_z_test_and_update_direction =
+ EARLY_Z_DIRECTION_GT_GE;
+ break;
+ case VC5_EZ_DISABLED:
+ config.early_z_disable = true;
+ }
+
+ config.image_width_pixels = job->draw_width;
+ config.image_height_pixels = job->draw_height;
+
+ config.number_of_render_targets_minus_1 =
+ MAX2(nr_cbufs, 1) - 1;
+
+ config.multisample_mode_4x = job->msaa;
+
+ config.maximum_bpp_of_all_render_targets = job->internal_bpp;
+ }
+
+ for (int i = 0; i < nr_cbufs; i++) {
+ struct pipe_surface *psurf = job->cbufs[i];
+ if (!psurf)
+ continue;
+ struct vc5_surface *surf = vc5_surface(psurf);
+ struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+ MAYBE_UNUSED uint32_t config_pad = 0;
+ uint32_t clear_pad = 0;
+
+ /* XXX: Set the pad for raster. */
+ if (surf->tiling == VC5_TILING_UIF_NO_XOR ||
+ surf->tiling == VC5_TILING_UIF_XOR) {
+ int uif_block_height = vc5_utile_height(rsc->cpp) * 2;
+ uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) /
+ uif_block_height);
+ /* Small pads fit in the RT config packet; a pad of 15
+ * plus clear_pad routes taller padding through the
+ * PART3 clear packet below.
+ */
+ if (surf->padded_height_of_output_image_in_uif_blocks -
+ implicit_padded_height < 15) {
+ config_pad = (surf->padded_height_of_output_image_in_uif_blocks -
+ implicit_padded_height);
+ } else {
+ config_pad = 15;
+ clear_pad = surf->padded_height_of_output_image_in_uif_blocks;
+ }
+ }
+
+#if V3D_VERSION < 40
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
+ rt.address = cl_address(rsc->bo, surf->offset);
+ rt.internal_type = surf->internal_type;
+ rt.output_image_format = surf->format;
+ rt.memory_format = surf->tiling;
+ rt.internal_bpp = surf->internal_bpp;
+ rt.render_target_number = i;
+ rt.pad = config_pad;
+
+ if (job->resolve & PIPE_CLEAR_COLOR0 << i)
+ rsc->writes++;
+ }
+#endif /* V3D_VERSION < 40 */
+
+ /* The 128-bit clear color is split across up to three packets
+ * (PART1/2/3), emitted only as far as the RT's bpp requires.
+ */
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1,
+ clear) {
+ clear.clear_color_low_32_bits = job->clear_color[i][0];
+ clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
+ clear.render_target_number = i;
+ };
+
+ if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2,
+ clear) {
+ clear.clear_color_mid_low_32_bits =
+ ((job->clear_color[i][1] >> 24) |
+ (job->clear_color[i][2] << 8));
+ clear.clear_color_mid_high_24_bits =
+ ((job->clear_color[i][2] >> 24) |
+ ((job->clear_color[i][3] & 0xffff) << 8));
+ clear.render_target_number = i;
+ };
+ }
+
+ if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3,
+ clear) {
+ clear.uif_padded_height_in_uif_blocks = clear_pad;
+ clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
+ clear.render_target_number = i;
+ };
+ }
+ }
+
+#if V3D_VERSION >= 40
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
+ v3d_setup_render_target(job, 0,
+ &rt.render_target_0_internal_bpp,
+ &rt.render_target_0_internal_type,
+ &rt.render_target_0_clamp);
+ v3d_setup_render_target(job, 1,
+ &rt.render_target_1_internal_bpp,
+ &rt.render_target_1_internal_type,
+ &rt.render_target_1_clamp);
+ v3d_setup_render_target(job, 2,
+ &rt.render_target_2_internal_bpp,
+ &rt.render_target_2_internal_type,
+ &rt.render_target_2_clamp);
+ v3d_setup_render_target(job, 3,
+ &rt.render_target_3_internal_bpp,
+ &rt.render_target_3_internal_type,
+ &rt.render_target_3_clamp);
+ }
+#endif
+
+#if V3D_VERSION < 40
+ /* TODO: Don't bother emitting if we don't load/clear Z/S. */
+ if (job->zsbuf) {
+ struct pipe_surface *psurf = job->zsbuf;
+ struct vc5_surface *surf = vc5_surface(psurf);
+ struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+ v3d_emit_z_stencil_config(job, surf, rsc, false);
+
+ /* Emit the separate stencil packet if we have a resource for
+ * it. The HW will only load/store this buffer if the
+ * Z/Stencil config doesn't have stencil in its format.
+ */
+ if (surf->separate_stencil) {
+ v3d_emit_z_stencil_config(job,
+ vc5_surface(surf->separate_stencil),
+ rsc->separate_stencil, true);
+ }
+ }
+#endif /* V3D_VERSION < 40 */
+
+ /* Ends rendering mode config. */
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES,
+ clear) {
+ clear.z_clear_value = job->clear_z;
+ clear.stencil_vg_mask_clear_value = job->clear_s;
+ };
+
+ /* Always set initial block size before the first branch, which needs
+ * to match the value from binning mode config.
+ */
+ cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+ init.use_auto_chained_tile_lists = true;
+ init.size_of_first_block_in_chained_tile_lists =
+ TILE_ALLOCATION_BLOCK_SIZE_64B;
+ }
+
+ uint32_t supertile_w = 1, supertile_h = 1;
+
+ /* If doing multicore binning, we would need to initialize each core's
+ * tile list here.
+ */
+ cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
+ list.address = cl_address(job->tile_alloc, 0);
+ }
+
+ cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) {
+ uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
+ const uint32_t max_supertiles = 256;
+
+ /* Size up our supertiles until we get under the limit. */
+ for (;;) {
+ frame_w_in_supertiles = div_round_up(job->draw_tiles_x,
+ supertile_w);
+ frame_h_in_supertiles = div_round_up(job->draw_tiles_y,
+ supertile_h);
+ if (frame_w_in_supertiles * frame_h_in_supertiles <
+ max_supertiles) {
+ break;
+ }
+
+ /* Grow the smaller dimension to keep supertiles
+ * roughly square.
+ */
+ if (supertile_w < supertile_h)
+ supertile_w++;
+ else
+ supertile_h++;
+ }
+
+ config.total_frame_width_in_tiles = job->draw_tiles_x;
+ config.total_frame_height_in_tiles = job->draw_tiles_y;
+
+ config.supertile_width_in_tiles_minus_1 = supertile_w - 1;
+ config.supertile_height_in_tiles_minus_1 = supertile_h - 1;
+
+ config.total_frame_width_in_supertiles = frame_w_in_supertiles;
+ config.total_frame_height_in_supertiles = frame_h_in_supertiles;
+ }
+
+ /* Start by clearing the tile buffer. */
+ cl_emit(&job->rcl, TILE_COORDINATES, coords) {
+ coords.tile_column_number = 0;
+ coords.tile_row_number = 0;
+ }
+
+ /* Emit an initial clear of the tile buffers. This is necessary for
+ * any buffers that should be cleared (since clearing normally happens
+ * at the *end* of the generic tile list), but it's also nice to clear
+ * everything so the first tile doesn't inherit any contents from some
+ * previous frame.
+ *
+ * Also, implement the GFXH-1742 workaround. There's a race in the HW
+ * between the RCL updating the TLB's internal type/size and the
+ * spawning of the QPU instances using the TLB's current internal
+ * type/size. To make sure the QPUs get the right state, we need 1
+ * dummy store in between internal type/size changes on V3D 3.x, and 2
+ * dummy stores on 4.x.
+ */
+#if V3D_VERSION < 40
+ cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+#else
+ for (int i = 0; i < 2; i++) {
+ if (i > 0)
+ cl_emit(&job->rcl, TILE_COORDINATES, coords);
+ cl_emit(&job->rcl, END_OF_LOADS, end);
+ cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+ if (i == 0) {
+ cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) {
+ clear.clear_z_stencil_buffer = true;
+ clear.clear_all_render_targets = true;
+ }
+ }
+ cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
+ }
+#endif
+
+ cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
+
+ vc5_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1);
+
+ cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
+
+ /* XXX: Use Morton order */
+ uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
+ uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
+ uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
+ uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;
+
+ /* Only walk supertiles inside the job's damage bounds. */
+ uint32_t max_x_supertile = 0;
+ uint32_t max_y_supertile = 0;
+ if (job->draw_max_x != 0 && job->draw_max_y != 0) {
+ max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
+ max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
+ }
+
+ for (int y = min_y_supertile; y <= max_y_supertile; y++) {
+ for (int x = min_x_supertile; x <= max_x_supertile; x++) {
+ cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
+ coords.column_number_in_supertiles = x;
+ coords.row_number_in_supertiles = y;
+ }
+ }
+ }
+
+ cl_emit(&job->rcl, END_OF_RENDERING, end);
+}
diff --git a/src/gallium/drivers/v3d/v3dx_simulator.c b/src/gallium/drivers/v3d/v3dx_simulator.c
new file mode 100644
index 00000000000..ee8b6f2b9fd
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3dx_simulator.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file v3dx_simulator.c
+ *
+ * Implements the actual HW interaction between the GL driver's VC5
+ * simulator and the simulator.
+ *
+ * The register headers between V3D versions will have conflicting defines, so
+ * all register interactions appear in this file and are compiled per V3D version
+ * we support.
+ */
+
+#ifdef USE_V3D_SIMULATOR
+
+#include "v3d_screen.h"
+#include "v3d_context.h"
+#include "v3d_simulator_wrapper.h"
+
+#define HW_REGISTER_RO(x) (x)
+#define HW_REGISTER_RW(x) (x)
+#if V3D_VERSION >= 41
+#include "libs/core/v3d/registers/4.1.34.0/v3d.h"
+#else
+#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
+#endif
+
+#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
+#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
+
+/* Flushes the L3 cache through the GCA, when the part has one.  (The
+ * register pokes are compiled out on V3D >= 4.0.)
+ */
+static void
+vc5_flush_l3(struct v3d_hw *v3d)
+{
+ if (!v3d_hw_has_gca(v3d))
+ return;
+
+#if V3D_VERSION < 40
+ uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
+
+ /* Pulse the FLUSH bit: set it, then clear it again. */
+ V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
+ V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
+#endif
+}
+
+/* Invalidates the L2 cache. This is a read-only cache. */
+static void
+vc5_flush_l2(struct v3d_hw *v3d)
+{
+ /* Clear and (re)enable core 0's L2C in a single write. */
+ V3D_WRITE(V3D_CTL_0_L2CACTL,
+ V3D_CTL_0_L2CACTL_L2CCLR_SET |
+ V3D_CTL_0_L2CACTL_L2CENA_SET);
+}
+
+/* Invalidates texture L2 cachelines */
+static void
+vc5_flush_l2t(struct v3d_hw *v3d)
+{
+ /* Flush the whole address range: start at 0, end at all-ones,
+ * flush mode 0.
+ */
+ V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
+ V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
+ V3D_WRITE(V3D_CTL_0_L2TCACTL,
+ V3D_CTL_0_L2TCACTL_L2TFLS_SET |
+ (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
+}
+
+/* Invalidates the slice caches. These are read-only caches. */
+static void
+vc5_flush_slices(struct v3d_hw *v3d)
+{
+ /* Writing all-ones sets every per-slice-cache clear bit at once. */
+ V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
+}
+
+/* Performs the full pre-submit cache maintenance: L3 (via the GCA, when
+ * present), then L2C, L2T, and the slice caches.
+ */
+static void
+vc5_flush_caches(struct v3d_hw *v3d)
+{
+ vc5_flush_l3(v3d);
+ vc5_flush_l2(v3d);
+ vc5_flush_l2t(v3d);
+ vc5_flush_slices(v3d);
+}
+
+/* Implements DRM_IOCTL_V3D_GET_PARAM against the simulator by reading the
+ * corresponding identity/config register.  Returns 0 on success; aborts on
+ * an unsupported param (a simulator bug, not a runtime condition).
+ */
+int
+v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
+ struct drm_v3d_get_param *args)
+{
+ /* Map from DRM_V3D_PARAM_* to the simulator register to read.
+ * Entries left zero are unsupported.
+ */
+ static const uint32_t reg_map[] = {
+ [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
+ };
+
+ if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
+ args->value = V3D_READ(reg_map[args->param]);
+ return 0;
+ }
+
+ /* Report the unknown *param* (the original printed the still
+ * uninitialized args->value here).
+ */
+ fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n",
+ (long long)args->param);
+ abort();
+}
+
+/* One-time simulator register setup performed at initialization. */
+void
+v3dX(simulator_init_regs)(struct v3d_hw *v3d)
+{
+#if V3D_VERSION == 33
+ /* Set OVRTMUOUT to match kernel behavior.
+ *
+ * This means that the texture sampler uniform configuration's tmu
+ * output type field is used, instead of using the hardware default
+ * behavior based on the texture type. If you want the default
+ * behavior, you can still put "2" in the indirect texture state's
+ * output_type field.
+ *
+ * NOTE(review): the value uses the CTL_1 OVRTMUOUT define while
+ * writing the CTL_0 register -- presumably the bit layout is
+ * identical across cores; confirm against the register headers.
+ */
+ V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
+#endif
+}
+
+/* Submits one CL job to the simulator: resets the GMP, flushes the caches,
+ * then kicks the binner (CT0) and renderer (CT1) command list executors,
+ * busy-waiting for each to finish.
+ */
+void
+v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit,
+ uint32_t gmp_ofs)
+{
+ /* Completely reset the GMP. */
+ V3D_WRITE(V3D_GMP_0_CFG,
+ V3D_GMP_0_CFG_PROTENABLE_SET);
+ V3D_WRITE(V3D_GMP_0_TABLE_ADDR, gmp_ofs);
+ V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0);
+ /* Spin until the GMP has finished reloading its config. */
+ while (V3D_READ(V3D_GMP_0_STATUS) &
+ V3D_GMP_0_STATUS_CFG_BUSY_SET) {
+ ;
+ }
+
+ vc5_flush_caches(v3d);
+
+ /* Program the binner's tile state/alloc memory, if provided. */
+ if (submit->qma) {
+ V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
+ V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
+ }
+#if V3D_VERSION >= 41
+ if (submit->qts) {
+ V3D_WRITE(V3D_CLE_0_CT0QTS,
+ V3D_CLE_0_CT0QTS_CTQTSEN_SET |
+ submit->qts);
+ }
+#endif
+ /* Writing the end address (QEA) starts the binning CL. */
+ V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
+ V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);
+
+ /* Wait for bin to complete before firing render, as it seems the
+ * simulator doesn't implement the semaphores.
+ */
+ while (V3D_READ(V3D_CLE_0_CT0CA) !=
+ V3D_READ(V3D_CLE_0_CT0EA)) {
+ v3d_hw_tick(v3d);
+ }
+
+ V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
+ V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);
+
+ while (V3D_READ(V3D_CLE_0_CT1CA) !=
+ V3D_READ(V3D_CLE_0_CT1EA) ||
+ V3D_READ(V3D_CLE_1_CT1CA) !=
+ V3D_READ(V3D_CLE_1_CT1EA)) {
+ v3d_hw_tick(v3d);
+ }
+}
+
+#endif /* USE_V3D_SIMULATOR */
diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c
new file mode 100644
index 00000000000..e992796a218
--- /dev/null
+++ b/src/gallium/drivers/v3d/v3dx_state.c
@@ -0,0 +1,951 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_framebuffer.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_half.h"
+#include "util/u_helpers.h"
+
+#include "v3d_context.h"
+#include "v3d_tiling.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+
/* Returns a heap-allocated copy of the SIZE-byte CSO at SRC, or NULL on
 * allocation failure.  The caller owns the copy (freed by the matching
 * vc5_generic_cso_state_delete()).
 */
static void *
vc5_generic_cso_state_create(const void *src, uint32_t size)
{
        void *copy = calloc(1, size);

        if (copy)
                memcpy(copy, src, size);

        return copy;
}
+
/* Frees a CSO copy made by vc5_generic_cso_state_create().  free(NULL)
 * is a no-op, so no guard is needed.
 */
static void
vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso)
{
        free(hwcso);
}
+
+static void
+vc5_set_blend_color(struct pipe_context *pctx,
+ const struct pipe_blend_color *blend_color)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->blend_color.f = *blend_color;
+ for (int i = 0; i < 4; i++) {
+ vc5->blend_color.hf[i] =
+ util_float_to_half(blend_color->color[i]);
+ }
+ vc5->dirty |= VC5_DIRTY_BLEND_COLOR;
+}
+
+static void
+vc5_set_stencil_ref(struct pipe_context *pctx,
+ const struct pipe_stencil_ref *stencil_ref)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->stencil_ref = *stencil_ref;
+ vc5->dirty |= VC5_DIRTY_STENCIL_REF;
+}
+
+static void
+vc5_set_clip_state(struct pipe_context *pctx,
+ const struct pipe_clip_state *clip)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->clip = *clip;
+ vc5->dirty |= VC5_DIRTY_CLIP;
+}
+
+static void
+vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1);
+ vc5->dirty |= VC5_DIRTY_SAMPLE_MASK;
+}
+
/* Converts a float to the HW's 1-8-7 (sign, 8-bit exponent, 7-bit
 * mantissa) format by keeping the top 16 bits of the IEEE 754
 * single-precision encoding, truncating the mantissa.
 */
static uint16_t
float_to_187_half(float f)
{
        return fui(f) >> 16;
}
+
+static void *
+vc5_create_rasterizer_state(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct vc5_rasterizer_state *so;
+
+ so = CALLOC_STRUCT(vc5_rasterizer_state);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
+ * BCM21553).
+ */
+ so->point_size = MAX2(cso->point_size, .125f);
+
+ if (cso->offset_tri) {
+ so->offset_units = float_to_187_half(cso->offset_units);
+ so->offset_factor = float_to_187_half(cso->offset_scale);
+ }
+
+ return so;
+}
+
+/* Blend state is baked into shaders. */
+static void *
+vc5_create_blend_state(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso)
+{
+ return vc5_generic_cso_state_create(cso, sizeof(*cso));
+}
+
+static uint32_t
+translate_stencil_op(enum pipe_stencil_op op)
+{
+ switch (op) {
+ case PIPE_STENCIL_OP_KEEP: return V3D_STENCIL_OP_KEEP;
+ case PIPE_STENCIL_OP_ZERO: return V3D_STENCIL_OP_ZERO;
+ case PIPE_STENCIL_OP_REPLACE: return V3D_STENCIL_OP_REPLACE;
+ case PIPE_STENCIL_OP_INCR: return V3D_STENCIL_OP_INCR;
+ case PIPE_STENCIL_OP_DECR: return V3D_STENCIL_OP_DECR;
+ case PIPE_STENCIL_OP_INCR_WRAP: return V3D_STENCIL_OP_INCWRAP;
+ case PIPE_STENCIL_OP_DECR_WRAP: return V3D_STENCIL_OP_DECWRAP;
+ case PIPE_STENCIL_OP_INVERT: return V3D_STENCIL_OP_INVERT;
+ }
+ unreachable("bad stencil op");
+}
+
/* Creates depth/stencil/alpha CSO state.
 *
 * Precomputes the early-Z (EZ) compatibility classification from the
 * depth function and stencil state, and pre-packs the front/back
 * STENCIL_CONFIG records so draw time just has to emit them.
 */
static void *
vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx,
                                     const struct pipe_depth_stencil_alpha_state *cso)
{
        struct vc5_depth_stencil_alpha_state *so;

        so = CALLOC_STRUCT(vc5_depth_stencil_alpha_state);
        if (!so)
                return NULL;

        so->base = *cso;

        if (cso->depth.enabled) {
                /* Classify the depth func by which direction of EZ
                 * testing it is compatible with (or neither).
                 */
                switch (cso->depth.func) {
                case PIPE_FUNC_LESS:
                case PIPE_FUNC_LEQUAL:
                        so->ez_state = VC5_EZ_LT_LE;
                        break;
                case PIPE_FUNC_GREATER:
                case PIPE_FUNC_GEQUAL:
                        so->ez_state = VC5_EZ_GT_GE;
                        break;
                case PIPE_FUNC_NEVER:
                case PIPE_FUNC_EQUAL:
                        so->ez_state = VC5_EZ_UNDECIDED;
                        break;
                default:
                        so->ez_state = VC5_EZ_DISABLED;
                        break;
                }

                /* If stencil is enabled and it's not a no-op, then it would
                 * break EZ updates.
                 */
                /* NOTE(review): the back-face (stencil[1]) test combines
                 * its two conditions with && while the front-face test
                 * uses || — confirm the asymmetry is intended.
                 */
                if (cso->stencil[0].enabled &&
                    (cso->stencil[0].zfail_op != PIPE_STENCIL_OP_KEEP ||
                     cso->stencil[0].func != PIPE_FUNC_ALWAYS ||
                     (cso->stencil[1].enabled &&
                      (cso->stencil[1].zfail_op != PIPE_STENCIL_OP_KEEP &&
                       cso->stencil[1].func != PIPE_FUNC_ALWAYS)))) {
                        so->ez_state = VC5_EZ_DISABLED;
                }
        }

        const struct pipe_stencil_state *front = &cso->stencil[0];
        const struct pipe_stencil_state *back = &cso->stencil[1];

        if (front->enabled) {
                v3dx_pack(&so->stencil_front, STENCIL_CONFIG, config) {
                        config.front_config = true;
                        /* If !back->enabled, then the front values should be
                         * used for both front and back-facing primitives.
                         */
                        config.back_config = !back->enabled;

                        config.stencil_write_mask = front->writemask;
                        config.stencil_test_mask = front->valuemask;

                        config.stencil_test_function = front->func;
                        config.stencil_pass_op =
                                translate_stencil_op(front->zpass_op);
                        config.depth_test_fail_op =
                                translate_stencil_op(front->zfail_op);
                        config.stencil_test_fail_op =
                                translate_stencil_op(front->fail_op);
                }
        }
        if (back->enabled) {
                v3dx_pack(&so->stencil_back, STENCIL_CONFIG, config) {
                        config.front_config = false;
                        config.back_config = true;

                        config.stencil_write_mask = back->writemask;
                        config.stencil_test_mask = back->valuemask;

                        config.stencil_test_function = back->func;
                        config.stencil_pass_op =
                                translate_stencil_op(back->zpass_op);
                        config.depth_test_fail_op =
                                translate_stencil_op(back->zfail_op);
                        config.stencil_test_fail_op =
                                translate_stencil_op(back->fail_op);
                }
        }

        return so;
}
+
+static void
+vc5_set_polygon_stipple(struct pipe_context *pctx,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->stipple = *stipple;
+ vc5->dirty |= VC5_DIRTY_STIPPLE;
+}
+
+static void
+vc5_set_scissor_states(struct pipe_context *pctx,
+ unsigned start_slot,
+ unsigned num_scissors,
+ const struct pipe_scissor_state *scissor)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+
+ vc5->scissor = *scissor;
+ vc5->dirty |= VC5_DIRTY_SCISSOR;
+}
+
+static void
+vc5_set_viewport_states(struct pipe_context *pctx,
+ unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *viewport)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->viewport = *viewport;
+ vc5->dirty |= VC5_DIRTY_VIEWPORT;
+}
+
+static void
+vc5_set_vertex_buffers(struct pipe_context *pctx,
+ unsigned start_slot, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_vertexbuf_stateobj *so = &vc5->vertexbuf;
+
+ util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb,
+ start_slot, count);
+ so->count = util_last_bit(so->enabled_mask);
+
+ vc5->dirty |= VC5_DIRTY_VTXBUF;
+}
+
+static void
+vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->blend = hwcso;
+ vc5->dirty |= VC5_DIRTY_BLEND;
+}
+
+static void
+vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->rasterizer = hwcso;
+ vc5->dirty |= VC5_DIRTY_RASTERIZER;
+}
+
+static void
+vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->zsa = hwcso;
+ vc5->dirty |= VC5_DIRTY_ZSA;
+}
+
/* Creates vertex-element CSO state.
 *
 * Pre-packs one GL_SHADER_STATE_ATTRIBUTE_RECORD per element into
 * so->attrs (the address/stride fields get filled in at draw time) and
 * allocates a BO of default attribute values (0, 0, 0, 1) for elements
 * the vertex shader reads but the state doesn't supply.
 */
/* NOTE(review): num_elements is memcpy'd into so->pipe without a bounds
 * check, and the vc5_bo_alloc()/vc5_bo_map() results are used
 * unchecked — confirm callers guarantee these can't fail.
 */
static void *
vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
                        const struct pipe_vertex_element *elements)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj);

        if (!so)
                return NULL;

        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
        so->num_elements = num_elements;

        for (int i = 0; i < so->num_elements; i++) {
                const struct pipe_vertex_element *elem = &elements[i];
                const struct util_format_description *desc =
                        util_format_description(elem->src_format);
                /* Size in bits of the first (red) channel, used to pick
                 * the attribute type below.
                 */
                uint32_t r_size = desc->channel[0].size;

                const uint32_t size =
                        cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);

                /* Records are stored back-to-back in so->attrs at the
                 * packet's packed length.
                 */
                v3dx_pack(&so->attrs[i * size],
                          GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
                        /* vec_size == 0 means 4 */
                        attr.vec_size = desc->nr_channels & 3;
                        attr.signed_int_type = (desc->channel[0].type ==
                                                UTIL_FORMAT_TYPE_SIGNED);

                        attr.normalized_int_type = desc->channel[0].normalized;
                        attr.read_as_int_uint = desc->channel[0].pure_integer;
                        /* The HW divisor field is 16 bits wide. */
                        attr.instance_divisor = MIN2(elem->instance_divisor,
                                                     0xffff);

                        switch (desc->channel[0].type) {
                        case UTIL_FORMAT_TYPE_FLOAT:
                                if (r_size == 32) {
                                        attr.type = ATTRIBUTE_FLOAT;
                                } else {
                                        assert(r_size == 16);
                                        attr.type = ATTRIBUTE_HALF_FLOAT;
                                }
                                break;

                        case UTIL_FORMAT_TYPE_SIGNED:
                        case UTIL_FORMAT_TYPE_UNSIGNED:
                                switch (r_size) {
                                case 32:
                                        attr.type = ATTRIBUTE_INT;
                                        break;
                                case 16:
                                        attr.type = ATTRIBUTE_SHORT;
                                        break;
                                case 10:
                                        attr.type = ATTRIBUTE_INT2_10_10_10;
                                        break;
                                case 8:
                                        attr.type = ATTRIBUTE_BYTE;
                                        break;
                                default:
                                        fprintf(stderr,
                                                "format %s unsupported\n",
                                                desc->name);
                                        attr.type = ATTRIBUTE_BYTE;
                                        abort();
                                }
                                break;

                        default:
                                fprintf(stderr,
                                        "format %s unsupported\n",
                                        desc->name);
                                abort();
                        }
                }
        }

        /* Set up the default attribute values in case any of the vertex
         * elements use them.
         */
        so->default_attribute_values = vc5_bo_alloc(vc5->screen,
                                                    VC5_MAX_ATTRIBUTES *
                                                    4 * sizeof(float),
                                                    "default attributes");
        uint32_t *attrs = vc5_bo_map(so->default_attribute_values);
        for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) {
                attrs[i * 4 + 0] = 0;
                attrs[i * 4 + 1] = 0;
                attrs[i * 4 + 2] = 0;
                /* The default W is integer 1 for pure-integer formats and
                 * float 1.0 otherwise.
                 */
                if (i < so->num_elements &&
                    util_format_is_pure_integer(so->pipe[i].src_format)) {
                        attrs[i * 4 + 3] = 1;
                } else {
                        attrs[i * 4 + 3] = fui(1.0);
                }
        }

        return so;
}
+
+static void
+vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ vc5->vtx = hwcso;
+ vc5->dirty |= VC5_DIRTY_VTXSTATE;
+}
+
+static void
+vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
+ const struct pipe_constant_buffer *cb)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_constbuf_stateobj *so = &vc5->constbuf[shader];
+
+ util_copy_constant_buffer(&so->cb[index], cb);
+
+ /* Note that the state tracker can unbind constant buffers by
+ * passing NULL here.
+ */
+ if (unlikely(!cb)) {
+ so->enabled_mask &= ~(1 << index);
+ so->dirty_mask &= ~(1 << index);
+ return;
+ }
+
+ so->enabled_mask |= 1 << index;
+ so->dirty_mask |= 1 << index;
+ vc5->dirty |= VC5_DIRTY_CONSTBUF;
+}
+
+static void
+vc5_set_framebuffer_state(struct pipe_context *pctx,
+ const struct pipe_framebuffer_state *framebuffer)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct pipe_framebuffer_state *cso = &vc5->framebuffer;
+
+ vc5->job = NULL;
+
+ util_copy_framebuffer_state(cso, framebuffer);
+
+ vc5->swap_color_rb = 0;
+ vc5->blend_dst_alpha_one = 0;
+ for (int i = 0; i < vc5->framebuffer.nr_cbufs; i++) {
+ struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i];
+ if (!cbuf)
+ continue;
+
+ const struct util_format_description *desc =
+ util_format_description(cbuf->format);
+
+ /* For BGRA8 formats (DRI window system default format), we
+ * need to swap R and B, since the HW's format is RGBA8.
+ */
+ if (desc->swizzle[0] == PIPE_SWIZZLE_Z &&
+ cbuf->format != PIPE_FORMAT_B5G6R5_UNORM) {
+ vc5->swap_color_rb |= 1 << i;
+ }
+
+ if (desc->swizzle[3] == PIPE_SWIZZLE_1)
+ vc5->blend_dst_alpha_one |= 1 << i;
+ }
+
+ vc5->dirty |= VC5_DIRTY_FRAMEBUFFER;
+}
+
+static struct vc5_texture_stateobj *
+vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader)
+{
+ switch (shader) {
+ case PIPE_SHADER_FRAGMENT:
+ vc5->dirty |= VC5_DIRTY_FRAGTEX;
+ return &vc5->fragtex;
+ break;
+ case PIPE_SHADER_VERTEX:
+ vc5->dirty |= VC5_DIRTY_VERTTEX;
+ return &vc5->verttex;
+ break;
+ default:
+ fprintf(stderr, "Unknown shader target %d\n", shader);
+ abort();
+ }
+}
+
+static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest)
+{
+ switch (pipe_wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return 0;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return 1;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return 2;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return 3;
+ case PIPE_TEX_WRAP_CLAMP:
+ return (using_nearest ? 1 : 3);
+ default:
+ unreachable("Unknown wrap mode");
+ }
+}
+
+
+static void *
+vc5_create_sampler_state(struct pipe_context *pctx,
+ const struct pipe_sampler_state *cso)
+{
+ MAYBE_UNUSED struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state);
+
+ if (!so)
+ return NULL;
+
+ memcpy(so, cso, sizeof(*cso));
+
+ bool either_nearest =
+ (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
+ cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
+
+#if V3D_VERSION >= 40
+ so->bo = vc5_bo_alloc(vc5->screen, cl_packet_length(SAMPLER_STATE),
+ "sampler");
+ void *map = vc5_bo_map(so->bo);
+
+ v3dx_pack(map, SAMPLER_STATE, sampler) {
+ sampler.wrap_i_border = false;
+
+ sampler.wrap_s = translate_wrap(cso->wrap_s, either_nearest);
+ sampler.wrap_t = translate_wrap(cso->wrap_t, either_nearest);
+ sampler.wrap_r = translate_wrap(cso->wrap_r, either_nearest);
+
+ sampler.fixed_bias = cso->lod_bias;
+ sampler.depth_compare_function = cso->compare_func;
+
+ sampler.min_filter_nearest =
+ cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
+ sampler.mag_filter_nearest =
+ cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
+ sampler.mip_filter_nearest =
+ cso->min_mip_filter != PIPE_TEX_MIPFILTER_LINEAR;
+
+ sampler.min_level_of_detail = MIN2(MAX2(0, cso->min_lod),
+ 15);
+ sampler.max_level_of_detail = MIN2(cso->max_lod, 15);
+
+ if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ sampler.min_level_of_detail = 0;
+ sampler.max_level_of_detail = 0;
+ }
+
+ if (cso->max_anisotropy) {
+ sampler.anisotropy_enable = true;
+
+ if (cso->max_anisotropy > 8)
+ sampler.maximum_anisotropy = 3;
+ else if (cso->max_anisotropy > 4)
+ sampler.maximum_anisotropy = 2;
+ else if (cso->max_anisotropy > 2)
+ sampler.maximum_anisotropy = 1;
+ }
+
+ sampler.border_colour_mode = V3D_BORDER_COLOUR_FOLLOWS;
+ /* XXX: The border colour field is in the TMU blending format
+ * (32, f16, or i16), and we need to customize it based on
+ * that.
+ *
+ * XXX: for compat alpha formats, we need the alpha field to
+ * be in the red channel.
+ */
+ sampler.border_colour_red =
+ util_float_to_half(cso->border_color.f[0]);
+ sampler.border_colour_green =
+ util_float_to_half(cso->border_color.f[1]);
+ sampler.border_colour_blue =
+ util_float_to_half(cso->border_color.f[2]);
+ sampler.border_colour_alpha =
+ util_float_to_half(cso->border_color.f[3]);
+ }
+
+#else /* V3D_VERSION < 40 */
+ v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) {
+ p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest);
+ p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest);
+ p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest);
+ }
+
+ v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
+ tex.depth_compare_function = cso->compare_func;
+ tex.fixed_bias = cso->lod_bias;
+ }
+#endif /* V3D_VERSION < 40 */
+ return so;
+}
+
+static void
+vc5_sampler_states_bind(struct pipe_context *pctx,
+ enum pipe_shader_type shader, unsigned start,
+ unsigned nr, void **hwcso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader);
+
+ assert(start == 0);
+ unsigned i;
+ unsigned new_nr = 0;
+
+ for (i = 0; i < nr; i++) {
+ if (hwcso[i])
+ new_nr = i + 1;
+ stage_tex->samplers[i] = hwcso[i];
+ }
+
+ for (; i < stage_tex->num_samplers; i++) {
+ stage_tex->samplers[i] = NULL;
+ }
+
+ stage_tex->num_samplers = new_nr;
+}
+
+static void
+vc5_sampler_state_delete(struct pipe_context *pctx,
+ void *hwcso)
+{
+ struct pipe_sampler_state *psampler = hwcso;
+ struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+
+ vc5_bo_unreference(&sampler->bo);
+ free(psampler);
+}
+
#if V3D_VERSION >= 40
/* Maps a gallium swizzle source to the HW encoding: constants 0 and 1
 * first, then R/G/B/A offset by 2.
 */
static uint32_t
translate_swizzle(unsigned char pipe_swizzle)
{
        uint32_t hw;

        switch (pipe_swizzle) {
        case PIPE_SWIZZLE_0:
                hw = 0;
                break;
        case PIPE_SWIZZLE_1:
                hw = 1;
                break;
        case PIPE_SWIZZLE_X:
        case PIPE_SWIZZLE_Y:
        case PIPE_SWIZZLE_Z:
        case PIPE_SWIZZLE_W:
                hw = 2 + pipe_swizzle;
                break;
        default:
                unreachable("unknown swizzle");
        }

        return hw;
}
#endif
+
+static struct pipe_sampler_view *
+vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
+ const struct pipe_sampler_view *cso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_screen *screen = vc5->screen;
+ struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view);
+ struct vc5_resource *rsc = vc5_resource(prsc);
+
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ pipe_reference(NULL, &prsc->reference);
+
+ /* Compute the sampler view's swizzle up front. This will be plugged
+ * into either the sampler (for 16-bit returns) or the shader's
+ * texture key (for 32)
+ */
+ uint8_t view_swizzle[4] = {
+ cso->swizzle_r,
+ cso->swizzle_g,
+ cso->swizzle_b,
+ cso->swizzle_a
+ };
+ const uint8_t *fmt_swizzle =
+ vc5_get_format_swizzle(&screen->devinfo, so->base.format);
+ util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle);
+
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ int msaa_scale = prsc->nr_samples > 1 ? 2 : 1;
+
+#if V3D_VERSION >= 40
+ so->bo = vc5_bo_alloc(vc5->screen, cl_packet_length(SAMPLER_STATE),
+ "sampler");
+ void *map = vc5_bo_map(so->bo);
+
+ v3dx_pack(map, TEXTURE_SHADER_STATE, tex) {
+#else /* V3D_VERSION < 40 */
+ v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
+#endif
+
+ tex.image_width = prsc->width0 * msaa_scale;
+ tex.image_height = prsc->height0 * msaa_scale;
+
+#if V3D_VERSION >= 40
+ /* On 4.x, the height of a 1D texture is redefined to be the
+ * upper 14 bits of the width (which is only usable with txf).
+ */
+ if (prsc->target == PIPE_TEXTURE_1D ||
+ prsc->target == PIPE_TEXTURE_1D_ARRAY) {
+ tex.image_height = tex.image_width >> 14;
+ }
+#endif
+
+ if (prsc->target == PIPE_TEXTURE_3D) {
+ tex.image_depth = prsc->depth0;
+ } else {
+ tex.image_depth = (cso->u.tex.last_layer -
+ cso->u.tex.first_layer) + 1;
+ }
+
+ tex.srgb = util_format_is_srgb(cso->format);
+
+ tex.base_level = cso->u.tex.first_level;
+#if V3D_VERSION >= 40
+ tex.max_level = cso->u.tex.last_level;
+ /* Note that we don't have a job to reference the texture's sBO
+ * at state create time, so any time this sampler view is used
+ * we need to add the texture to the job.
+ */
+ tex.texture_base_pointer = cl_address(NULL,
+ rsc->bo->offset +
+ rsc->slices[0].offset),
+
+ tex.swizzle_r = translate_swizzle(so->swizzle[0]);
+ tex.swizzle_g = translate_swizzle(so->swizzle[1]);
+ tex.swizzle_b = translate_swizzle(so->swizzle[2]);
+ tex.swizzle_a = translate_swizzle(so->swizzle[3]);
+#endif
+ tex.array_stride_64_byte_aligned = rsc->cube_map_stride / 64;
+
+ if (prsc->nr_samples > 1 && V3D_VERSION < 40) {
+ /* Using texture views to reinterpret formats on our
+ * MSAA textures won't work, because we don't lay out
+ * the bits in memory as it's expected -- for example,
+ * RGBA8 and RGB10_A2 are compatible in the
+ * ARB_texture_view spec, but in HW we lay them out as
+ * 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now
+ * to catch failures.
+ *
+ * We explicitly allow remapping S8Z24 to RGBA8888 for
+ * vc5_blit.c's stencil blits.
+ */
+ assert((util_format_linear(cso->format) ==
+ util_format_linear(prsc->format)) ||
+ (prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
+ cso->format == PIPE_FORMAT_R8G8B8A8_UNORM));
+ uint32_t output_image_format =
+ vc5_get_rt_format(&screen->devinfo, cso->format);
+ uint32_t internal_type;
+ uint32_t internal_bpp;
+ vc5_get_internal_type_bpp_for_output_format(&screen->devinfo,
+ output_image_format,
+ &internal_type,
+ &internal_bpp);
+
+ switch (internal_type) {
+ case V3D_INTERNAL_TYPE_8:
+ tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8;
+ break;
+ case V3D_INTERNAL_TYPE_16F:
+ tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F;
+ break;
+ default:
+ unreachable("Bad MSAA texture type");
+ }
+
+ /* sRGB was stored in the tile buffer as linear and
+ * would have been encoded to sRGB on resolved tile
+ * buffer store. Note that this means we would need
+ * shader code if we wanted to read an MSAA sRGB
+ * texture without sRGB decode.
+ */
+ tex.srgb = false;
+ } else {
+ tex.texture_type = vc5_get_tex_format(&screen->devinfo,
+ cso->format);
+ }
+
+ /* Since other platform devices may produce UIF images even
+ * when they're not big enough for V3D to assume they're UIF,
+ * we force images with level 0 as UIF to be always treated
+ * that way.
+ */
+ tex.level_0_is_strictly_uif = (rsc->slices[0].tiling ==
+ VC5_TILING_UIF_XOR ||
+ rsc->slices[0].tiling ==
+ VC5_TILING_UIF_NO_XOR);
+ tex.level_0_xor_enable = (rsc->slices[0].tiling ==
+ VC5_TILING_UIF_XOR);
+
+ if (tex.level_0_is_strictly_uif)
+ tex.level_0_ub_pad = rsc->slices[0].ub_pad;
+
+#if V3D_VERSION >= 40
+ if (tex.uif_xor_disable ||
+ tex.level_0_is_strictly_uif) {
+ tex.extended = true;
+ }
+#endif /* V3D_VERSION >= 40 */
+ };
+
+ return &so->base;
+}
+
+static void
+vc5_sampler_view_destroy(struct pipe_context *pctx,
+ struct pipe_sampler_view *psview)
+{
+ struct vc5_sampler_view *sview = vc5_sampler_view(psview);
+
+ vc5_bo_unreference(&sview->bo);
+ pipe_resource_reference(&psview->texture, NULL);
+ free(psview);
+}
+
+static void
+vc5_set_sampler_views(struct pipe_context *pctx,
+ enum pipe_shader_type shader,
+ unsigned start, unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader);
+ unsigned i;
+ unsigned new_nr = 0;
+
+ assert(start == 0);
+
+ for (i = 0; i < nr; i++) {
+ if (views[i])
+ new_nr = i + 1;
+ pipe_sampler_view_reference(&stage_tex->textures[i], views[i]);
+ }
+
+ for (; i < stage_tex->num_textures; i++) {
+ pipe_sampler_view_reference(&stage_tex->textures[i], NULL);
+ }
+
+ stage_tex->num_textures = new_nr;
+}
+
+static struct pipe_stream_output_target *
+vc5_create_stream_output_target(struct pipe_context *pctx,
+ struct pipe_resource *prsc,
+ unsigned buffer_offset,
+ unsigned buffer_size)
+{
+ struct pipe_stream_output_target *target;
+
+ target = CALLOC_STRUCT(pipe_stream_output_target);
+ if (!target)
+ return NULL;
+
+ pipe_reference_init(&target->reference, 1);
+ pipe_resource_reference(&target->buffer, prsc);
+
+ target->context = pctx;
+ target->buffer_offset = buffer_offset;
+ target->buffer_size = buffer_size;
+
+ return target;
+}
+
+static void
+vc5_stream_output_target_destroy(struct pipe_context *pctx,
+ struct pipe_stream_output_target *target)
+{
+ pipe_resource_reference(&target->buffer, NULL);
+ free(target);
+}
+
+static void
+vc5_set_stream_output_targets(struct pipe_context *pctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offsets)
+{
+ struct vc5_context *ctx = vc5_context(pctx);
+ struct vc5_streamout_stateobj *so = &ctx->streamout;
+ unsigned i;
+
+ assert(num_targets <= ARRAY_SIZE(so->targets));
+
+ for (i = 0; i < num_targets; i++)
+ pipe_so_target_reference(&so->targets[i], targets[i]);
+
+ for (; i < so->num_targets; i++)
+ pipe_so_target_reference(&so->targets[i], NULL);
+
+ so->num_targets = num_targets;
+
+ ctx->dirty |= VC5_DIRTY_STREAMOUT;
+}
+
/* Wires up this file's state-setting entry points into the context's
 * pipe_context vtable.  Called once at context creation.
 */
void
v3dX(state_init)(struct pipe_context *pctx)
{
        /* Simple parameter state (copied into the context, dirty-flagged). */
        pctx->set_blend_color = vc5_set_blend_color;
        pctx->set_stencil_ref = vc5_set_stencil_ref;
        pctx->set_clip_state = vc5_set_clip_state;
        pctx->set_sample_mask = vc5_set_sample_mask;
        pctx->set_constant_buffer = vc5_set_constant_buffer;
        pctx->set_framebuffer_state = vc5_set_framebuffer_state;
        pctx->set_polygon_stipple = vc5_set_polygon_stipple;
        pctx->set_scissor_states = vc5_set_scissor_states;
        pctx->set_viewport_states = vc5_set_viewport_states;

        pctx->set_vertex_buffers = vc5_set_vertex_buffers;

        /* CSO create/bind/delete triples. */
        pctx->create_blend_state = vc5_create_blend_state;
        pctx->bind_blend_state = vc5_blend_state_bind;
        pctx->delete_blend_state = vc5_generic_cso_state_delete;

        pctx->create_rasterizer_state = vc5_create_rasterizer_state;
        pctx->bind_rasterizer_state = vc5_rasterizer_state_bind;
        pctx->delete_rasterizer_state = vc5_generic_cso_state_delete;

        pctx->create_depth_stencil_alpha_state = vc5_create_depth_stencil_alpha_state;
        pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind;
        pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete;

        pctx->create_vertex_elements_state = vc5_vertex_state_create;
        pctx->delete_vertex_elements_state = vc5_generic_cso_state_delete;
        pctx->bind_vertex_elements_state = vc5_vertex_state_bind;

        /* Samplers and sampler views. */
        pctx->create_sampler_state = vc5_create_sampler_state;
        pctx->delete_sampler_state = vc5_sampler_state_delete;
        pctx->bind_sampler_states = vc5_sampler_states_bind;

        pctx->create_sampler_view = vc5_create_sampler_view;
        pctx->sampler_view_destroy = vc5_sampler_view_destroy;
        pctx->set_sampler_views = vc5_set_sampler_views;

        /* Transform feedback. */
        pctx->create_stream_output_target = vc5_create_stream_output_target;
        pctx->stream_output_target_destroy = vc5_stream_output_target_destroy;
        pctx->set_stream_output_targets = vc5_set_stream_output_targets;
}