summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/vc5
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/vc5')
-rw-r--r--  src/gallium/drivers/vc5/.editorconfig      3
-rw-r--r--  src/gallium/drivers/vc5/Automake.inc      14
-rw-r--r--  src/gallium/drivers/vc5/Makefile.am       40
-rw-r--r--  src/gallium/drivers/vc5/Makefile.sources  26
-rw-r--r--  src/gallium/drivers/vc5/vc5_blit.c       226
-rw-r--r--  src/gallium/drivers/vc5/vc5_bufmgr.c     580
-rw-r--r--  src/gallium/drivers/vc5/vc5_bufmgr.h     140
-rw-r--r--  src/gallium/drivers/vc5/vc5_cl.c          87
-rw-r--r--  src/gallium/drivers/vc5/vc5_cl.h         246
-rw-r--r--  src/gallium/drivers/vc5/vc5_context.c    171
-rw-r--r--  src/gallium/drivers/vc5/vc5_context.h    466
-rw-r--r--  src/gallium/drivers/vc5/vc5_draw.c       607
-rw-r--r--  src/gallium/drivers/vc5/vc5_drm.h        191
-rw-r--r--  src/gallium/drivers/vc5/vc5_emit.c       449
-rw-r--r--  src/gallium/drivers/vc5/vc5_fence.c       93
-rw-r--r--  src/gallium/drivers/vc5/vc5_formats.c    415
-rw-r--r--  src/gallium/drivers/vc5/vc5_job.c        429
-rw-r--r--  src/gallium/drivers/vc5/vc5_program.c    565
-rw-r--r--  src/gallium/drivers/vc5/vc5_query.c       91
-rw-r--r--  src/gallium/drivers/vc5/vc5_rcl.c        218
-rw-r--r--  src/gallium/drivers/vc5/vc5_resource.c   758
-rw-r--r--  src/gallium/drivers/vc5/vc5_resource.h   158
-rw-r--r--  src/gallium/drivers/vc5/vc5_screen.c     620
-rw-r--r--  src/gallium/drivers/vc5/vc5_screen.h      99
-rw-r--r--  src/gallium/drivers/vc5/vc5_simulator.c  736
-rw-r--r--  src/gallium/drivers/vc5/vc5_state.c      663
-rw-r--r--  src/gallium/drivers/vc5/vc5_tiling.c     402
-rw-r--r--  src/gallium/drivers/vc5/vc5_tiling.h      43
-rw-r--r--  src/gallium/drivers/vc5/vc5_uniforms.c   417
29 files changed, 8953 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc5/.editorconfig b/src/gallium/drivers/vc5/.editorconfig
new file mode 100644
index 00000000000..f3d8c479154
--- /dev/null
+++ b/src/gallium/drivers/vc5/.editorconfig
@@ -0,0 +1,3 @@
+[*.{c,h}]
+indent_style = space
+indent_size = 8
diff --git a/src/gallium/drivers/vc5/Automake.inc b/src/gallium/drivers/vc5/Automake.inc
new file mode 100644
index 00000000000..57c8a28efed
--- /dev/null
+++ b/src/gallium/drivers/vc5/Automake.inc
@@ -0,0 +1,14 @@
+if HAVE_GALLIUM_VC5
+
+TARGET_DRIVERS += vc5
+TARGET_CPPFLAGS += -DGALLIUM_VC5
+TARGET_LIB_DEPS += \
+ $(top_builddir)/src/gallium/winsys/vc5/drm/libvc5drm.la \
+ $(top_builddir)/src/gallium/drivers/vc5/libvc5.la \
+ $(top_builddir)/src/broadcom/libbroadcom.la
+
+if !HAVE_GALLIUM_VC4
+TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la
+endif
+
+endif
diff --git a/src/gallium/drivers/vc5/Makefile.am b/src/gallium/drivers/vc5/Makefile.am
new file mode 100644
index 00000000000..42d4be73d26
--- /dev/null
+++ b/src/gallium/drivers/vc5/Makefile.am
@@ -0,0 +1,40 @@
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_builddir)/src/broadcom \
+ $(LIBDRM_CFLAGS) \
+ $(VC5_SIMULATOR_CFLAGS) \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(VALGRIND_CFLAGS) \
+ $()
+
+noinst_LTLIBRARIES = libvc5.la
+
+libvc5_la_SOURCES = $(C_SOURCES)
+
+libvc5_la_LDFLAGS = \
+ $(VC5_SIMULATOR_LIBS) \
+ $(NULL)
diff --git a/src/gallium/drivers/vc5/Makefile.sources b/src/gallium/drivers/vc5/Makefile.sources
new file mode 100644
index 00000000000..0d54f830bb2
--- /dev/null
+++ b/src/gallium/drivers/vc5/Makefile.sources
@@ -0,0 +1,26 @@
+C_SOURCES := \
+ vc5_blit.c \
+ vc5_bufmgr.c \
+ vc5_bufmgr.h \
+ vc5_cl.c \
+ vc5_cl.h \
+ vc5_context.c \
+ vc5_context.h \
+ vc5_draw.c \
+ vc5_emit.c \
+ vc5_fence.c \
+ vc5_formats.c \
+ vc5_job.c \
+ vc5_program.c \
+ vc5_query.c \
+ vc5_rcl.c \
+ vc5_resource.c \
+ vc5_resource.h \
+ vc5_screen.c \
+ vc5_screen.h \
+ vc5_simulator.c \
+ vc5_state.c \
+ vc5_tiling.c \
+ vc5_tiling.h \
+ vc5_uniforms.c \
+ $()
diff --git a/src/gallium/drivers/vc5/vc5_blit.c b/src/gallium/drivers/vc5/vc5_blit.c
new file mode 100644
index 00000000000..64811416e50
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_blit.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright © 2015-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_surface.h"
+#include "util/u_blitter.h"
+#include "vc5_context.h"
+
+#if 0
+static struct pipe_surface *
+vc5_get_blit_surface(struct pipe_context *pctx,
+ struct pipe_resource *prsc, unsigned level)
+{
+ struct pipe_surface tmpl;
+
+ memset(&tmpl, 0, sizeof(tmpl));
+ tmpl.format = prsc->format;
+ tmpl.u.tex.level = level;
+ tmpl.u.tex.first_layer = 0;
+ tmpl.u.tex.last_layer = 0;
+
+ return pctx->create_surface(pctx, prsc, &tmpl);
+}
+
+static bool
+is_tile_unaligned(unsigned size, unsigned tile_size)
+{
+ return size & (tile_size - 1);
+}
+
+static bool
+vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ bool msaa = (info->src.resource->nr_samples > 1 ||
+ info->dst.resource->nr_samples > 1);
+ int tile_width = msaa ? 32 : 64;
+ int tile_height = msaa ? 32 : 64;
+
+ if (util_format_is_depth_or_stencil(info->dst.resource->format))
+ return false;
+
+ if (info->scissor_enable)
+ return false;
+
+ if ((info->mask & PIPE_MASK_RGBA) == 0)
+ return false;
+
+ if (info->dst.box.x != info->src.box.x ||
+ info->dst.box.y != info->src.box.y ||
+ info->dst.box.width != info->src.box.width ||
+ info->dst.box.height != info->src.box.height) {
+ return false;
+ }
+
+ int dst_surface_width = u_minify(info->dst.resource->width0,
+ info->dst.level);
+ int dst_surface_height = u_minify(info->dst.resource->height0,
+ info->dst.level);
+ if (is_tile_unaligned(info->dst.box.x, tile_width) ||
+ is_tile_unaligned(info->dst.box.y, tile_height) ||
+ (is_tile_unaligned(info->dst.box.width, tile_width) &&
+ info->dst.box.x + info->dst.box.width != dst_surface_width) ||
+ (is_tile_unaligned(info->dst.box.height, tile_height) &&
+ info->dst.box.y + info->dst.box.height != dst_surface_height)) {
+ return false;
+ }
+
+ /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
+ * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
+ * destination surface) to determine the stride. This may be wrong
+ * when reading from texture miplevels > 0, which are stored in
+ * POT-sized areas. For MSAA, the tile addresses are computed
+ * explicitly by the RCL, but still use the destination width to
+ * determine the stride (which could be fixed by explicitly supplying
+ * it in the ABI).
+ */
+ struct vc5_resource *rsc = vc5_resource(info->src.resource);
+
+ uint32_t stride;
+
+ if (info->src.resource->nr_samples > 1)
+ stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
+ /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T)
+ stride = align(dst_surface_width * rsc->cpp, 128); */
+ else
+ stride = align(dst_surface_width * rsc->cpp, 16);
+
+ if (stride != rsc->slices[info->src.level].stride)
+ return false;
+
+ if (info->dst.resource->format != info->src.resource->format)
+ return false;
+
+ if (false) {
+ fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
+ info->src.box.x,
+ info->src.box.y,
+ info->dst.box.x,
+ info->dst.box.y,
+ info->dst.box.width,
+ info->dst.box.height);
+ }
+
+ struct pipe_surface *dst_surf =
+ vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level);
+ struct pipe_surface *src_surf =
+ vc5_get_blit_surface(pctx, info->src.resource, info->src.level);
+
+ vc5_flush_jobs_reading_resource(vc5, info->src.resource);
+
+ struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL);
+ pipe_surface_reference(&job->color_read, src_surf);
+
+ /* If we're resolving from MSAA to single sample, we still need to run
+ * the engine in MSAA mode for the load.
+ */
+ if (!job->msaa && info->src.resource->nr_samples > 1) {
+ job->msaa = true;
+ job->tile_width = 32;
+ job->tile_height = 32;
+ }
+
+ job->draw_min_x = info->dst.box.x;
+ job->draw_min_y = info->dst.box.y;
+ job->draw_max_x = info->dst.box.x + info->dst.box.width;
+ job->draw_max_y = info->dst.box.y + info->dst.box.height;
+ job->draw_width = dst_surf->width;
+ job->draw_height = dst_surf->height;
+
+ job->tile_width = tile_width;
+ job->tile_height = tile_height;
+ job->msaa = msaa;
+ job->needs_flush = true;
+ job->resolve |= PIPE_CLEAR_COLOR;
+
+ vc5_job_submit(vc5, job);
+
+ pipe_surface_reference(&dst_surf, NULL);
+ pipe_surface_reference(&src_surf, NULL);
+
+ return true;
+}
+#endif
+
+void
+vc5_blitter_save(struct vc5_context *vc5)
+{
+ util_blitter_save_fragment_constant_buffer_slot(vc5->blitter,
+ vc5->constbuf[PIPE_SHADER_FRAGMENT].cb);
+ util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb);
+ util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx);
+ util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs);
+ util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets,
+ vc5->streamout.targets);
+ util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer);
+ util_blitter_save_viewport(vc5->blitter, &vc5->viewport);
+ util_blitter_save_scissor(vc5->blitter, &vc5->scissor);
+ util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs);
+ util_blitter_save_blend(vc5->blitter, vc5->blend);
+ util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa);
+ util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref);
+ util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask);
+ util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer);
+ util_blitter_save_fragment_sampler_states(vc5->blitter,
+ vc5->fragtex.num_samplers,
+ (void **)vc5->fragtex.samplers);
+ util_blitter_save_fragment_sampler_views(vc5->blitter,
+ vc5->fragtex.num_textures, vc5->fragtex.textures);
+ util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets,
+ vc5->streamout.targets);
+}
+
+static bool
+vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
+{
+ struct vc5_context *vc5 = vc5_context(ctx);
+
+ if (!util_blitter_is_blit_supported(vc5->blitter, info)) {
+ fprintf(stderr, "blit unsupported %s -> %s\n",
+ util_format_short_name(info->src.resource->format),
+ util_format_short_name(info->dst.resource->format));
+ return false;
+ }
+
+ vc5_blitter_save(vc5);
+ util_blitter_blit(vc5->blitter, info);
+
+ return true;
+}
+
+/* Optimal hardware path for blitting pixels.
+ * Scaling, format conversion, up- and downsampling (resolve) are allowed.
+ */
+void
+vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
+{
+ struct pipe_blit_info info = *blit_info;
+
+#if 0
+ if (vc5_tile_blit(pctx, blit_info))
+ return;
+#endif
+
+ vc5_render_blit(pctx, &info);
+}
diff --git a/src/gallium/drivers/vc5/vc5_bufmgr.c b/src/gallium/drivers/vc5/vc5_bufmgr.c
new file mode 100644
index 00000000000..c6c06dcfda7
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_bufmgr.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <err.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <xf86drm.h>
+#include <xf86drmMode.h>
+
+#include "util/u_hash_table.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+
+#include "vc5_context.h"
+#include "vc5_screen.h"
+
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#include <memcheck.h>
+#define VG(x) x
+#else
+#define VG(x)
+#endif
+
+static bool dump_stats = false;
+
+static void
+vc5_bo_cache_free_all(struct vc5_bo_cache *cache);
+
+static void
+vc5_bo_dump_stats(struct vc5_screen *screen)
+{
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+
+ fprintf(stderr, " BOs allocated: %d\n", screen->bo_count);
+ fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024);
+ fprintf(stderr, " BOs cached: %d\n", cache->bo_count);
+ fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024);
+
+ if (!list_empty(&cache->time_list)) {
+ struct vc5_bo *first = LIST_ENTRY(struct vc5_bo,
+ cache->time_list.next,
+ time_list);
+ struct vc5_bo *last = LIST_ENTRY(struct vc5_bo,
+ cache->time_list.prev,
+ time_list);
+
+ fprintf(stderr, " oldest cache time: %ld\n",
+ (long)first->free_time);
+ fprintf(stderr, " newest cache time: %ld\n",
+ (long)last->free_time);
+
+ struct timespec time;
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ fprintf(stderr, " now: %ld\n",
+ time.tv_sec);
+ }
+}
+
+static void
+vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo)
+{
+ list_del(&bo->time_list);
+ list_del(&bo->size_list);
+ cache->bo_count--;
+ cache->bo_size -= bo->size;
+}
+
+static struct vc5_bo *
+vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name)
+{
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+ uint32_t page_index = size / 4096 - 1;
+
+ if (cache->size_list_size <= page_index)
+ return NULL;
+
+ struct vc5_bo *bo = NULL;
+ mtx_lock(&cache->lock);
+ if (!list_empty(&cache->size_list[page_index])) {
+ bo = LIST_ENTRY(struct vc5_bo, cache->size_list[page_index].next,
+ size_list);
+
+ /* Check that the BO has gone idle. If not, then we want to
+ * allocate something new instead, since we assume that the
+ * user will proceed to CPU map it and fill it with stuff.
+ */
+ if (!vc5_bo_wait(bo, 0, NULL)) {
+ mtx_unlock(&cache->lock);
+ return NULL;
+ }
+
+ pipe_reference_init(&bo->reference, 1);
+ vc5_bo_remove_from_cache(cache, bo);
+
+ bo->name = name;
+ }
+ mtx_unlock(&cache->lock);
+ return bo;
+}
+
+struct vc5_bo *
+vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name)
+{
+ struct vc5_bo *bo;
+ int ret;
+
+ size = align(size, 4096);
+
+ bo = vc5_bo_from_cache(screen, size, name);
+ if (bo) {
+ if (dump_stats) {
+ fprintf(stderr, "Allocated %s %dkb from cache:\n",
+ name, size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+ return bo;
+ }
+
+ bo = CALLOC_STRUCT(vc5_bo);
+ if (!bo)
+ return NULL;
+
+ pipe_reference_init(&bo->reference, 1);
+ bo->screen = screen;
+ bo->size = size;
+ bo->name = name;
+ bo->private = true;
+
+ retry:
+ ;
+
+ bool cleared_and_retried = false;
+ struct drm_vc5_create_bo create = {
+ .size = size
+ };
+
+ ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_CREATE_BO, &create);
+ bo->handle = create.handle;
+ bo->offset = create.offset;
+
+ if (ret != 0) {
+ if (!list_empty(&screen->bo_cache.time_list) &&
+ !cleared_and_retried) {
+ cleared_and_retried = true;
+ vc5_bo_cache_free_all(&screen->bo_cache);
+ goto retry;
+ }
+
+ free(bo);
+ return NULL;
+ }
+
+ screen->bo_count++;
+ screen->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+
+ return bo;
+}
+
+void
+vc5_bo_last_unreference(struct vc5_bo *bo)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ struct timespec time;
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ mtx_lock(&screen->bo_cache.lock);
+ vc5_bo_last_unreference_locked_timed(bo, time.tv_sec);
+ mtx_unlock(&screen->bo_cache.lock);
+}
+
+static void
+vc5_bo_free(struct vc5_bo *bo)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ if (bo->map) {
+ if (using_vc5_simulator && bo->name &&
+ strcmp(bo->name, "winsys") == 0) {
+ free(bo->map);
+ } else {
+ munmap(bo->map, bo->size);
+ VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
+ }
+ }
+
+ struct drm_gem_close c;
+ memset(&c, 0, sizeof(c));
+ c.handle = bo->handle;
+ int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
+ if (ret != 0)
+ fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
+
+ screen->bo_count--;
+ screen->bo_size -= bo->size;
+
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s%s%dkb:\n",
+ bo->name ? bo->name : "",
+ bo->name ? " " : "",
+ bo->size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+
+ free(bo);
+}
+
+static void
+free_stale_bos(struct vc5_screen *screen, time_t time)
+{
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+ bool freed_any = false;
+
+ list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list,
+ time_list) {
+ if (dump_stats && !freed_any) {
+ fprintf(stderr, "Freeing stale BOs:\n");
+ vc5_bo_dump_stats(screen);
+ freed_any = true;
+ }
+
+ /* If it's more than two seconds old, free it. */
+ if (time - bo->free_time > 2) {
+ vc5_bo_remove_from_cache(cache, bo);
+ vc5_bo_free(bo);
+ } else {
+ break;
+ }
+ }
+
+ if (dump_stats && freed_any) {
+ fprintf(stderr, "Freed stale BOs:\n");
+ vc5_bo_dump_stats(screen);
+ }
+}
+
+static void
+vc5_bo_cache_free_all(struct vc5_bo_cache *cache)
+{
+ mtx_lock(&cache->lock);
+ list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list,
+ time_list) {
+ vc5_bo_remove_from_cache(cache, bo);
+ vc5_bo_free(bo);
+ }
+ mtx_unlock(&cache->lock);
+}
+
+void
+vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time)
+{
+ struct vc5_screen *screen = bo->screen;
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+ uint32_t page_index = bo->size / 4096 - 1;
+
+ if (!bo->private) {
+ vc5_bo_free(bo);
+ return;
+ }
+
+ if (cache->size_list_size <= page_index) {
+ struct list_head *new_list =
+ ralloc_array(screen, struct list_head, page_index + 1);
+
+ /* Move old list contents over (since the array has moved, and
+ * therefore the pointers to the list heads have to change).
+ */
+ for (int i = 0; i < cache->size_list_size; i++) {
+ struct list_head *old_head = &cache->size_list[i];
+ if (list_empty(old_head))
+ list_inithead(&new_list[i]);
+ else {
+ new_list[i].next = old_head->next;
+ new_list[i].prev = old_head->prev;
+ new_list[i].next->prev = &new_list[i];
+ new_list[i].prev->next = &new_list[i];
+ }
+ }
+ for (int i = cache->size_list_size; i < page_index + 1; i++)
+ list_inithead(&new_list[i]);
+
+ cache->size_list = new_list;
+ cache->size_list_size = page_index + 1;
+ }
+
+ bo->free_time = time;
+ list_addtail(&bo->size_list, &cache->size_list[page_index]);
+ list_addtail(&bo->time_list, &cache->time_list);
+ cache->bo_count++;
+ cache->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s %dkb to cache:\n",
+ bo->name, bo->size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+ bo->name = NULL;
+
+ free_stale_bos(screen, time);
+}
+
+static struct vc5_bo *
+vc5_bo_open_handle(struct vc5_screen *screen,
+ uint32_t winsys_stride,
+ uint32_t handle, uint32_t size)
+{
+ struct vc5_bo *bo;
+
+ assert(size);
+
+ mtx_lock(&screen->bo_handles_mutex);
+
+ bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
+ if (bo) {
+ pipe_reference(NULL, &bo->reference);
+ goto done;
+ }
+
+ bo = CALLOC_STRUCT(vc5_bo);
+ pipe_reference_init(&bo->reference, 1);
+ bo->screen = screen;
+ bo->handle = handle;
+ bo->size = size;
+ bo->name = "winsys";
+ bo->private = false;
+
+#ifdef USE_VC5_SIMULATOR
+ vc5_simulator_open_from_handle(screen->fd, winsys_stride,
+ bo->handle, bo->size);
+ bo->map = malloc(bo->size);
+#endif
+
+ util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);
+
+done:
+ mtx_unlock(&screen->bo_handles_mutex);
+ return bo;
+}
+
+struct vc5_bo *
+vc5_bo_open_name(struct vc5_screen *screen, uint32_t name,
+ uint32_t winsys_stride)
+{
+ struct drm_gem_open o = {
+ .name = name
+ };
+ int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o);
+ if (ret) {
+ fprintf(stderr, "Failed to open bo %d: %s\n",
+ name, strerror(errno));
+ return NULL;
+ }
+
+ return vc5_bo_open_handle(screen, winsys_stride, o.handle, o.size);
+}
+
+struct vc5_bo *
+vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride)
+{
+ uint32_t handle;
+ int ret = drmPrimeFDToHandle(screen->fd, fd, &handle);
+ int size;
+ if (ret) {
+ fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd);
+ return NULL;
+ }
+
+ /* Determine the size of the bo we were handed. */
+ size = lseek(fd, 0, SEEK_END);
+ if (size == -1) {
+ fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd);
+ return NULL;
+ }
+
+ return vc5_bo_open_handle(screen, winsys_stride, handle, size);
+}
+
+int
+vc5_bo_get_dmabuf(struct vc5_bo *bo)
+{
+ int fd;
+ int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle,
+ O_CLOEXEC, &fd);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to export gem bo %d to dmabuf\n",
+ bo->handle);
+ return -1;
+ }
+
+ mtx_lock(&bo->screen->bo_handles_mutex);
+ bo->private = false;
+ util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo);
+ mtx_unlock(&bo->screen->bo_handles_mutex);
+
+ return fd;
+}
+
+bool
+vc5_bo_flink(struct vc5_bo *bo, uint32_t *name)
+{
+ struct drm_gem_flink flink = {
+ .handle = bo->handle,
+ };
+ int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink);
+ if (ret) {
+ fprintf(stderr, "Failed to flink bo %d: %s\n",
+ bo->handle, strerror(errno));
+ free(bo);
+ return false;
+ }
+
+ bo->private = false;
+ *name = flink.name;
+
+ return true;
+}
+
+static int vc5_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
+{
+ struct drm_vc5_wait_seqno wait = {
+ .seqno = seqno,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_SEQNO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
+
+}
+
+bool
+vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason)
+{
+ if (screen->finished_seqno >= seqno)
+ return true;
+
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc5_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on seqno %lld for %s\n",
+ (long long)seqno, reason);
+ }
+ }
+
+ int ret = vc5_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
+
+ return false;
+ }
+
+ screen->finished_seqno = seqno;
+ return true;
+}
+
+static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
+{
+ struct drm_vc5_wait_bo wait = {
+ .handle = handle,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_BO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
+
+}
+
+bool
+vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc5_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on %s BO for %s\n",
+ bo->name, reason);
+ }
+ }
+
+ int ret = vc5_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
+void *
+vc5_bo_map_unsynchronized(struct vc5_bo *bo)
+{
+ uint64_t offset;
+ int ret;
+
+ if (bo->map)
+ return bo->map;
+
+ struct drm_vc5_mmap_bo map;
+ memset(&map, 0, sizeof(map));
+ map.handle = bo->handle;
+ ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_VC5_MMAP_BO, &map);
+ offset = map.offset;
+ if (ret != 0) {
+ fprintf(stderr, "map ioctl failure\n");
+ abort();
+ }
+
+ bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ bo->screen->fd, offset);
+ if (bo->map == MAP_FAILED) {
+ fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
+ bo->handle, (long long)offset, bo->size);
+ abort();
+ }
+ VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
+
+ return bo->map;
+}
+
+void *
+vc5_bo_map(struct vc5_bo *bo)
+{
+ void *map = vc5_bo_map_unsynchronized(bo);
+
+ bool ok = vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map");
+ if (!ok) {
+ fprintf(stderr, "BO wait for map failed\n");
+ abort();
+ }
+
+ return map;
+}
+
+void
+vc5_bufmgr_destroy(struct pipe_screen *pscreen)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+
+ vc5_bo_cache_free_all(cache);
+
+ if (dump_stats) {
+ fprintf(stderr, "BO stats after screen destroy:\n");
+ vc5_bo_dump_stats(screen);
+ }
+}
diff --git a/src/gallium/drivers/vc5/vc5_bufmgr.h b/src/gallium/drivers/vc5/vc5_bufmgr.h
new file mode 100644
index 00000000000..cca2b22874f
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_bufmgr.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_BUFMGR_H
+#define VC5_BUFMGR_H
+
+#include <stdint.h>
+#include "util/u_hash_table.h"
+#include "util/u_inlines.h"
+#include "util/list.h"
+#include "vc5_screen.h"
+
+struct vc5_context;
+
+struct vc5_bo {
+ struct pipe_reference reference;
+ struct vc5_screen *screen;
+ void *map;
+ const char *name;
+ uint32_t handle;
+ uint32_t size;
+
+ /* Address of the BO in our page tables. */
+ uint32_t offset;
+
+ /** Entry in the linked list of buffers freed, by age. */
+ struct list_head time_list;
+ /** Entry in the per-page-count linked list of buffers freed (by age). */
+ struct list_head size_list;
+ /** Approximate second when the bo was freed. */
+ time_t free_time;
+ /**
+ * Whether only our process has a reference to the BO (meaning that
+ * it's safe to reuse it in the BO cache).
+ */
+ bool private;
+};
+
+struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size,
+ const char *name);
+void vc5_bo_last_unreference(struct vc5_bo *bo);
+void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time);
+struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name,
+ uint32_t winsys_stride);
+struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd,
+ uint32_t winsys_stride);
+bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name);
+int vc5_bo_get_dmabuf(struct vc5_bo *bo);
+
+static inline void
+vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo)
+{
+ if (pipe_reference(&(*old_bo)->reference, &new_bo->reference))
+ vc5_bo_last_unreference(*old_bo);
+ *old_bo = new_bo;
+}
+
+static inline struct vc5_bo *
+vc5_bo_reference(struct vc5_bo *bo)
+{
+ pipe_reference(NULL, &bo->reference);
+ return bo;
+}
+
+static inline void
+vc5_bo_unreference(struct vc5_bo **bo)
+{
+ struct vc5_screen *screen;
+ if (!*bo)
+ return;
+
+ if ((*bo)->private) {
+ /* Avoid the mutex for private BOs */
+ if (pipe_reference(&(*bo)->reference, NULL))
+ vc5_bo_last_unreference(*bo);
+ } else {
+ screen = (*bo)->screen;
+ mtx_lock(&screen->bo_handles_mutex);
+
+ if (pipe_reference(&(*bo)->reference, NULL)) {
+ util_hash_table_remove(screen->bo_handles,
+ (void *)(uintptr_t)(*bo)->handle);
+ vc5_bo_last_unreference(*bo);
+ }
+
+ mtx_unlock(&screen->bo_handles_mutex);
+ }
+
+ *bo = NULL;
+}
+
+static inline void
+vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time)
+{
+ if (!*bo)
+ return;
+
+ if (pipe_reference(&(*bo)->reference, NULL))
+ vc5_bo_last_unreference_locked_timed(*bo, time);
+ *bo = NULL;
+}
+
+void *
+vc5_bo_map(struct vc5_bo *bo);
+
+void *
+vc5_bo_map_unsynchronized(struct vc5_bo *bo);
+
+bool
+vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason);
+
+bool
+vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason);
+
+void
+vc5_bufmgr_destroy(struct pipe_screen *pscreen);
+
+#endif /* VC5_BUFMGR_H */
+
diff --git a/src/gallium/drivers/vc5/vc5_cl.c b/src/gallium/drivers/vc5/vc5_cl.c
new file mode 100644
index 00000000000..37d96c4360c
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_cl.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/ralloc.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/** Initializes an empty command list; BO backing storage is only
+ * allocated once the first ensure_space call needs it.
+ */
+void
+vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl)
+{
+        cl->job = job;
+        cl->base = NULL;
+        cl->next = NULL;
+        cl->size = 0;
+}
+
+/**
+ * Ensures @space bytes are available at @alignment in the CL's BO,
+ * replacing the BO with a fresh one if the current one is too small.
+ *
+ * Returns the offset within the (possibly new) BO at which the caller
+ * should write.
+ */
+uint32_t
+vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment)
+{
+        uint32_t aligned_offset = align(cl_offset(cl), alignment);
+
+        if (aligned_offset + space > cl->size) {
+                /* Doesn't fit: drop the old BO and start fresh at offset 0. */
+                vc5_bo_unreference(&cl->bo);
+                cl->bo = vc5_bo_alloc(cl->job->vc5->screen,
+                                      align(space, 4096), "CL");
+                cl->base = vc5_bo_map(cl->bo);
+                cl->size = cl->bo->size;
+                cl->next = cl->base;
+                return 0;
+        }
+
+        cl->next = cl->base + aligned_offset;
+        return aligned_offset;
+}
+
+/**
+ * Ensures there is space for @space contiguous bytes of commands in the
+ * BCL/RCL, chaining to a freshly allocated BO with a BRANCH packet when
+ * the current BO can't hold them.
+ */
+void
+vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space)
+{
+        if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size)
+                return;
+
+        struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL");
+        assert(space <= new_bo->size);
+
+        /* Chain to the new BO from the old one. */
+        if (cl->bo) {
+                cl_emit(cl, BRANCH, branch) {
+                        branch.address = cl_address(new_bo, 0);
+                }
+                vc5_bo_unreference(&cl->bo);
+        } else {
+                /* Root the first RCL/BCL BO in the job.  This must be the
+                 * BO we just allocated: cl->bo is NULL in this branch, so
+                 * the previous code's vc5_job_add_bo(cl->job, cl->bo) was a
+                 * no-op and the kernel would never have been told to page
+                 * in the first CL BO.
+                 */
+                vc5_job_add_bo(cl->job, new_bo);
+        }
+
+        cl->bo = new_bo;
+        cl->base = vc5_bo_map(cl->bo);
+        cl->size = cl->bo->size;
+        cl->next = cl->base;
+}
+
+/** Releases the CL's reference on its backing BO (the job may still hold
+ * its own reference from vc5_job_add_bo()).
+ */
+void
+vc5_destroy_cl(struct vc5_cl *cl)
+{
+ vc5_bo_unreference(&cl->bo);
+}
diff --git a/src/gallium/drivers/vc5/vc5_cl.h b/src/gallium/drivers/vc5/vc5_cl.h
new file mode 100644
index 00000000000..e935eeff536
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_cl.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_CL_H
+#define VC5_CL_H
+
+#include <stdint.h>
+
+#include "util/u_math.h"
+#include "util/macros.h"
+
+struct vc5_bo;
+struct vc5_job;
+struct vc5_cl;
+
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc5_cl_out;
+
+/** A reference to a BO used in the CL packing functions */
+struct vc5_cl_reloc {
+ struct vc5_bo *bo;
+ uint32_t offset;
+};
+
+static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *);
+
+#define __gen_user_data struct vc5_cl
+#define __gen_address_type struct vc5_cl_reloc
+#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
+ (reloc)->offset)
+#define __gen_emit_reloc cl_pack_emit_reloc
+
+struct vc5_cl {
+ void *base;
+ struct vc5_job *job;
+ struct vc5_cl_out *next;
+ struct vc5_bo *bo;
+ uint32_t size;
+};
+
+void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl);
+void vc5_destroy_cl(struct vc5_cl *cl);
+void vc5_dump_cl(void *cl, uint32_t size, bool is_render);
+uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo);
+
+/* Wrapper structs so the compiler generates unaligned-safe accesses. */
+struct PACKED unaligned_16 { uint16_t x; };
+struct PACKED unaligned_32 { uint32_t x; };
+
+/** Returns the number of bytes emitted into the CL so far. */
+static inline uint32_t cl_offset(struct vc5_cl *cl)
+{
+ return (char *)cl->next - (char *)cl->base;
+}
+
+/** Advances the write pointer by n bytes. */
+static inline void
+cl_advance(struct vc5_cl_out **cl, uint32_t n)
+{
+ (*cl) = (struct vc5_cl_out *)((char *)(*cl) + n);
+}
+
+/** Grabs the current write pointer for a run of emits; pair with cl_end(). */
+static inline struct vc5_cl_out *
+cl_start(struct vc5_cl *cl)
+{
+ return cl->next;
+}
+
+/** Stores the advanced write pointer back, asserting we stayed in bounds. */
+static inline void
+cl_end(struct vc5_cl *cl, struct vc5_cl_out *next)
+{
+ cl->next = next;
+ assert(cl_offset(cl) <= cl->size);
+}
+
+
+/** Writes a 32-bit value at a possibly-unaligned address. */
+static inline void
+put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val)
+{
+ struct unaligned_32 *p = (void *)ptr;
+ p->x = val;
+}
+
+/** Writes a 16-bit value at a possibly-unaligned address. */
+static inline void
+put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val)
+{
+ struct unaligned_16 *p = (void *)ptr;
+ p->x = val;
+}
+
+/** Emits a byte into the CL and advances the write pointer. */
+static inline void
+cl_u8(struct vc5_cl_out **cl, uint8_t n)
+{
+ *(uint8_t *)(*cl) = n;
+ cl_advance(cl, 1);
+}
+
+/** Emits a 16-bit value (unaligned-safe) into the CL. */
+static inline void
+cl_u16(struct vc5_cl_out **cl, uint16_t n)
+{
+ put_unaligned_16(*cl, n);
+ cl_advance(cl, 2);
+}
+
+/** Emits a 32-bit value (unaligned-safe) into the CL. */
+static inline void
+cl_u32(struct vc5_cl_out **cl, uint32_t n)
+{
+ put_unaligned_32(*cl, n);
+ cl_advance(cl, 4);
+}
+
+/** Emits a 32-bit value the caller guarantees is 4-byte aligned. */
+static inline void
+cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n)
+{
+ *(uint32_t *)(*cl) = n;
+ cl_advance(cl, 4);
+}
+
+/** Emits the GPU address of @bo + @offset and roots the BO in the job so
+ * the kernel pages it in for the exec.
+ */
+static inline void
+cl_aligned_reloc(struct vc5_cl *cl,
+ struct vc5_cl_out **cl_out,
+ struct vc5_bo *bo, uint32_t offset)
+{
+ cl_aligned_u32(cl_out, bo->offset + offset);
+ vc5_job_add_bo(cl->job, bo);
+}
+
+/** Emits a raw CPU pointer value into the CL stream. */
+static inline void
+cl_ptr(struct vc5_cl_out **cl, void *ptr)
+{
+ *(struct vc5_cl_out **)(*cl) = ptr;
+ cl_advance(cl, sizeof(void *));
+}
+
+/** Emits a float as its IEEE-754 bit pattern. */
+static inline void
+cl_f(struct vc5_cl_out **cl, float f)
+{
+ cl_u32(cl, fui(f));
+}
+
+/** Aligned variant of cl_f(). */
+static inline void
+cl_aligned_f(struct vc5_cl_out **cl, float f)
+{
+ cl_aligned_u32(cl, fui(f));
+}
+
+/**
+ * Reference to a BO with its associated offset, used in the pack process.
+ *
+ * The BO may be NULL to mean "no address"; __gen_address_offset() resolves
+ * a NULL BO to 0.
+ */
+static inline struct vc5_cl_reloc
+cl_address(struct vc5_bo *bo, uint32_t offset)
+{
+ struct vc5_cl_reloc reloc = {
+ .bo = bo,
+ .offset = offset,
+ };
+ return reloc;
+}
+
+uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align);
+void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size);
+
+#define cl_packet_header(packet) V3D33_ ## packet ## _header
+#define cl_packet_length(packet) V3D33_ ## packet ## _length
+#define cl_packet_pack(packet) V3D33_ ## packet ## _pack
+#define cl_packet_struct(packet) V3D33_ ## packet
+
+/** Reserves @size bytes at the current write pointer and returns the start
+ * of the reserved region for the caller to fill in.
+ */
+static inline void *
+cl_get_emit_space(struct vc5_cl_out **cl, size_t size)
+{
+ void *addr = *cl;
+ cl_advance(cl, size);
+ return addr;
+}
+
+/* Macro for setting up an emit of a CL struct. A temporary unpacked struct
+ * is created, which you get to set fields in of the form:
+ *
+ * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
+ * .flags.flat_shade_flags = 1 << 2,
+ * }
+ *
+ * or default values only can be emitted with just:
+ *
+ * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
+ *
+ * The trick here is that we make a for loop that will execute the body
+ * (either the block or the ';' after the macro invocation) exactly once.
+ */
+#define cl_emit(cl, packet, name) \
+ for (struct cl_packet_struct(packet) name = { \
+ cl_packet_header(packet) \
+ }, \
+ *_loop_terminate = &name; \
+ __builtin_expect(_loop_terminate != NULL, 1); \
+ ({ \
+ struct vc5_cl_out *cl_out = cl_start(cl); \
+ cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out, \
+ cl_packet_length(packet))); \
+ cl_advance(&cl_out, cl_packet_length(packet)); \
+ cl_end(cl, cl_out); \
+ _loop_terminate = NULL; \
+ })) \
+
+#define cl_emit_prepacked(cl, packet) do { \
+ memcpy((cl)->next, packet, sizeof(*packet)); \
+ cl_advance(&(cl)->next, sizeof(*packet)); \
+} while (0)
+
+/**
+ * Helper function called by the XML-generated pack functions for filling in
+ * an address field in shader records.
+ *
+ * Since we have a private address space as of VC5, our BOs can have lifelong
+ * offsets, and all the kernel needs to know is which BOs need to be paged in
+ * for this exec.
+ *
+ * A NULL BO (a "no address" reloc from cl_address()) is simply skipped.
+ */
+static inline void
+cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc)
+{
+ if (reloc->bo)
+ vc5_job_add_bo(cl->job, reloc->bo);
+}
+
+#endif /* VC5_CL_H */
diff --git a/src/gallium/drivers/vc5/vc5_context.c b/src/gallium/drivers/vc5/vc5_context.c
new file mode 100644
index 00000000000..f80020ab31e
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_context.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <xf86drm.h>
+#include <err.h>
+
+#include "pipe/p_defines.h"
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_blitter.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_primconvert.h"
+#include "pipe/p_screen.h"
+
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+
+/**
+ * Submits every pending bin/render job for this context to the kernel.
+ *
+ * NOTE(review): vc5_job_submit() presumably removes the job from
+ * vc5->jobs; mesa's hash_table_foreach tolerates removal of the current
+ * entry, but confirm against vc5_job.c that submit really does remove it.
+ */
+void
+vc5_flush(struct pipe_context *pctx)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+
+ struct hash_entry *entry;
+ hash_table_foreach(vc5->jobs, entry) {
+ struct vc5_job *job = entry->data;
+ vc5_job_submit(vc5, job);
+ }
+}
+
+/**
+ * pipe_context::flush implementation: submits all pending jobs, and
+ * optionally hands back a fence for the flushed work.
+ *
+ * The fence is created only after vc5_flush() so that last_emit_seqno
+ * reflects the jobs just submitted.
+ */
+static void
+vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+
+ vc5_flush(pctx);
+
+ if (fence) {
+ struct pipe_screen *screen = pctx->screen;
+ struct vc5_fence *f = vc5_fence_create(vc5->screen,
+ vc5->last_emit_seqno);
+ /* Drop any fence the caller passed in before storing ours. */
+ screen->fence_reference(screen, fence, NULL);
+ *fence = (struct pipe_fence_handle *)f;
+ }
+}
+
+/**
+ * pipe_context::invalidate_resource: the resource's contents may be
+ * discarded, so clear its initialized-buffer tracking and cancel pending
+ * depth/stencil stores from any job writing it.
+ *
+ * NOTE(review): only the Z/S attachment is handled; a job with prsc bound
+ * as a color buffer keeps its resolve bits -- confirm whether color
+ * invalidation should be handled here too.
+ */
+static void
+vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_resource *rsc = vc5_resource(prsc);
+
+ rsc->initialized_buffers = 0;
+
+ struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs,
+ prsc);
+ if (!entry)
+ return;
+
+ struct vc5_job *job = entry->data;
+ if (job->key.zsbuf && job->key.zsbuf->texture == prsc)
+ job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
+}
+
+/**
+ * pipe_context::destroy: flushes remaining work, then tears down helper
+ * objects in roughly reverse order of creation.
+ *
+ * Also reached from the vc5_context_create() failure path with a
+ * partially-constructed context, hence the NULL checks.
+ */
+static void
+vc5_context_destroy(struct pipe_context *pctx)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+
+ vc5_flush(pctx);
+
+ if (vc5->blitter)
+ util_blitter_destroy(vc5->blitter);
+
+ if (vc5->primconvert)
+ util_primconvert_destroy(vc5->primconvert);
+
+ if (vc5->uploader)
+ u_upload_destroy(vc5->uploader);
+
+ slab_destroy_child(&vc5->transfer_pool);
+
+ pipe_surface_reference(&vc5->framebuffer.cbufs[0], NULL);
+ pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL);
+
+ vc5_program_fini(pctx);
+
+ ralloc_free(vc5);
+}
+
+/**
+ * Creates a VC5 rendering context.
+ *
+ * Returns NULL on failure after destroying the partially-built context.
+ * The V3D_DEBUG_SHADERDB bit, cleared during setup so context-setup
+ * shaders don't pollute shader-db output, is restored on every exit path
+ * (the original code leaked the cleared state on both failure paths,
+ * permanently disabling shader-db dumping for the process).
+ */
+struct pipe_context *
+vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+        struct vc5_context *vc5;
+
+        /* Prevent dumping of the shaders built during context setup. */
+        uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB;
+        V3D_DEBUG &= ~V3D_DEBUG_SHADERDB;
+
+        vc5 = rzalloc(NULL, struct vc5_context);
+        if (!vc5) {
+                V3D_DEBUG |= saved_shaderdb_flag;
+                return NULL;
+        }
+        struct pipe_context *pctx = &vc5->base;
+
+        vc5->screen = screen;
+
+        pctx->screen = pscreen;
+        pctx->priv = priv;
+        pctx->destroy = vc5_context_destroy;
+        pctx->flush = vc5_pipe_flush;
+        pctx->invalidate_resource = vc5_invalidate_resource;
+
+        vc5_draw_init(pctx);
+        vc5_state_init(pctx);
+        vc5_program_init(pctx);
+        vc5_query_init(pctx);
+        vc5_resource_context_init(pctx);
+
+        vc5_job_init(vc5);
+
+        vc5->fd = screen->fd;
+
+        slab_create_child(&vc5->transfer_pool, &screen->transfer_pool);
+
+        vc5->uploader = u_upload_create_default(&vc5->base);
+        vc5->base.stream_uploader = vc5->uploader;
+        vc5->base.const_uploader = vc5->uploader;
+
+        vc5->blitter = util_blitter_create(pctx);
+        if (!vc5->blitter)
+                goto fail;
+
+        vc5->primconvert = util_primconvert_create(pctx,
+                                                   (1 << PIPE_PRIM_QUADS) - 1);
+        if (!vc5->primconvert)
+                goto fail;
+
+        V3D_DEBUG |= saved_shaderdb_flag;
+
+        vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1;
+
+        return &vc5->base;
+
+fail:
+        /* Restore the shader-db flag before tearing down, matching the
+         * success path.
+         */
+        V3D_DEBUG |= saved_shaderdb_flag;
+        pctx->destroy(pctx);
+        return NULL;
+}
diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h
new file mode 100644
index 00000000000..b8f3f784a85
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_context.h
@@ -0,0 +1,466 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_CONTEXT_H
+#define VC5_CONTEXT_H
+
+#include <stdio.h>
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/bitset.h"
+#include "util/slab.h"
+#include "xf86drm.h"
+#include "vc5_drm.h"
+
+struct vc5_job;
+struct vc5_bo;
+void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
+
+#define __user
+#include "vc5_drm.h"
+#include "vc5_bufmgr.h"
+#include "vc5_resource.h"
+#include "vc5_cl.h"
+
+#ifdef USE_VC5_SIMULATOR
+#define using_vc5_simulator true
+#else
+#define using_vc5_simulator false
+#endif
+
+#define VC5_DIRTY_BLEND (1 << 0)
+#define VC5_DIRTY_RASTERIZER (1 << 1)
+#define VC5_DIRTY_ZSA (1 << 2)
+#define VC5_DIRTY_FRAGTEX (1 << 3)
+#define VC5_DIRTY_VERTTEX (1 << 4)
+
+#define VC5_DIRTY_BLEND_COLOR (1 << 7)
+#define VC5_DIRTY_STENCIL_REF (1 << 8)
+#define VC5_DIRTY_SAMPLE_MASK (1 << 9)
+#define VC5_DIRTY_FRAMEBUFFER (1 << 10)
+#define VC5_DIRTY_STIPPLE (1 << 11)
+#define VC5_DIRTY_VIEWPORT (1 << 12)
+#define VC5_DIRTY_CONSTBUF (1 << 13)
+#define VC5_DIRTY_VTXSTATE (1 << 14)
+#define VC5_DIRTY_VTXBUF (1 << 15)
+#define VC5_DIRTY_SCISSOR (1 << 17)
+#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18)
+#define VC5_DIRTY_PRIM_MODE (1 << 19)
+#define VC5_DIRTY_CLIP (1 << 20)
+#define VC5_DIRTY_UNCOMPILED_VS (1 << 21)
+#define VC5_DIRTY_UNCOMPILED_FS (1 << 22)
+#define VC5_DIRTY_COMPILED_CS (1 << 23)
+#define VC5_DIRTY_COMPILED_VS (1 << 24)
+#define VC5_DIRTY_COMPILED_FS (1 << 25)
+#define VC5_DIRTY_FS_INPUTS (1 << 26)
+#define VC5_DIRTY_STREAMOUT (1 << 27)
+
+#define VC5_MAX_FS_INPUTS 64
+
+struct vc5_sampler_view {
+ struct pipe_sampler_view base;
+ uint32_t p0;
+ uint32_t p1;
+ /* Precomputed swizzles to pass in to the shader key. */
+ uint8_t swizzle[4];
+
+ uint8_t texture_shader_state[32];
+};
+
+struct vc5_sampler_state {
+ struct pipe_sampler_state base;
+ uint32_t p0;
+ uint32_t p1;
+
+ uint8_t texture_shader_state[32];
+};
+
+struct vc5_texture_stateobj {
+ struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
+ unsigned num_textures;
+ struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
+ unsigned num_samplers;
+ struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS];
+};
+
+struct vc5_shader_uniform_info {
+ enum quniform_contents *contents;
+ uint32_t *data;
+ uint32_t count;
+};
+
+struct vc5_uncompiled_shader {
+ /** A name for this program, so you can track it in shader-db output. */
+ uint32_t program_id;
+ /** How many variants of this program were compiled, for shader-db. */
+ uint32_t compiled_variant_count;
+ struct pipe_shader_state base;
+ uint32_t num_tf_outputs;
+ struct v3d_varying_slot *tf_outputs;
+ uint16_t tf_specs[PIPE_MAX_SO_BUFFERS];
+ uint32_t num_tf_specs;
+};
+
+struct vc5_compiled_shader {
+ struct vc5_bo *bo;
+
+ union {
+ struct v3d_prog_data *base;
+ struct v3d_vs_prog_data *vs;
+ struct v3d_fs_prog_data *fs;
+ } prog_data;
+
+ /**
+ * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the
+ * uniforms have to be rewritten (and therefore the shader state
+ * reemitted).
+ */
+ uint32_t uniform_dirty_bits;
+};
+
+struct vc5_program_stateobj {
+ struct vc5_uncompiled_shader *bind_vs, *bind_fs;
+ struct vc5_compiled_shader *cs, *vs, *fs;
+};
+
+struct vc5_constbuf_stateobj {
+ struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
+ uint32_t enabled_mask;
+ uint32_t dirty_mask;
+};
+
+struct vc5_vertexbuf_stateobj {
+ struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+ unsigned count;
+ uint32_t enabled_mask;
+ uint32_t dirty_mask;
+};
+
+struct vc5_vertex_stateobj {
+ struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
+ unsigned num_elements;
+};
+
+struct vc5_streamout_stateobj {
+ struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+ unsigned num_targets;
+};
+
+/* Hash table key for vc5->jobs */
+struct vc5_job_key {
+ struct pipe_surface *cbufs[4];
+ struct pipe_surface *zsbuf;
+};
+
+/**
+ * A complete bin/render job.
+ *
+ * This is all of the state necessary to submit a bin/render to the kernel.
+ * We want to be able to have multiple in progress at a time, so that we don't
+ * need to flush an existing CL just to switch to rendering to a new render
+ * target (which would mean reading back from the old render target when
+ * starting to render to it again).
+ */
+struct vc5_job {
+ struct vc5_context *vc5;
+ struct vc5_cl bcl;
+ struct vc5_cl rcl;
+ struct vc5_cl indirect;
+ struct vc5_bo *tile_alloc;
+ uint32_t shader_rec_count;
+
+ struct drm_vc5_submit_cl submit;
+
+ /**
+ * Set of all BOs referenced by the job. This will be used for making
+ * the list of BOs that the kernel will need to have paged in to
+ * execute our job.
+ */
+ struct set *bos;
+ /* Size of the submit.bo_handles array. */
+ uint32_t bo_handles_size;
+
+ /** @{ Surfaces to submit rendering for. */
+ struct pipe_surface *cbufs[4];
+ struct pipe_surface *zsbuf;
+ /** @} */
+ /** @{
+ * Bounding box of the scissor across all queued drawing.
+ *
+ * Note that the max values are exclusive.
+ */
+ uint32_t draw_min_x;
+ uint32_t draw_min_y;
+ uint32_t draw_max_x;
+ uint32_t draw_max_y;
+ /** @} */
+ /** @{
+ * Width/height of the color framebuffer being rendered to,
+ * for VC5_TILE_RENDERING_MODE_CONFIG.
+ */
+ uint32_t draw_width;
+ uint32_t draw_height;
+ /** @} */
+ /** @{ Tile information, depending on MSAA and float color buffer. */
+ uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */
+ uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */
+
+ uint32_t tile_width; /** @< Width of a tile. */
+ uint32_t tile_height; /** @< Height of a tile. */
+ /** maximum internal_bpp of all color render targets. */
+ uint32_t internal_bpp;
+
+ /** Whether the current rendering is in a 4X MSAA tile buffer. */
+ bool msaa;
+ /** @} */
+
+ /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the
+ * first rendering.
+ */
+ uint32_t cleared;
+ /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to
+ * (either clears or draws).
+ */
+ uint32_t resolve;
+ uint32_t clear_color[2];
+ uint32_t clear_zs; /**< 24-bit unorm depth/stencil */
+
+ /**
+ * Set if some drawing (triangles, blits, or just a glClear()) has
+ * been done to the FBO, meaning that we need to
+ * DRM_IOCTL_VC5_SUBMIT_CL.
+ */
+ bool needs_flush;
+
+ bool uses_early_z;
+
+ /**
+ * Number of draw calls (not counting full buffer clears) queued in
+ * the current job.
+ */
+ uint32_t draw_calls_queued;
+
+ struct vc5_job_key key;
+};
+
+struct vc5_context {
+ struct pipe_context base;
+
+ int fd;
+ struct vc5_screen *screen;
+
+ /** The 3D rendering job for the currently bound FBO. */
+ struct vc5_job *job;
+
+ /* Map from struct vc5_job_key to the job for that FBO.
+ */
+ struct hash_table *jobs;
+
+ /**
+ * Map from vc5_resource to a job writing to that resource.
+ *
+ * Primarily for flushing jobs rendering to textures that are now
+ * being read from.
+ */
+ struct hash_table *write_jobs;
+
+ struct slab_child_pool transfer_pool;
+ struct blitter_context *blitter;
+
+ /** bitfield of VC5_DIRTY_* */
+ uint32_t dirty;
+
+ struct primconvert_context *primconvert;
+
+ struct hash_table *fs_cache, *vs_cache;
+ uint32_t next_uncompiled_program_id;
+ uint64_t next_compiled_program_id;
+
+ struct vc5_compiler_state *compiler_state;
+
+ uint8_t prim_mode;
+
+ /** Maximum index buffer valid for the current shader_rec. */
+ uint32_t max_index;
+ /** Last index bias baked into the current shader_rec. */
+ uint32_t last_index_bias;
+
+ /** Seqno of the last CL flush's job. */
+ uint64_t last_emit_seqno;
+
+ struct u_upload_mgr *uploader;
+
+ /** @{ Current pipeline state objects */
+ struct pipe_scissor_state scissor;
+ struct pipe_blend_state *blend;
+ struct vc5_rasterizer_state *rasterizer;
+ struct vc5_depth_stencil_alpha_state *zsa;
+
+ struct vc5_texture_stateobj verttex, fragtex;
+
+ struct vc5_program_stateobj prog;
+
+ struct vc5_vertex_stateobj *vtx;
+
+ struct {
+ struct pipe_blend_color f;
+ uint16_t hf[4];
+ } blend_color;
+ struct pipe_stencil_ref stencil_ref;
+ unsigned sample_mask;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple stipple;
+ struct pipe_clip_state clip;
+ struct pipe_viewport_state viewport;
+ struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
+ struct vc5_vertexbuf_stateobj vertexbuf;
+ struct vc5_streamout_stateobj streamout;
+ /** @} */
+};
+
+struct vc5_rasterizer_state {
+ struct pipe_rasterizer_state base;
+
+ /* VC5_CONFIGURATION_BITS */
+ uint8_t config_bits[3];
+
+ float point_size;
+
+ /**
+ * Half-float (1/8/7 bits) value of polygon offset units for
+ * VC5_PACKET_DEPTH_OFFSET
+ */
+ uint16_t offset_units;
+ /**
+ * Half-float (1/8/7 bits) value of polygon offset scale for
+ * VC5_PACKET_DEPTH_OFFSET
+ */
+ uint16_t offset_factor;
+};
+
+struct vc5_depth_stencil_alpha_state {
+ struct pipe_depth_stencil_alpha_state base;
+
+ bool early_z_enable;
+
+ /** Uniforms for stencil state.
+ *
+ * Index 0 is either the front config, or the front-and-back config.
+ * Index 1 is the back config if doing separate back stencil.
+ * Index 2 is the writemask config if it's not a common mask value.
+ */
+ uint32_t stencil_uniforms[3];
+};
+
+#define perf_debug(...) do { \
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \
+ fprintf(stderr, __VA_ARGS__); \
+} while (0)
+
+static inline struct vc5_context *
+vc5_context(struct pipe_context *pcontext)
+{
+ return (struct vc5_context *)pcontext;
+}
+
+static inline struct vc5_sampler_view *
+vc5_sampler_view(struct pipe_sampler_view *psview)
+{
+ return (struct vc5_sampler_view *)psview;
+}
+
+static inline struct vc5_sampler_state *
+vc5_sampler_state(struct pipe_sampler_state *psampler)
+{
+ return (struct vc5_sampler_state *)psampler;
+}
+
+struct pipe_context *vc5_context_create(struct pipe_screen *pscreen,
+ void *priv, unsigned flags);
+void vc5_draw_init(struct pipe_context *pctx);
+void vc5_state_init(struct pipe_context *pctx);
+void vc5_program_init(struct pipe_context *pctx);
+void vc5_program_fini(struct pipe_context *pctx);
+void vc5_query_init(struct pipe_context *pctx);
+
+/* Simulator entry points (see vc5_simulator.c).  The original diff
+ * declared vc5_simulator_init() and vc5_simulator_destroy() twice each;
+ * the duplicates are dropped here.
+ */
+void vc5_simulator_init(struct vc5_screen *screen);
+void vc5_simulator_destroy(struct vc5_screen *screen);
+int vc5_simulator_flush(struct vc5_context *vc5,
+                        struct drm_vc5_submit_cl *args,
+                        struct vc5_job *job);
+int vc5_simulator_ioctl(int fd, unsigned long request, void *arg);
+void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride,
+                                    int handle, uint32_t size);
+
+/**
+ * Dispatches an ioctl either to the simulator or to the real DRM fd,
+ * depending on whether the driver was built with USE_VC5_SIMULATOR.
+ */
+static inline int
+vc5_ioctl(int fd, unsigned long request, void *arg)
+{
+        return (using_vc5_simulator ?
+                vc5_simulator_ioctl(fd, request, arg) :
+                drmIoctl(fd, request, arg));
+}
+
+void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader);
+struct vc5_cl_reloc vc5_write_uniforms(struct vc5_context *vc5,
+ struct vc5_compiled_shader *shader,
+ struct vc5_constbuf_stateobj *cb,
+ struct vc5_texture_stateobj *texstate);
+
+void vc5_flush(struct pipe_context *pctx);
+void vc5_job_init(struct vc5_context *vc5);
+struct vc5_job *vc5_get_job(struct vc5_context *vc5,
+ struct pipe_surface **cbufs,
+ struct pipe_surface *zsbuf);
+struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5);
+void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
+void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job);
+void vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
+ struct pipe_resource *prsc);
+void vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
+ struct pipe_resource *prsc);
+void vc5_emit_state(struct pipe_context *pctx);
+void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode);
+
+bool vc5_rt_format_supported(enum pipe_format f);
+bool vc5_tex_format_supported(enum pipe_format f);
+uint8_t vc5_get_rt_format(enum pipe_format f);
+uint8_t vc5_get_tex_format(enum pipe_format f);
+uint8_t vc5_get_tex_return_size(enum pipe_format f);
+uint8_t vc5_get_tex_return_channels(enum pipe_format f);
+const uint8_t *vc5_get_format_swizzle(enum pipe_format f);
+void vc5_get_internal_type_bpp_for_output_format(uint32_t format,
+ uint32_t *type,
+ uint32_t *bpp);
+
+void vc5_init_query_functions(struct vc5_context *vc5);
+void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info);
+void vc5_blitter_save(struct vc5_context *vc5);
+void vc5_emit_rcl(struct vc5_job *job);
+
+
+#endif /* VC5_CONTEXT_H */
diff --git a/src/gallium/drivers/vc5/vc5_draw.c b/src/gallium/drivers/vc5/vc5_draw.c
new file mode 100644
index 00000000000..d78fa3265fd
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_draw.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_blitter.h"
+#include "util/u_prim.h"
+#include "util/u_format.h"
+#include "util/u_pack_color.h"
+#include "util/u_prim_restart.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_primconvert.h"
+
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "vc5_cl.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+/**
+ * Does the initial binning command list setup for drawing to a given FBO.
+ *
+ * This is emitted at most once per job: job->needs_flush records that
+ * binning setup has already been done.
+ */
+static void
+vc5_start_draw(struct vc5_context *vc5)
+{
+        struct vc5_job *job = vc5->job;
+
+        /* Binning setup has already been emitted for this job. */
+        if (job->needs_flush)
+                return;
+
+        /* Get space to emit our BCL state, using a branch to jump to a new BO
+         * if necessary.
+         */
+        vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
+
+        job->submit.bcl_start = job->bcl.bo->offset;
+        vc5_job_add_bo(job, job->bcl.bo);
+
+        /* XXX: fixed 1MB tile allocation.  NOTE(review): confirm whether the
+         * kernel/HW grows or faults on tile allocation overflow.
+         */
+        job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc");
+        /* Tile state data array: 64 bytes of binner state per tile. */
+        struct vc5_bo *tsda = vc5_bo_alloc(vc5->screen,
+                                           job->draw_tiles_y *
+                                           job->draw_tiles_x *
+                                           64,
+                                           "TSDA");
+
+        /* "Binning mode lists start with a Tile Binning Mode Configuration
+         * item (120)"
+         *
+         * Part1 signals the end of binning config setup.
+         */
+        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) {
+                config.tile_allocation_memory_address =
+                        cl_address(job->tile_alloc, 0);
+                config.tile_allocation_memory_size = job->tile_alloc->size;
+        }
+
+        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) {
+                config.tile_state_data_array_base_address =
+                        cl_address(tsda, 0);
+
+                config.width_in_tiles = job->draw_tiles_x;
+                config.height_in_tiles = job->draw_tiles_y;
+
+                /* Must be >= 1 */
+                config.number_of_render_targets = 1;
+
+                config.multisample_mode_4x = job->msaa;
+
+                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
+        }
+
+        /* NOTE(review): tsda is dropped right after being referenced by the
+         * packet above — confirm the cl_address relocation keeps the BO
+         * alive for the job's lifetime.
+         */
+        vc5_bo_unreference(&tsda);
+
+        /* There's definitely nothing in the VCD cache we want. */
+        cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
+
+        /* "Binning mode lists must have a Start Tile Binning item (6) after
+         * any prefix state data before the binning list proper starts."
+         */
+        cl_emit(&job->bcl, START_TILE_BINNING, bin);
+
+        cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, fmt) {
+                fmt.data_type = LIST_INDEXED;
+                fmt.primitive_type = LIST_TRIANGLES;
+        }
+
+        /* Record that binning has started and capture the FBO dimensions
+         * the job was set up for.
+         */
+        job->needs_flush = true;
+        job->draw_width = vc5->framebuffer.width;
+        job->draw_height = vc5->framebuffer.height;
+}
+
+/* Flushes any jobs still writing to the textures bound to this shader
+ * stage, so that sampling in the upcoming draw sees their final contents.
+ */
+static void
+vc5_predraw_check_textures(struct pipe_context *pctx,
+                           struct vc5_texture_stateobj *stage_tex)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        for (int unit = 0; unit < stage_tex->num_textures; unit++) {
+                struct pipe_sampler_view *sview = stage_tex->textures[unit];
+
+                if (sview)
+                        vc5_flush_jobs_writing_resource(vc5, sview->texture);
+        }
+}
+
+/* Writes one (0, 0, 0, 1) vec4 of attribute default values per vertex
+ * element into the job's indirect CL, returning a reloc to the start of
+ * them.  The caller owns (and must unreference) the returned BO reference.
+ */
+static struct vc5_cl_reloc
+vc5_get_default_values(struct vc5_context *vc5)
+{
+        struct vc5_job *job = vc5->job;
+
+        /* VC5_DIRTY_VTXSTATE */
+        struct vc5_vertex_stateobj *vtx = vc5->vtx;
+
+        /* Four floats of defaults per element. */
+        vc5_cl_ensure_space(&job->indirect, 4 * 4 * vtx->num_elements, 4);
+
+        struct vc5_cl_reloc reloc =
+                cl_address(job->indirect.bo, cl_offset(&job->indirect));
+        vc5_bo_reference(reloc.bo);
+
+        struct vc5_cl_out *out = cl_start(&job->indirect);
+        for (int elem = 0; elem < vtx->num_elements; elem++) {
+                cl_aligned_f(&out, 0.0);
+                cl_aligned_f(&out, 0.0);
+                cl_aligned_f(&out, 0.0);
+                cl_aligned_f(&out, 1.0);
+        }
+        cl_end(&job->indirect, out);
+
+        return reloc;
+}
+
+/* Emits the GL shader state record and per-attribute records into the
+ * indirect CL, then points the BCL at them with a GL_SHADER_STATE packet.
+ *
+ * NOTE(review): extra_index_bias is currently unused — the bias is taken
+ * from info->index_bias below; confirm whether it was meant for a future
+ * index_bias-in-shader-rec path.
+ */
+static void
+vc5_emit_gl_shader_state(struct vc5_context *vc5,
+                         const struct pipe_draw_info *info,
+                         uint32_t extra_index_bias)
+{
+        struct vc5_job *job = vc5->job;
+        /* VC5_DIRTY_VTXSTATE */
+        struct vc5_vertex_stateobj *vtx = vc5->vtx;
+        /* VC5_DIRTY_VTXBUF */
+        struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf;
+
+        /* Upload the uniforms to the indirect CL first */
+        struct vc5_cl_reloc fs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.fs,
+                                   &vc5->constbuf[PIPE_SHADER_FRAGMENT],
+                                   &vc5->fragtex);
+        struct vc5_cl_reloc vs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.vs,
+                                   &vc5->constbuf[PIPE_SHADER_VERTEX],
+                                   &vc5->verttex);
+        /* The coordinate shader is the binning-mode variant of the VS and
+         * shares its constants and textures.
+         */
+        struct vc5_cl_reloc cs_uniforms =
+                vc5_write_uniforms(vc5, vc5->prog.cs,
+                                   &vc5->constbuf[PIPE_SHADER_VERTEX],
+                                   &vc5->verttex);
+        struct vc5_cl_reloc default_values = vc5_get_default_values(vc5);
+
+        /* Reserve contiguous space for the shader record plus one attribute
+         * record per vertex element.
+         */
+        uint32_t shader_rec_offset =
+                vc5_cl_ensure_space(&job->indirect,
+                                    cl_packet_length(GL_SHADER_STATE_RECORD) +
+                                    vtx->num_elements *
+                                    cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
+                                    32);
+
+        cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
+                shader.enable_clipping = true;
+                /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
+                shader.point_size_in_shaded_vertex_data =
+                        (info->mode == PIPE_PRIM_POINTS &&
+                         vc5->rasterizer->base.point_size_per_vertex);
+
+                shader.fragment_shader_does_z_writes =
+                        vc5->prog.fs->prog_data.fs->writes_z;
+
+                shader.number_of_varyings_in_fragment_shader =
+                        vc5->prog.fs->prog_data.base->num_inputs;
+
+                shader.propagate_nans = true;
+
+                shader.coordinate_shader_code_address =
+                        cl_address(vc5->prog.cs->bo, 0);
+                shader.vertex_shader_code_address =
+                        cl_address(vc5->prog.vs->bo, 0);
+                shader.fragment_shader_code_address =
+                        cl_address(vc5->prog.fs->bo, 0);
+
+                /* XXX: Use combined input/output size flag in the common
+                 * case.
+                 */
+                shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true;
+                shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true;
+                shader.coordinate_shader_input_vpm_segment_size =
+                        vc5->prog.cs->prog_data.vs->vpm_input_size;
+                shader.vertex_shader_input_vpm_segment_size =
+                        vc5->prog.vs->prog_data.vs->vpm_input_size;
+
+                shader.coordinate_shader_output_vpm_segment_size =
+                        vc5->prog.cs->prog_data.vs->vpm_output_size;
+                shader.vertex_shader_output_vpm_segment_size =
+                        vc5->prog.vs->prog_data.vs->vpm_output_size;
+
+                shader.coordinate_shader_uniforms_address = cs_uniforms;
+                shader.vertex_shader_uniforms_address = vs_uniforms;
+                shader.fragment_shader_uniforms_address = fs_uniforms;
+
+                shader.vertex_id_read_by_coordinate_shader =
+                        vc5->prog.cs->prog_data.vs->uses_vid;
+                shader.instance_id_read_by_coordinate_shader =
+                        vc5->prog.cs->prog_data.vs->uses_iid;
+                shader.vertex_id_read_by_vertex_shader =
+                        vc5->prog.vs->prog_data.vs->uses_vid;
+                shader.instance_id_read_by_vertex_shader =
+                        vc5->prog.vs->prog_data.vs->uses_iid;
+
+                shader.address_of_default_attribute_values = default_values;
+        }
+
+        /* One attribute record per vertex element, translating the gallium
+         * format description into the HW attribute type enum.
+         */
+        for (int i = 0; i < vtx->num_elements; i++) {
+                struct pipe_vertex_element *elem = &vtx->pipe[i];
+                struct pipe_vertex_buffer *vb =
+                        &vertexbuf->vb[elem->vertex_buffer_index];
+                struct vc5_resource *rsc = vc5_resource(vb->buffer.resource);
+                const struct util_format_description *desc =
+                        util_format_description(elem->src_format);
+
+                /* index_bias is baked into the attribute base address. */
+                uint32_t offset = (vb->buffer_offset +
+                                   elem->src_offset +
+                                   vb->stride * info->index_bias);
+
+                cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+                        uint32_t r_size = desc->channel[0].size;
+
+                        /* vec_size == 0 means 4 */
+                        attr.vec_size = desc->nr_channels & 3;
+
+                        switch (desc->channel[0].type) {
+                        case UTIL_FORMAT_TYPE_FLOAT:
+                                if (r_size == 32) {
+                                        attr.type = ATTRIBUTE_FLOAT;
+                                } else {
+                                        assert(r_size == 16);
+                                        attr.type = ATTRIBUTE_HALF_FLOAT;
+                                }
+                                break;
+
+                        case UTIL_FORMAT_TYPE_SIGNED:
+                        case UTIL_FORMAT_TYPE_UNSIGNED:
+                                switch (r_size) {
+                                case 32:
+                                        attr.type = ATTRIBUTE_INT;
+                                        break;
+                                case 16:
+                                        attr.type = ATTRIBUTE_SHORT;
+                                        break;
+                                case 10:
+                                        attr.type = ATTRIBUTE_INT2_10_10_10;
+                                        break;
+                                case 8:
+                                        attr.type = ATTRIBUTE_BYTE;
+                                        break;
+                                default:
+                                        fprintf(stderr,
+                                                "format %s unsupported\n",
+                                                desc->name);
+                                        attr.type = ATTRIBUTE_BYTE;
+                                        abort();
+                                }
+                                break;
+
+                        default:
+                                fprintf(stderr,
+                                        "format %s unsupported\n",
+                                        desc->name);
+                                abort();
+                        }
+
+                        attr.signed_int_type =
+                                desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED;
+
+                        attr.normalized_int_type = desc->channel[0].normalized;
+                        attr.read_as_int_uint = desc->channel[0].pure_integer;
+                        attr.address = cl_address(rsc->bo, offset);
+                        attr.stride = vb->stride;
+                        attr.instance_divisor = elem->instance_divisor;
+                        attr.number_of_values_read_by_coordinate_shader =
+                                vc5->prog.cs->prog_data.vs->vattr_sizes[i];
+                        attr.number_of_values_read_by_vertex_shader =
+                                vc5->prog.vs->prog_data.vs->vattr_sizes[i];
+                }
+        }
+
+        /* Point the BCL at the shader record we just built. */
+        cl_emit(&job->bcl, GL_SHADER_STATE, state) {
+                state.address = cl_address(job->indirect.bo, shader_rec_offset);
+                state.number_of_attribute_arrays = vtx->num_elements;
+        }
+
+        /* Drop the references vc5_write_uniforms()/vc5_get_default_values()
+         * handed us; the job still holds its own.
+         */
+        vc5_bo_unreference(&cs_uniforms.bo);
+        vc5_bo_unreference(&vs_uniforms.bo);
+        vc5_bo_unreference(&fs_uniforms.bo);
+        vc5_bo_unreference(&default_values.bo);
+
+        job->shader_rec_count++;
+}
+
+/**
+ * The pipe_context draw entrypoint: handles fallbacks, sets up job and
+ * shader state, and emits the draw packets into the binner CL.
+ *
+ * Fix: draw_calls_queued was incremented both inside the indexed-draw
+ * branch and again after the if/else, double-counting indexed draws; it
+ * is now counted exactly once per draw.
+ */
+static void
+vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        /* Drop draws with too few vertices for their primitive type.
+         * NOTE(review): the cast discards const on info — confirm
+         * u_trim_pipe_prim() only ever shrinks the count.
+         */
+        if (!info->count_from_stream_output && !info->indirect &&
+            !info->primitive_restart &&
+            !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
+                return;
+
+        /* Fall back for weird desktop GL primitive restart values. */
+        if (info->primitive_restart &&
+            info->index_size) {
+                uint32_t mask = ~0;
+
+                switch (info->index_size) {
+                case 2:
+                        mask = 0xffff;
+                        break;
+                case 1:
+                        mask = 0xff;
+                        break;
+                }
+
+                /* Only the all-ones restart index is handled natively. */
+                if (info->restart_index != mask) {
+                        util_draw_vbo_without_prim_restart(pctx, info);
+                        return;
+                }
+        }
+
+        /* Quads and larger primitive types get converted to triangles. */
+        if (info->mode >= PIPE_PRIM_QUADS) {
+                util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base);
+                util_primconvert_draw_vbo(vc5->primconvert, info);
+                perf_debug("Fallback conversion for %d %s vertices\n",
+                           info->count, u_prim_name(info->mode));
+                return;
+        }
+
+        /* Before setting up the draw, do any fixup blits necessary. */
+        vc5_predraw_check_textures(pctx, &vc5->verttex);
+        vc5_predraw_check_textures(pctx, &vc5->fragtex);
+
+        struct vc5_job *job = vc5_get_job_for_fbo(vc5);
+
+        /* Get space to emit our draw call into the BCL, using a branch to
+         * jump to a new BO if necessary.
+         */
+        vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
+
+        if (vc5->prim_mode != info->mode) {
+                vc5->prim_mode = info->mode;
+                vc5->dirty |= VC5_DIRTY_PRIM_MODE;
+        }
+
+        vc5_start_draw(vc5);
+        vc5_update_compiled_shaders(vc5, info->mode);
+
+        vc5_emit_state(pctx);
+
+        /* Re-emit the shader record if any state feeding it changed, or if
+         * the index bias changed (it's baked into attribute addresses).
+         */
+        if ((vc5->dirty & (VC5_DIRTY_VTXBUF |
+                           VC5_DIRTY_VTXSTATE |
+                           VC5_DIRTY_PRIM_MODE |
+                           VC5_DIRTY_RASTERIZER |
+                           VC5_DIRTY_COMPILED_CS |
+                           VC5_DIRTY_COMPILED_VS |
+                           VC5_DIRTY_COMPILED_FS |
+                           vc5->prog.cs->uniform_dirty_bits |
+                           vc5->prog.vs->uniform_dirty_bits |
+                           vc5->prog.fs->uniform_dirty_bits)) ||
+            vc5->last_index_bias != info->index_bias) {
+                vc5_emit_gl_shader_state(vc5, info, 0);
+        }
+
+        vc5->dirty = 0;
+
+        /* Note that the primitive type fields match with OpenGL/gallium
+         * definitions, up to but not including QUADS.
+         */
+        if (info->index_size) {
+                uint32_t index_size = info->index_size;
+                uint32_t offset = info->start * index_size;
+                struct pipe_resource *prsc;
+                if (info->has_user_indices) {
+                        /* Upload user-pointer index data into a BO. */
+                        prsc = NULL;
+                        u_upload_data(vc5->uploader, 0,
+                                      info->count * info->index_size, 4,
+                                      info->index.user,
+                                      &offset, &prsc);
+                } else {
+                        prsc = info->index.resource;
+                }
+                struct vc5_resource *rsc = vc5_resource(prsc);
+
+                if (info->instance_count > 1) {
+                        cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) {
+                                prim.index_type = ffs(info->index_size) - 1;
+                                prim.maximum_index = (1u << 31) - 1; /* XXX */
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+                                prim.mode = info->mode;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+
+                                prim.number_of_instances = info->instance_count;
+                                prim.instance_length = info->count;
+                        }
+                } else {
+                        cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) {
+                                prim.index_type = ffs(info->index_size) - 1;
+                                prim.length = info->count;
+                                prim.maximum_index = (1u << 31) - 1; /* XXX */
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+                                prim.mode = info->mode;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+                        }
+                }
+
+                if (info->has_user_indices)
+                        pipe_resource_reference(&prsc, NULL);
+        } else {
+                if (info->instance_count > 1) {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) {
+                                prim.mode = info->mode;
+                                prim.index_of_first_vertex = info->start;
+                                prim.number_of_instances = info->instance_count;
+                                prim.instance_length = info->count;
+                        }
+                } else {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) {
+                                prim.mode = info->mode;
+                                prim.length = info->count;
+                                prim.index_of_first_vertex = info->start;
+                        }
+                }
+        }
+        /* Exactly one increment per draw, for both indexed and array paths. */
+        job->draw_calls_queued++;
+
+        /* Track the buffers the draw writes, for store/resolve decisions. */
+        if (vc5->zsa && job->zsbuf &&
+            (vc5->zsa->base.depth.enabled ||
+             vc5->zsa->base.stencil[0].enabled)) {
+                struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture);
+                vc5_job_add_bo(job, rsc->bo);
+
+                if (vc5->zsa->base.depth.enabled) {
+                        job->resolve |= PIPE_CLEAR_DEPTH;
+                        rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
+
+                        if (vc5->zsa->early_z_enable)
+                                job->uses_early_z = true;
+                }
+
+                if (vc5->zsa->base.stencil[0].enabled) {
+                        job->resolve |= PIPE_CLEAR_STENCIL;
+                        rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
+                }
+        }
+
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+
+                if (job->resolve & bit || !job->cbufs[i])
+                        continue;
+                struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture);
+
+                job->resolve |= bit;
+                vc5_job_add_bo(job, rsc->bo);
+        }
+
+        if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
+                vc5_flush(pctx);
+}
+
+/* Packs a float RGBA color into the pixel layout of "format": 16bpp
+ * formats come back from the 16-bit union member, everything else from
+ * the first 32-bit word.
+ */
+static uint32_t
+pack_rgba(enum pipe_format format, const float *rgba)
+{
+        union util_color packed;
+
+        util_pack_color(rgba, format, &packed);
+
+        return (util_format_get_blocksize(format) == 2 ?
+                packed.us : packed.ui[0]);
+}
+
+/**
+ * pipe->clear() implementation: records clear values/flags on the job so
+ * the tiles start out cleared, rather than drawing a clearing quad.
+ *
+ * Cleanup: dropped the dead "#if 0" 565-packing block that shadowed the
+ * live pack_rgba() call.
+ */
+static void
+vc5_clear(struct pipe_context *pctx, unsigned buffers,
+          const union pipe_color_union *color, double depth, unsigned stencil)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_job *job = vc5_get_job_for_fbo(vc5);
+
+        /* We can't flag new buffers for clearing once we've queued draws.  We
+         * could avoid this by using the 3d engine to clear.
+         */
+        if (job->draw_calls_queued) {
+                perf_debug("Flushing rendering to process new clear.\n");
+                vc5_job_submit(vc5, job);
+                job = vc5_get_job_for_fbo(vc5);
+        }
+
+        if (buffers & PIPE_CLEAR_COLOR0) {
+                struct vc5_resource *rsc =
+                        vc5_resource(vc5->framebuffer.cbufs[0]->texture);
+                uint32_t clear_color;
+
+                /* We need to pack in the surface's own format because we
+                 * support multiple swizzlings of RGBA8888.
+                 */
+                clear_color = pack_rgba(vc5->framebuffer.cbufs[0]->format,
+                                        color->f);
+
+                /* NOTE(review): both clear color words get the same packed
+                 * value — confirm this matches the RCL's layout for <=32bpp
+                 * render targets.
+                 */
+                job->clear_color[0] = job->clear_color[1] = clear_color;
+                rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0);
+        }
+
+        if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
+                struct vc5_resource *rsc =
+                        vc5_resource(vc5->framebuffer.zsbuf->texture);
+                unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
+
+                if (buffers & PIPE_CLEAR_DEPTH) {
+                        job->clear_zs |=
+                                util_pack_z_stencil(PIPE_FORMAT_S8_UINT_Z24_UNORM,
+                                                    depth, 0);
+                }
+                if (buffers & PIPE_CLEAR_STENCIL) {
+                        job->clear_zs |=
+                                util_pack_z_stencil(PIPE_FORMAT_S8_UINT_Z24_UNORM,
+                                                    0, stencil);
+                }
+
+                rsc->initialized_buffers |= zsclear;
+        }
+
+        /* A clear covers the whole framebuffer. */
+        job->draw_min_x = 0;
+        job->draw_min_y = 0;
+        job->draw_max_x = vc5->framebuffer.width;
+        job->draw_max_y = vc5->framebuffer.height;
+        job->cleared |= buffers;
+        job->resolve |= buffers;
+
+        vc5_start_draw(vc5);
+}
+
+/* pipe->clear_render_target(): not yet implemented for vc5. */
+static void
+vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
+                        const union pipe_color_union *color,
+                        unsigned x, unsigned y, unsigned w, unsigned h,
+                        bool render_condition_enabled)
+{
+        fputs("unimpl: clear RT\n", stderr);
+}
+
+/* pipe->clear_depth_stencil(): not yet implemented for vc5. */
+static void
+vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
+                        unsigned buffers, double depth, unsigned stencil,
+                        unsigned x, unsigned y, unsigned w, unsigned h,
+                        bool render_condition_enabled)
+{
+        fputs("unimpl: clear DS\n", stderr);
+}
+
+/* Hooks the draw/clear entrypoints of this file into the pipe_context. */
+void
+vc5_draw_init(struct pipe_context *pctx)
+{
+        pctx->clear = vc5_clear;
+        pctx->clear_render_target = vc5_clear_render_target;
+        pctx->clear_depth_stencil = vc5_clear_depth_stencil;
+        pctx->draw_vbo = vc5_draw_vbo;
+}
diff --git a/src/gallium/drivers/vc5/vc5_drm.h b/src/gallium/drivers/vc5/vc5_drm.h
new file mode 100644
index 00000000000..e70cf9d56a6
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_drm.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _VC5_DRM_H_
+#define _VC5_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_VC5_SUBMIT_CL 0x00
+#define DRM_VC5_WAIT_SEQNO 0x01
+#define DRM_VC5_WAIT_BO 0x02
+#define DRM_VC5_CREATE_BO 0x03
+#define DRM_VC5_MMAP_BO 0x04
+#define DRM_VC5_GET_PARAM 0x05
+#define DRM_VC5_GET_BO_OFFSET 0x06
+
+#define DRM_IOCTL_VC5_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_SUBMIT_CL, struct drm_vc5_submit_cl)
+#define DRM_IOCTL_VC5_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_SEQNO, struct drm_vc5_wait_seqno)
+#define DRM_IOCTL_VC5_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_BO, struct drm_vc5_wait_bo)
+#define DRM_IOCTL_VC5_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_CREATE_BO, struct drm_vc5_create_bo)
+#define DRM_IOCTL_VC5_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_MMAP_BO, struct drm_vc5_mmap_bo)
+#define DRM_IOCTL_VC5_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_PARAM, struct drm_vc5_get_param)
+#define DRM_IOCTL_VC5_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_BO_OFFSET, struct drm_vc5_get_bo_offset)
+
+/**
+ * struct drm_vc5_submit_cl - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * This asks the kernel to have the GPU execute an optional binner
+ * command list, and a render command list.
+ */
+struct drm_vc5_submit_cl {
+        /* Pointer to the binner command list.
+         *
+         * This is the first set of commands executed, which runs the
+         * coordinate shader to determine where primitives land on the screen,
+         * then writes out the state updates and draw calls necessary per tile
+         * to the tile allocation BO.
+         */
+        __u32 bcl_start;
+
+        /** End address of the BCL (first byte after the BCL) */
+        __u32 bcl_end;
+
+        /* Offset of the render command list.
+         *
+         * This is the second set of commands executed, which will either
+         * execute the tiles that have been set up by the BCL, or a fixed set
+         * of tiles (in the case of RCL-only blits).
+         */
+        __u32 rcl_start;
+
+        /** End address of the RCL (first byte after the RCL) */
+        __u32 rcl_end;
+
+        /* Pointer to a u32 array of the BOs that are referenced by the job.
+         */
+        __u64 bo_handles;
+
+        /* Pointer to an array of chunks of extra submit CL information. (the
+         * chunk struct is not yet defined)
+         */
+        __u64 chunks;
+
+        /* Number of BO handles passed in (size is that times 4). */
+        __u32 bo_handle_count;
+
+        /* Number of entries in the chunks array. */
+        __u32 chunk_count;
+
+        /* No flags are defined yet; NOTE(review): presumably must be 0 —
+         * confirm the kernel rejects unknown flags.
+         */
+        __u64 flags;
+};
+
+/**
+ * struct drm_vc5_wait_seqno - ioctl argument for waiting for
+ * DRM_VC5_SUBMIT_CL completion using its returned seqno.
+ *
+ * timeout_ns is the timeout in nanoseconds, where "0" means "don't
+ * block, just return the status."
+ */
+struct drm_vc5_wait_seqno {
+        /** Sequence number returned from the submit ioctl. */
+        __u64 seqno;
+        __u64 timeout_ns;
+};
+
+/**
+ * struct drm_vc5_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_VC5_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_vc5_wait_bo {
+        __u32 handle;
+        /* Structure padding; NOTE(review): presumably must be 0. */
+        __u32 pad;
+        __u64 timeout_ns;
+};
+
+/**
+ * struct drm_vc5_create_bo - ioctl argument for creating VC5 BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc5_create_bo {
+        /** Requested size of the BO, in bytes. */
+        __u32 size;
+        __u32 flags;
+        /** Returned GEM handle for the BO. */
+        __u32 handle;
+        /**
+         * Returned offset for the BO in the V3D address space.  This offset
+         * is private to the DRM fd and is valid for the lifetime of the GEM
+         * handle.
+         */
+        __u32 offset;
+};
+
+/**
+ * struct drm_vc5_mmap_bo - ioctl argument for mapping VC5 BOs.
+ *
+ * This doesn't actually perform an mmap.  Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node.  This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc5_mmap_bo {
+        /** Handle for the object being mapped. */
+        __u32 handle;
+        __u32 flags;
+        /** offset into the drm node to use for subsequent mmap call. */
+        __u64 offset;
+};
+
+/* Identification/configuration register values queryable via
+ * DRM_IOCTL_VC5_GET_PARAM.
+ */
+enum drm_vc5_param {
+        DRM_VC5_PARAM_V3D_UIFCFG,
+        DRM_VC5_PARAM_V3D_HUB_IDENT1,
+        DRM_VC5_PARAM_V3D_HUB_IDENT2,
+        DRM_VC5_PARAM_V3D_HUB_IDENT3,
+        DRM_VC5_PARAM_V3D_CORE0_IDENT0,
+        DRM_VC5_PARAM_V3D_CORE0_IDENT1,
+        DRM_VC5_PARAM_V3D_CORE0_IDENT2,
+};
+
+struct drm_vc5_get_param {
+        /** One of enum drm_vc5_param. */
+        __u32 param;
+        __u32 pad;
+        /** Returned parameter value. */
+        __u64 value;
+};
+
+/**
+ * Returns the offset for the BO in the V3D address space for this DRM fd.
+ * This is the same value returned by drm_vc5_create_bo, if that was called
+ * from this DRM fd.
+ */
+struct drm_vc5_get_bo_offset {
+        __u32 handle;
+        /** Returned V3D address space offset. */
+        __u32 offset;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _VC5_DRM_H_ */
diff --git a/src/gallium/drivers/vc5/vc5_emit.c b/src/gallium/drivers/vc5/vc5_emit.c
new file mode 100644
index 00000000000..29ccfcdacdb
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_emit.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_half.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+/* Translates a gallium blend factor into the V3D hardware encoding.
+ *
+ * Note that PIPE_BLENDFACTOR_ZERO is not numerically 0 (gallium leaves
+ * the 0 value unused), so it must be handled in the switch as well as
+ * the disabled-blending early-out.
+ */
+static uint8_t
+vc5_factor(enum pipe_blendfactor factor)
+{
+        /* We may get a bad blendfactor when blending is disabled. */
+        if (factor == 0)
+                return V3D_BLEND_FACTOR_ZERO;
+
+        switch (factor) {
+        case PIPE_BLENDFACTOR_ZERO:
+                /* Fix: previously fell through to return ONE, inverting the
+                 * blend for any state tracker passing a real ZERO factor.
+                 */
+                return V3D_BLEND_FACTOR_ZERO;
+        case PIPE_BLENDFACTOR_ONE:
+                return V3D_BLEND_FACTOR_ONE;
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return V3D_BLEND_FACTOR_SRC_COLOR;
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                return V3D_BLEND_FACTOR_INV_SRC_COLOR;
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return V3D_BLEND_FACTOR_DST_COLOR;
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return V3D_BLEND_FACTOR_INV_DST_COLOR;
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return V3D_BLEND_FACTOR_SRC_ALPHA;
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return V3D_BLEND_FACTOR_DST_ALPHA;
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return V3D_BLEND_FACTOR_INV_DST_ALPHA;
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return V3D_BLEND_FACTOR_CONST_COLOR;
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return V3D_BLEND_FACTOR_INV_CONST_COLOR;
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return V3D_BLEND_FACTOR_CONST_ALPHA;
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
+        default:
+                unreachable("Bad blend factor");
+        }
+}
+
+/* Returns one half-float channel of the sampler border color, applying
+ * the format's channel swizzle when the texture hardware (rather than
+ * the shader) is responsible for swizzling.
+ */
+static inline uint16_t
+swizzled_border_color(struct pipe_sampler_state *sampler,
+                      struct vc5_sampler_view *sview,
+                      int chan)
+{
+        const struct util_format_description *desc =
+                util_format_description(sview->base.format);
+        uint8_t swiz = chan;
+
+        /* With <32-bit returns the sampler performs GL_ARB_texture_swizzle,
+         * so we only pre-swizzle the border color for the mismatch between
+         * the VC5 texture format and the PIPE_FORMAT.  With 32-bit returns,
+         * swizzling happens in the shader and the border color is used
+         * unswizzled.
+         */
+        if (vc5_get_tex_return_size(sview->base.format) != 32)
+                swiz = desc->swizzle[swiz];
+
+        if (swiz == PIPE_SWIZZLE_0)
+                return util_float_to_half(0.0);
+        else if (swiz == PIPE_SWIZZLE_1)
+                return util_float_to_half(1.0);
+        else
+                return util_float_to_half(sampler->border_color.f[swiz]);
+}
+
+/* Packs the TEXTURE_SHADER_STATE record for texture unit @i into the
+ * job's indirect CL, ORing our dynamically-computed fields into the
+ * pre-packed state from the sampler view and sampler state objects.
+ *
+ * Fix: for 32-bit texture returns, mag_img_filter was assigned twice
+ * while min_img_filter was never forced to nearest.
+ */
+static void
+emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex,
+                 int i)
+{
+        struct vc5_job *job = vc5->job;
+        struct pipe_sampler_state *psampler = stage_tex->samplers[i];
+        struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+        struct pipe_sampler_view *psview = stage_tex->textures[i];
+        struct vc5_sampler_view *sview = vc5_sampler_view(psview);
+        struct pipe_resource *prsc = psview->texture;
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        /* Record where this unit's state lives so the uniform stream can
+         * reference it.
+         */
+        stage_tex->texture_state[i].offset =
+                vc5_cl_ensure_space(&job->indirect,
+                                    cl_packet_length(TEXTURE_SHADER_STATE),
+                                    32);
+        vc5_bo_set_reference(&stage_tex->texture_state[i].bo,
+                             job->indirect.bo);
+
+        struct V3D33_TEXTURE_SHADER_STATE unpacked = {
+                /* XXX */
+                .border_color_red = swizzled_border_color(psampler, sview, 0),
+                .border_color_green = swizzled_border_color(psampler, sview, 1),
+                .border_color_blue = swizzled_border_color(psampler, sview, 2),
+                .border_color_alpha = swizzled_border_color(psampler, sview, 3),
+
+                /* XXX: Disable min/maxlod for txf */
+                .max_level_of_detail = MIN2(MIN2(psampler->max_lod,
+                                                 VC5_MAX_MIP_LEVELS),
+                                            psview->u.tex.last_level),
+
+                .texture_base_pointer = cl_address(rsc->bo,
+                                                   rsc->slices[0].offset),
+        };
+
+        int min_img_filter = psampler->min_img_filter;
+        int min_mip_filter = psampler->min_mip_filter;
+        int mag_img_filter = psampler->mag_img_filter;
+
+        if (vc5_get_tex_return_size(psview->format) == 32) {
+                /* Force nearest sampling for minification as well as
+                 * magnification (previously min_img_filter was left
+                 * unmodified and mag_img_filter assigned twice).
+                 */
+                min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
+                min_img_filter = PIPE_TEX_FILTER_NEAREST;
+                mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+        }
+
+        bool min_nearest = (min_img_filter == PIPE_TEX_FILTER_NEAREST);
+        switch (min_mip_filter) {
+        case PIPE_TEX_MIPFILTER_NONE:
+                unpacked.minification_filter = 0 + min_nearest;
+                break;
+        case PIPE_TEX_MIPFILTER_NEAREST:
+                unpacked.minification_filter = 2 + !min_nearest;
+                break;
+        case PIPE_TEX_MIPFILTER_LINEAR:
+                unpacked.minification_filter = 4 + !min_nearest;
+                break;
+        }
+        unpacked.magnification_filter = (mag_img_filter ==
+                                         PIPE_TEX_FILTER_NEAREST);
+
+        /* Pack our fields, then merge in the view's and sampler's
+         * pre-packed halves of the record.
+         */
+        uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
+        cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
+
+        /* Loop index renamed from "i" to avoid shadowing the unit index. */
+        for (int j = 0; j < ARRAY_SIZE(packed); j++)
+                packed[j] |= sview->texture_shader_state[j] | sampler->texture_shader_state[j];
+
+        cl_emit_prepacked(&job->indirect, &packed);
+}
+
+/* Emits a TEXTURE_SHADER_STATE record for each bound texture unit. */
+static void
+emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex)
+{
+        for (int unit = 0; unit < stage_tex->num_textures; unit++)
+                emit_one_texture(vc5, stage_tex, unit);
+}
+
+/**
+ * Emits the dirty chunks of gallium state into the job's binner command
+ * list (BCL).
+ *
+ * Each VC5_DIRTY_* group is only re-emitted when the corresponding gallium
+ * state has changed since the last draw recorded into this job.
+ */
+void
+vc5_emit_state(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_job *job = vc5->job;
+
+        if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
+                          VC5_DIRTY_RASTERIZER)) {
+                float *vpscale = vc5->viewport.scale;
+                float *vptranslate = vc5->viewport.translate;
+                float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
+                float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
+                float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
+                float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
+
+                /* Clip to the scissor if it's enabled, but still clip to the
+                 * drawable regardless since that controls where the binner
+                 * tries to put things.
+                 *
+                 * Additionally, always clip the rendering to the viewport,
+                 * since the hardware does guardband clipping, meaning
+                 * primitives would rasterize outside of the view volume.
+                 */
+                uint32_t minx, miny, maxx, maxy;
+                if (!vc5->rasterizer->base.scissor) {
+                        minx = MAX2(vp_minx, 0);
+                        miny = MAX2(vp_miny, 0);
+                        maxx = MIN2(vp_maxx, job->draw_width);
+                        maxy = MIN2(vp_maxy, job->draw_height);
+                } else {
+                        minx = MAX2(vp_minx, vc5->scissor.minx);
+                        miny = MAX2(vp_miny, vc5->scissor.miny);
+                        maxx = MIN2(vp_maxx, vc5->scissor.maxx);
+                        maxy = MIN2(vp_maxy, vc5->scissor.maxy);
+                }
+
+                cl_emit(&job->bcl, CLIP_WINDOW, clip) {
+                        clip.clip_window_left_pixel_coordinate = minx;
+                        clip.clip_window_bottom_pixel_coordinate = miny;
+                        clip.clip_window_width_in_pixels = maxx - minx;
+                        /* Fixed: the height field was assigned twice; set it
+                         * exactly once.
+                         */
+                        clip.clip_window_height_in_pixels = maxy - miny;
+                }
+
+                /* Grow the job's drawn-bounds box to cover this clip window,
+                 * so the RCL only touches tiles that may have been drawn.
+                 */
+                job->draw_min_x = MIN2(job->draw_min_x, minx);
+                job->draw_min_y = MIN2(job->draw_min_y, miny);
+                job->draw_max_x = MAX2(job->draw_max_x, maxx);
+                job->draw_max_y = MAX2(job->draw_max_y, maxy);
+        }
+
+        if (vc5->dirty & (VC5_DIRTY_RASTERIZER |
+                          VC5_DIRTY_ZSA |
+                          VC5_DIRTY_BLEND |
+                          VC5_DIRTY_COMPILED_FS)) {
+                cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
+                        config.enable_forward_facing_primitive =
+                                !(vc5->rasterizer->base.cull_face &
+                                  PIPE_FACE_FRONT);
+                        config.enable_reverse_facing_primitive =
+                                !(vc5->rasterizer->base.cull_face &
+                                  PIPE_FACE_BACK);
+                        /* This seems backwards, but it's what gets the
+                         * clipflat test to pass.
+                         */
+                        config.clockwise_primitives =
+                                vc5->rasterizer->base.front_ccw;
+
+                        config.enable_depth_offset =
+                                vc5->rasterizer->base.offset_tri;
+
+                        config.rasterizer_oversample_mode =
+                                vc5->rasterizer->base.multisample;
+
+                        config.blend_enable = vc5->blend->rt[0].blend_enable;
+
+                        config.early_z_updates_enable = true;
+                        if (vc5->zsa->base.depth.enabled) {
+                                config.z_updates_enable =
+                                        vc5->zsa->base.depth.writemask;
+                                config.early_z_enable =
+                                        vc5->zsa->early_z_enable;
+                                config.depth_test_function =
+                                        vc5->zsa->base.depth.func;
+                        } else {
+                                /* Depth test disabled: always pass, and the
+                                 * z_updates_enable default (off) applies.
+                                 */
+                                config.depth_test_function = PIPE_FUNC_ALWAYS;
+                        }
+                }
+
+        }
+
+        if (vc5->dirty & VC5_DIRTY_RASTERIZER) {
+                cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
+                        depth.depth_offset_factor =
+                                vc5->rasterizer->offset_factor;
+                        depth.depth_offset_units =
+                                vc5->rasterizer->offset_units;
+                }
+
+                cl_emit(&job->bcl, POINT_SIZE, point_size) {
+                        point_size.point_size = vc5->rasterizer->point_size;
+                }
+
+                cl_emit(&job->bcl, LINE_WIDTH, line_width) {
+                        line_width.line_width = vc5->rasterizer->base.line_width;
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_VIEWPORT) {
+                cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
+                        clip.viewport_half_width_in_1_256th_of_pixel =
+                                vc5->viewport.scale[0] * 256.0f;
+                        clip.viewport_half_height_in_1_256th_of_pixel =
+                                vc5->viewport.scale[1] * 256.0f;
+                }
+
+                cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+                        clip.viewport_z_offset_zc_to_zs =
+                                vc5->viewport.translate[2];
+                        clip.viewport_z_scale_zc_to_zs =
+                                vc5->viewport.scale[2];
+                }
+                if (0 /* XXX */) {
+                        cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
+                                clip.minimum_zw = (vc5->viewport.translate[2] -
+                                                   vc5->viewport.scale[2]);
+                                clip.maximum_zw = (vc5->viewport.translate[2] +
+                                                   vc5->viewport.scale[2]);
+                        }
+                }
+
+                cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
+                        vp.viewport_centre_x_coordinate =
+                                vc5->viewport.translate[0];
+                        vp.viewport_centre_y_coordinate =
+                                vc5->viewport.translate[1];
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_BLEND) {
+                struct pipe_blend_state *blend = vc5->blend;
+
+                cl_emit(&job->bcl, BLEND_CONFIG, config) {
+                        struct pipe_rt_blend_state *rtblend = &blend->rt[0];
+
+                        config.colour_blend_mode = rtblend->rgb_func;
+                        config.colour_blend_dst_factor =
+                                vc5_factor(rtblend->rgb_dst_factor);
+                        config.colour_blend_src_factor =
+                                vc5_factor(rtblend->rgb_src_factor);
+
+                        config.alpha_blend_mode = rtblend->alpha_func;
+                        config.alpha_blend_dst_factor =
+                                vc5_factor(rtblend->alpha_dst_factor);
+                        config.alpha_blend_src_factor =
+                                vc5_factor(rtblend->alpha_src_factor);
+                }
+
+                /* The hardware masks are "disable writes" bits, so invert
+                 * gallium's "enable writes" colormasks.
+                 */
+                cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
+                        mask.render_target_0_per_colour_component_write_masks =
+                                (~blend->rt[0].colormask) & 0xf;
+                        mask.render_target_1_per_colour_component_write_masks =
+                                (~blend->rt[1].colormask) & 0xf;
+                        mask.render_target_2_per_colour_component_write_masks =
+                                (~blend->rt[2].colormask) & 0xf;
+                        mask.render_target_3_per_colour_component_write_masks =
+                                (~blend->rt[3].colormask) & 0xf;
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) {
+                cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
+                        /* XXX: format-dependent swizzling */
+                        colour.red_f16 = vc5->blend_color.hf[2];
+                        colour.green_f16 = vc5->blend_color.hf[1];
+                        colour.blue_f16 = vc5->blend_color.hf[0];
+                        colour.alpha_f16 = vc5->blend_color.hf[3];
+                }
+        }
+
+        if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
+                struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0];
+                struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1];
+
+                /* The front config also covers the back faces when two-sided
+                 * stencil is disabled (back_config set below).
+                 */
+                cl_emit(&job->bcl, STENCIL_CONFIG, config) {
+                        config.front_config = true;
+                        config.back_config = !back->enabled;
+
+                        config.stencil_write_mask = front->writemask;
+                        config.stencil_test_mask = front->valuemask;
+
+                        config.stencil_test_function = front->func;
+                        config.stencil_pass_op = front->zpass_op;
+                        config.depth_test_fail_op = front->zfail_op;
+                        config.stencil_test_fail_op = front->fail_op;
+
+                        config.stencil_ref_value = vc5->stencil_ref.ref_value[0];
+                }
+
+                if (back->enabled) {
+                        cl_emit(&job->bcl, STENCIL_CONFIG, config) {
+                                config.front_config = false;
+                                config.back_config = true;
+
+                                config.stencil_write_mask = back->writemask;
+                                config.stencil_test_mask = back->valuemask;
+
+                                config.stencil_test_function = back->func;
+                                config.stencil_pass_op = back->zpass_op;
+                                config.depth_test_fail_op = back->zfail_op;
+                                config.stencil_test_fail_op = back->fail_op;
+
+                                config.stencil_ref_value =
+                                        vc5->stencil_ref.ref_value[1];
+                        }
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_FRAGTEX)
+                emit_textures(vc5, &vc5->fragtex);
+
+        /* Fixed copy-and-paste bug: VERTTEX must emit the vertex-stage
+         * texture state, not the fragment-stage state.
+         */
+        if (vc5->dirty & VC5_DIRTY_VERTTEX)
+                emit_textures(vc5, &vc5->verttex);
+
+        if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
+                /* XXX: Need to handle more than 24 entries. */
+                cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
+                        flags.varying_offset_v0 = 0;
+
+                        flags.flat_shade_flags_for_varyings_v024 =
+                                vc5->prog.fs->prog_data.fs->flat_shade_flags[0] & 0xfffff;
+
+                        if (vc5->rasterizer->base.flatshade) {
+                                flags.flat_shade_flags_for_varyings_v024 |=
+                                        vc5->prog.fs->prog_data.fs->color_inputs[0] & 0xfffff;
+                        }
+                }
+        }
+
+        if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
+                struct vc5_streamout_stateobj *so = &vc5->streamout;
+
+                if (so->num_targets) {
+                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
+                                tfe.number_of_32_bit_output_buffer_address_following =
+                                        so->num_targets;
+                                tfe.number_of_16_bit_output_data_specs_following =
+                                        vc5->prog.bind_vs->num_tf_specs;
+                        };
+
+                        /* The output specs were prepacked at shader CSO
+                         * creation time; just replay them.
+                         */
+                        for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
+                                cl_emit_prepacked(&job->bcl,
+                                                  &vc5->prog.bind_vs->tf_specs[i]);
+                        }
+
+                        for (int i = 0; i < so->num_targets; i++) {
+                                const struct pipe_stream_output_target *target =
+                                        so->targets[i];
+                                struct vc5_resource *rsc =
+                                        vc5_resource(target->buffer);
+
+                                cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
+                                        output.address =
+                                                cl_address(rsc->bo,
+                                                           target->buffer_offset);
+                                };
+                                /* XXX: buffer_size? */
+                        }
+                } else {
+                        /* XXX? */
+                }
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_fence.c b/src/gallium/drivers/vc5/vc5_fence.c
new file mode 100644
index 00000000000..08de9bca5a1
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_fence.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_fence.c
+ *
+ * Seqno-based fence management.
+ *
+ * We have two mechanisms for waiting in our kernel API: you can wait on a BO
+ * to have all rendering to it from any process be completed, or wait on a
+ * seqno for that particular seqno to be passed. The fence API we're
+ * implementing is based on waiting for all rendering in the context to have
+ * completed (with no reference to what other processes might be doing with
+ * the same BOs), so we can just use the seqno of the last rendering we'd
+ * fired off as our fence marker.
+ */
+
+#include "util/u_inlines.h"
+
+#include "vc5_screen.h"
+#include "vc5_bufmgr.h"
+
+/* A fence is just the seqno of the last submitted rendering, plus a
+ * refcount for the gallium fence_reference API.
+ */
+struct vc5_fence {
+        struct pipe_reference reference;
+        /* Kernel seqno that must pass for this fence to be signaled. */
+        uint64_t seqno;
+};
+
+/* Implements pipe_screen::fence_reference: points *pp at pf, dropping the
+ * old reference and freeing the old fence if it was the last one.
+ */
+static void
+vc5_fence_reference(struct pipe_screen *pscreen,
+                    struct pipe_fence_handle **pp,
+                    struct pipe_fence_handle *pf)
+{
+        struct vc5_fence **p = (struct vc5_fence **)pp;
+        struct vc5_fence *f = (struct vc5_fence *)pf;
+        struct vc5_fence *old = *p;
+
+        /* pipe_reference() returns true when the old object's refcount hit
+         * zero.  A fence owns no GPU resources, so free() is sufficient.
+         */
+        if (pipe_reference(&(*p)->reference, &f->reference)) {
+                free(old);
+        }
+        *p = f;
+}
+
+/* Implements pipe_screen::fence_finish by waiting for the fence's seqno. */
+static boolean
+vc5_fence_finish(struct pipe_screen *pscreen,
+                 struct pipe_context *ctx,
+                 struct pipe_fence_handle *pf,
+                 uint64_t timeout_ns)
+{
+        struct vc5_fence *fence = (struct vc5_fence *)pf;
+
+        /* A fence is just a seqno marker, so finishing it is waiting for
+         * that seqno to be reached.
+         */
+        return vc5_wait_seqno(vc5_screen(pscreen), fence->seqno, timeout_ns,
+                              "fence wait");
+}
+
+/* Creates a fence marking the given kernel seqno, with one reference held
+ * for the caller.  Returns NULL on allocation failure.
+ */
+struct vc5_fence *
+vc5_fence_create(struct vc5_screen *screen, uint64_t seqno)
+{
+        struct vc5_fence *fence = calloc(1, sizeof(*fence));
+
+        if (fence) {
+                pipe_reference_init(&fence->reference, 1);
+                fence->seqno = seqno;
+        }
+
+        return fence;
+}
+
+/* Hooks the fence implementation into the screen's vtable. */
+void
+vc5_fence_init(struct vc5_screen *screen)
+{
+        screen->base.fence_finish = vc5_fence_finish;
+        screen->base.fence_reference = vc5_fence_reference;
+}
diff --git a/src/gallium/drivers/vc5/vc5_formats.c b/src/gallium/drivers/vc5/vc5_formats.c
new file mode 100644
index 00000000000..fe2600207e9
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_formats.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_formats.c
+ *
+ * Contains the table and accessors for VC5 texture and render target format
+ * support.
+ *
+ * The hardware has limited support for texture formats, and extremely limited
+ * support for render target formats. As a result, we emulate other formats
+ * in our shader code, and this stores the table for doing so.
+ */
+
+#include "util/u_format.h"
+#include "util/macros.h"
+
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#define OUTPUT_IMAGE_FORMAT_NO 255
+
+/* Per-pipe_format entry in vc5_format_table below, describing how (and
+ * whether) the format can be textured from and rendered to.
+ */
+struct vc5_format {
+        /** Set if the pipe format is defined in the table. */
+        bool present;
+
+        /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
+        uint8_t rt_type;
+
+        /** One of V3D33_TEXTURE_DATA_FORMAT_*. */
+        uint8_t tex_type;
+
+        /**
+         * Swizzle to apply to the RGBA shader output for storing to the tile
+         * buffer, to the RGBA tile buffer to produce shader input (for
+         * blending), and for turning the rgba8888 texture sampler return
+         * value into shader rgba values.
+         */
+        uint8_t swizzle[4];
+
+        /* Whether the return value is 16F/I/UI or 32F/I/UI. */
+        uint8_t return_size;
+
+        /* If return_size == 32, how many channels are returned by texturing.
+         * 16 always returns 2 pairs of 16 bit values.
+         */
+        uint8_t return_channels;
+};
+
+/* Expands to a 4-entry PIPE_SWIZZLE_* initializer for vc5_format::swizzle. */
+#define SWIZ(x,y,z,w) { \
+        PIPE_SWIZZLE_##x, \
+        PIPE_SWIZZLE_##y, \
+        PIPE_SWIZZLE_##z, \
+        PIPE_SWIZZLE_##w \
+}
+
+/* Builds one designated-initializer entry of vc5_format_table, pasting the
+ * V3D output-image and texture-data format enum names.
+ */
+#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels) \
+        [PIPE_FORMAT_##pipe] = { \
+                true, \
+                OUTPUT_IMAGE_FORMAT_##rt, \
+                TEXTURE_DATA_FORMAT_##tex, \
+                swiz, \
+                return_size, \
+                return_channels, \
+        }
+
+/* Common swizzle shorthands used by the table entries. */
+#define SWIZ_X001	SWIZ(X, 0, 0, 1)
+#define SWIZ_XY01	SWIZ(X, Y, 0, 1)
+#define SWIZ_XYZ1	SWIZ(X, Y, Z, 1)
+#define SWIZ_XYZW	SWIZ(X, Y, Z, W)
+#define SWIZ_YZWX	SWIZ(Y, Z, W, X)
+#define SWIZ_YZW1	SWIZ(Y, Z, W, 1)
+#define SWIZ_ZYXW	SWIZ(Z, Y, X, W)
+#define SWIZ_ZYX1	SWIZ(Z, Y, X, 1)
+#define SWIZ_XXXY	SWIZ(X, X, X, Y)
+#define SWIZ_XXX1	SWIZ(X, X, X, 1)
+#define SWIZ_XXXX	SWIZ(X, X, X, X)
+#define SWIZ_000X	SWIZ(0, 0, 0, X)
+
+/* Format support table, indexed by gallium pipe_format.  Only formats with
+ * an entry here (present == true) can be sampled from, and only entries
+ * whose rt_type is not OUTPUT_IMAGE_FORMAT_NO can be rendered to.
+ */
+static const struct vc5_format vc5_format_table[] = {
+        FORMAT(B8G8R8A8_UNORM,    RGBA8,        RGBA8,       SWIZ_ZYXW, 16, 0),
+        FORMAT(B8G8R8X8_UNORM,    RGBX8,        RGBA8,       SWIZ_ZYX1, 16, 0),
+        FORMAT(B8G8R8A8_SRGB,     SRGB8_ALPHA8, RGBA8,       SWIZ_ZYXW, 16, 0),
+        FORMAT(B8G8R8X8_SRGB,     SRGBX8,       RGBA8,       SWIZ_ZYX1, 16, 0),
+        FORMAT(R8G8B8A8_UNORM,    RGBA8,        RGBA8,       SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8X8_UNORM,    RGBX8,        RGBA8,       SWIZ_XYZ1, 16, 0),
+        FORMAT(R8G8B8A8_SNORM,    NO,           RGBA8_SNORM, SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8X8_SNORM,    NO,           RGBA8_SNORM, SWIZ_XYZ1, 16, 0),
+        FORMAT(B10G10R10A2_UNORM, RGB10_A2,     RGB10_A2,    SWIZ_ZYXW, 16, 0),
+
+        FORMAT(B4G4R4A4_UNORM,    ABGR4444,     RGBA4,       SWIZ_YZWX, 16, 0),
+        FORMAT(B4G4R4X4_UNORM,    ABGR4444,     RGBA4,       SWIZ_YZW1, 16, 0),
+
+        FORMAT(B5G5R5A1_UNORM,    NO,           RGB5_A1,     SWIZ_YZWX, 16, 0),
+        FORMAT(B5G5R5X1_UNORM,    NO,           RGB5_A1,     SWIZ_YZW1, 16, 0),
+        FORMAT(B5G6R5_UNORM,      BGR565,       RGB565,      SWIZ_XYZ1, 16, 0),
+
+        FORMAT(R8_UNORM,          R8,           R8,          SWIZ_X001, 16, 0),
+        FORMAT(R8_SNORM,          NO,           R8_SNORM,    SWIZ_X001, 16, 0),
+        FORMAT(R8G8_UNORM,        RG8,          RG8,         SWIZ_XY01, 16, 0),
+        FORMAT(R8G8_SNORM,        NO,           RG8_SNORM,   SWIZ_XY01, 16, 0),
+
+        FORMAT(R16_UNORM,         NO,           R16,         SWIZ_X001, 32, 1),
+        FORMAT(R16_SNORM,         NO,           R16_SNORM,   SWIZ_X001, 32, 1),
+        FORMAT(R16_FLOAT,         R16F,         R16F,        SWIZ_X001, 16, 0),
+        FORMAT(R32_FLOAT,         R32F,         R32F,        SWIZ_X001, 32, 1),
+
+        FORMAT(R16G16_UNORM,      NO,           RG16,        SWIZ_XY01, 32, 2),
+        FORMAT(R16G16_SNORM,      NO,           RG16_SNORM,  SWIZ_XY01, 32, 2),
+        FORMAT(R16G16_FLOAT,      RG16F,        RG16F,       SWIZ_XY01, 16, 0),
+        FORMAT(R32G32_FLOAT,      RG32F,        RG32F,       SWIZ_XY01, 32, 2),
+
+        FORMAT(R16G16B16A16_UNORM, NO,          RGBA16,      SWIZ_XYZW, 32, 4),
+        FORMAT(R16G16B16A16_SNORM, NO,          RGBA16_SNORM, SWIZ_XYZW, 32, 4),
+        FORMAT(R16G16B16A16_FLOAT, RGBA16F,     RGBA16F,     SWIZ_XYZW, 16, 0),
+        FORMAT(R32G32B32A32_FLOAT, RGBA32F,     RGBA32F,     SWIZ_XYZW, 32, 4),
+
+        /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. */
+        FORMAT(L16_UNORM,         NO,           R16,         SWIZ_XXX1, 32, 1),
+        FORMAT(L16_SNORM,         NO,           R16_SNORM,   SWIZ_XXX1, 32, 1),
+        FORMAT(I16_UNORM,         NO,           R16,         SWIZ_XXXX, 32, 1),
+        FORMAT(I16_SNORM,         NO,           R16_SNORM,   SWIZ_XXXX, 32, 1),
+        FORMAT(A16_UNORM,         NO,           R16,         SWIZ_000X, 32, 1),
+        FORMAT(A16_SNORM,         NO,           R16_SNORM,   SWIZ_000X, 32, 1),
+        FORMAT(L16A16_UNORM,      NO,           RG16,        SWIZ_XXXY, 32, 2),
+        FORMAT(L16A16_SNORM,      NO,           RG16_SNORM,  SWIZ_XXXY, 32, 2),
+
+        FORMAT(A8_UNORM,          NO,           R8,          SWIZ_000X, 16, 0),
+        FORMAT(L8_UNORM,          NO,           R8,          SWIZ_XXX1, 16, 0),
+        FORMAT(I8_UNORM,          NO,           R8,          SWIZ_XXXX, 16, 0),
+        FORMAT(L8A8_UNORM,        NO,           RG8,         SWIZ_XXXY, 16, 0),
+
+        FORMAT(R8_SINT,           R8I,          S8,          SWIZ_X001, 16, 0),
+        FORMAT(R8_UINT,           R8UI,         S8,          SWIZ_X001, 16, 0),
+        FORMAT(R8G8_SINT,         RG8I,         S16,         SWIZ_XY01, 16, 0),
+        FORMAT(R8G8_UINT,         RG8UI,        S16,         SWIZ_XY01, 16, 0),
+        FORMAT(R8G8B8A8_SINT,     RGBA8I,       R32F,        SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8A8_UINT,     RGBA8UI,      R32F,        SWIZ_XYZW, 16, 0),
+
+        FORMAT(R16_SINT,          R16I,         S16,         SWIZ_X001, 16, 0),
+        FORMAT(R16_UINT,          R16UI,        S16,         SWIZ_X001, 16, 0),
+        FORMAT(R16G16_SINT,       RG16I,        R32F,        SWIZ_XY01, 16, 0),
+        FORMAT(R16G16_UINT,       RG16UI,       R32F,        SWIZ_XY01, 16, 0),
+        FORMAT(R16G16B16A16_SINT, RGBA16I,      RG32F,       SWIZ_XYZW, 16, 0),
+        FORMAT(R16G16B16A16_UINT, RGBA16UI,     RG32F,       SWIZ_XYZW, 16, 0),
+
+        FORMAT(R32_SINT,          R32I,         R32F,        SWIZ_X001, 16, 0),
+        FORMAT(R32_UINT,          R32UI,        R32F,        SWIZ_X001, 16, 0),
+        FORMAT(R32G32_SINT,       RG32I,        RG32F,       SWIZ_XY01, 16, 0),
+        FORMAT(R32G32_UINT,       RG32UI,       RG32F,       SWIZ_XY01, 16, 0),
+        FORMAT(R32G32B32A32_SINT, RGBA32I,      RGBA32F,     SWIZ_XYZW, 16, 0),
+        FORMAT(R32G32B32A32_UINT, RGBA32UI,     RGBA32F,     SWIZ_XYZW, 16, 0),
+
+        FORMAT(A8_SINT,           R8I,          S8,          SWIZ_000X, 16, 0),
+        FORMAT(A8_UINT,           R8UI,         S8,          SWIZ_000X, 16, 0),
+        FORMAT(A16_SINT,          R16I,         S16,         SWIZ_000X, 16, 0),
+        FORMAT(A16_UINT,          R16UI,        S16,         SWIZ_000X, 16, 0),
+        FORMAT(A32_SINT,          R32I,         R32F,        SWIZ_000X, 16, 0),
+        FORMAT(A32_UINT,          R32UI,        R32F,        SWIZ_000X, 16, 0),
+
+        FORMAT(R11G11B10_FLOAT,   R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 16, 0),
+        FORMAT(R9G9B9E5_FLOAT,    NO,           RGB9_E5,     SWIZ_XYZW, 16, 0),
+
+        FORMAT(S8_UINT_Z24_UNORM, DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_X001, 32, 1),
+        FORMAT(X8Z24_UNORM,       DEPTH_COMPONENT24, DEPTH24_X8, SWIZ_X001, 32, 1),
+        FORMAT(S8X24_UINT,        NO,           R32F,        SWIZ_X001, 32, 1),
+        FORMAT(Z32_FLOAT,         DEPTH_COMPONENT32F, R32F,  SWIZ_X001, 32, 1),
+
+        /* Pretend we support this, but it'll be separate Z32F depth and S8. */
+        FORMAT(Z32_FLOAT_S8X24_UINT, DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1),
+
+        FORMAT(ETC2_RGB8,         NO,           RGB8_ETC2,   SWIZ_XYZ1, 16, 0),
+        FORMAT(ETC2_SRGB8,        NO,           RGB8_ETC2,   SWIZ_XYZ1, 16, 0),
+        FORMAT(ETC2_RGB8A1,       NO,           RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_SRGB8A1,      NO,           RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_RGBA8,        NO,           RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_R11_UNORM,    NO,           R11_EAC,     SWIZ_X001, 16, 0),
+        FORMAT(ETC2_R11_SNORM,    NO,           SIGNED_R11_EAC, SWIZ_X001, 16, 0),
+        FORMAT(ETC2_RG11_UNORM,   NO,           RG11_EAC,    SWIZ_XY01, 16, 0),
+        FORMAT(ETC2_RG11_SNORM,   NO,           SIGNED_RG11_EAC, SWIZ_XY01, 16, 0),
+
+        FORMAT(DXT1_RGB,          NO,           BC1,         SWIZ_XYZ1, 16, 0),
+        FORMAT(DXT3_RGBA,         NO,           BC2,         SWIZ_XYZ1, 16, 0),
+        FORMAT(DXT5_RGBA,         NO,           BC3,         SWIZ_XYZ1, 16, 0),
+};
+
+/* Looks up a pipe_format in the table, returning NULL when the format is
+ * out of range or has no entry (i.e. is unsupported).
+ */
+static const struct vc5_format *
+get_format(enum pipe_format f)
+{
+        if (f < ARRAY_SIZE(vc5_format_table) && vc5_format_table[f].present)
+                return &vc5_format_table[f];
+
+        return NULL;
+}
+
+/* Returns whether the format can be used as a render target.  A format may
+ * be texturable (present in the table) without being renderable.
+ */
+bool
+vc5_rt_format_supported(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+
+        return entry && entry->rt_type != OUTPUT_IMAGE_FORMAT_NO;
+}
+
+/* Returns the V3D33_OUTPUT_IMAGE_FORMAT_* for a pipe format, or 0 for an
+ * unsupported format.
+ */
+uint8_t
+vc5_get_rt_format(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+
+        return entry ? entry->rt_type : 0;
+}
+
+/* Returns whether the format can be sampled from. */
+bool
+vc5_tex_format_supported(enum pipe_format f)
+{
+        return get_format(f) != NULL;
+}
+
+/* Returns the V3D33_TEXTURE_DATA_FORMAT_* for a pipe format, or 0 for an
+ * unsupported format.
+ */
+uint8_t
+vc5_get_tex_format(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+
+        return entry ? entry->tex_type : 0;
+}
+
+/* Returns the sampler return size (16 or 32) for the format, or 0 for an
+ * unsupported format.
+ */
+uint8_t
+vc5_get_tex_return_size(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+
+        return entry ? entry->return_size : 0;
+}
+
+/* Returns how many channels texturing returns for 32-bit-return formats, or
+ * 0 for an unsupported format.
+ */
+uint8_t
+vc5_get_tex_return_channels(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+
+        return entry ? entry->return_channels : 0;
+}
+
+/* Returns the format's channel swizzle, falling back to the identity
+ * swizzle for formats with no table entry.
+ */
+const uint8_t *
+vc5_get_format_swizzle(enum pipe_format f)
+{
+        static const uint8_t identity[] = {0, 1, 2, 3};
+        const struct vc5_format *entry = get_format(f);
+
+        return entry ? entry->swizzle : identity;
+}
+
+/* Maps a V3D33 OUTPUT_IMAGE_FORMAT_* to the tile buffer's internal type
+ * (*type = INTERNAL_TYPE_*) and per-pixel storage (*bpp = INTERNAL_BPP_*)
+ * for that render target.  Unknown formats get 8-bit/32bpp defaults.
+ */
+void
+vc5_get_internal_type_bpp_for_output_format(uint32_t format,
+                                            uint32_t *type,
+                                            uint32_t *bpp)
+{
+        switch (format) {
+        case OUTPUT_IMAGE_FORMAT_RGBA8:
+        case OUTPUT_IMAGE_FORMAT_RGBX8:
+        case OUTPUT_IMAGE_FORMAT_RGB8:
+        case OUTPUT_IMAGE_FORMAT_RG8:
+        case OUTPUT_IMAGE_FORMAT_R8:
+        case OUTPUT_IMAGE_FORMAT_ABGR4444:
+        case OUTPUT_IMAGE_FORMAT_BGR565:
+        case OUTPUT_IMAGE_FORMAT_ABGR1555:
+                *type = INTERNAL_TYPE_8;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA8I:
+        case OUTPUT_IMAGE_FORMAT_RG8I:
+        case OUTPUT_IMAGE_FORMAT_R8I:
+                *type = INTERNAL_TYPE_8I;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA8UI:
+        case OUTPUT_IMAGE_FORMAT_RG8UI:
+        case OUTPUT_IMAGE_FORMAT_R8UI:
+                *type = INTERNAL_TYPE_8UI;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8:
+        case OUTPUT_IMAGE_FORMAT_SRGB:
+        case OUTPUT_IMAGE_FORMAT_RGB10_A2:
+        case OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F:
+        case OUTPUT_IMAGE_FORMAT_SRGBX8:
+        case OUTPUT_IMAGE_FORMAT_RGBA16F:
+                /* Note that sRGB RTs are stored in the tile buffer at 16F,
+                 * and the conversion to sRGB happens at tilebuffer
+                 * load/store.
+                 */
+                *type = INTERNAL_TYPE_16F;
+                *bpp = INTERNAL_BPP_64;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RG16F:
+        case OUTPUT_IMAGE_FORMAT_R16F:
+                *type = INTERNAL_TYPE_16F;
+                /* Use 64bpp to make sure the TLB doesn't throw away the alpha
+                 * channel before alpha test happens.
+                 */
+                *bpp = INTERNAL_BPP_64;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA16I:
+                *type = INTERNAL_TYPE_16I;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG16I:
+        case OUTPUT_IMAGE_FORMAT_R16I:
+                *type = INTERNAL_TYPE_16I;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA16UI:
+                *type = INTERNAL_TYPE_16UI;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG16UI:
+        case OUTPUT_IMAGE_FORMAT_R16UI:
+                *type = INTERNAL_TYPE_16UI;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA32I:
+                *type = INTERNAL_TYPE_32I;
+                *bpp = INTERNAL_BPP_128;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG32I:
+                *type = INTERNAL_TYPE_32I;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_R32I:
+                *type = INTERNAL_TYPE_32I;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA32UI:
+                *type = INTERNAL_TYPE_32UI;
+                *bpp = INTERNAL_BPP_128;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG32UI:
+                *type = INTERNAL_TYPE_32UI;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_R32UI:
+                *type = INTERNAL_TYPE_32UI;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        case OUTPUT_IMAGE_FORMAT_RGBA32F:
+                *type = INTERNAL_TYPE_32F;
+                *bpp = INTERNAL_BPP_128;
+                break;
+        case OUTPUT_IMAGE_FORMAT_RG32F:
+                *type = INTERNAL_TYPE_32F;
+                *bpp = INTERNAL_BPP_64;
+                break;
+        case OUTPUT_IMAGE_FORMAT_R32F:
+                *type = INTERNAL_TYPE_32F;
+                *bpp = INTERNAL_BPP_32;
+                break;
+
+        default:
+                /* Provide some default values, as we'll be called at RB
+                 * creation time, even if an RB with this format isn't
+                 * supported.
+                 */
+                *type = INTERNAL_TYPE_8;
+                *bpp = INTERNAL_BPP_32;
+                break;
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_job.c b/src/gallium/drivers/vc5/vc5_job.c
new file mode 100644
index 00000000000..57cf96725b9
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_job.c
@@ -0,0 +1,429 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_job.c
+ *
+ * Functions for submitting VC5 render jobs to the kernel.
+ */
+
+#include <xf86drm.h>
+#include "vc5_context.h"
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "broadcom/clif/clif_dump.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/* Removes @key from the hash table if it is present.
+ *
+ * _mesa_hash_table_remove() dereferences the entry, so guard against a
+ * lookup miss instead of crashing on a NULL entry.
+ */
+static void
+remove_from_ht(struct hash_table *ht, void *key)
+{
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        if (entry)
+                _mesa_hash_table_remove(ht, entry);
+}
+
+/* Frees the job and everything it referenced: BO references, hash table
+ * entries, surfaces, and command lists.  Also clears vc5->job if this was
+ * the currently-bound job.
+ */
+static void
+vc5_job_free(struct vc5_context *vc5, struct vc5_job *job)
+{
+        struct set_entry *entry;
+
+        /* Drop the reference the job held on each BO it used. */
+        set_foreach(job->bos, entry) {
+                struct vc5_bo *bo = (struct vc5_bo *)entry->key;
+                vc5_bo_unreference(&bo);
+        }
+
+        remove_from_ht(vc5->jobs, &job->key);
+
+        /* Unlink the job from the resources it was writing to. */
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (job->cbufs[i]) {
+                        remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture);
+                        pipe_surface_reference(&job->cbufs[i], NULL);
+                }
+        }
+        if (job->zsbuf) {
+                remove_from_ht(vc5->write_jobs, job->zsbuf->texture);
+                pipe_surface_reference(&job->zsbuf, NULL);
+        }
+
+        if (vc5->job == job)
+                vc5->job = NULL;
+
+        vc5_destroy_cl(&job->bcl);
+        vc5_destroy_cl(&job->rcl);
+        vc5_destroy_cl(&job->indirect);
+        vc5_bo_unreference(&job->tile_alloc);
+
+        /* The BO set and handle array are ralloc children of the job. */
+        ralloc_free(job);
+}
+
+/* Allocates a fresh job (as a ralloc child of the context) with empty
+ * command lists and an empty BO set.
+ */
+static struct vc5_job *
+vc5_job_create(struct vc5_context *vc5)
+{
+        struct vc5_job *job = rzalloc(vc5, struct vc5_job);
+
+        job->vc5 = vc5;
+
+        /* Track the BOs referenced by the job for submit and for flushing
+         * jobs that read a resource.
+         */
+        job->bos = _mesa_set_create(job, _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+
+        /* Set up the binner, render, and indirect command lists. */
+        vc5_init_cl(job, &job->bcl);
+        vc5_init_cl(job, &job->rcl);
+        vc5_init_cl(job, &job->indirect);
+
+        /* Start with an inverted (empty) bounds box; the first draw's clip
+         * window will establish the real extents.
+         */
+        job->draw_min_x = ~0;
+        job->draw_min_y = ~0;
+        job->draw_max_x = 0;
+        job->draw_max_y = 0;
+
+        return job;
+}
+
+/* Adds a BO to the set referenced by the job, taking a reference and
+ * recording its handle for the kernel submit ioctl.  No-op if the BO is
+ * NULL or already tracked.
+ */
+void
+vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo)
+{
+        if (!bo)
+                return;
+
+        if (_mesa_set_search(job->bos, bo))
+                return;
+
+        vc5_bo_reference(bo);
+        _mesa_set_add(job->bos, bo);
+
+        /* The submit struct stores the handle array as a u64-encoded
+         * pointer; keep it in sync with the set, growing geometrically.
+         */
+        uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;
+
+        if (job->submit.bo_handle_count >= job->bo_handles_size) {
+                job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
+                bo_handles = reralloc(job, bo_handles,
+                                      uint32_t, job->bo_handles_size);
+                job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
+        }
+        bo_handles[job->submit.bo_handle_count++] = bo->handle;
+}
+
+/* Submits the job (if any) that is currently writing to @prsc, so that
+ * subsequent reads of the resource see its rendering.
+ */
+void
+vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
+                                struct pipe_resource *prsc)
+{
+        struct hash_entry *entry =
+                _mesa_hash_table_search(vc5->write_jobs, prsc);
+
+        if (!entry)
+                return;
+
+        /* Submitting also removes the job from write_jobs. */
+        vc5_job_submit(vc5, entry->data);
+}
+
+/* Submits every job that references @prsc (as a writer or a reader), so
+ * that a new write to the resource is ordered after those jobs.
+ */
+void
+vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
+                                struct pipe_resource *prsc)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        /* Writers first: this also drops the resource's write_jobs entry. */
+        vc5_flush_jobs_writing_resource(vc5, prsc);
+
+        /* Readers are found by scanning every job's BO set for the
+         * resource's BO.
+         */
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->jobs, entry) {
+                struct vc5_job *job = entry->data;
+
+                if (_mesa_set_search(job->bos, rsc->bo)) {
+                        vc5_job_submit(vc5, job);
+                        /* Reminder: vc5->jobs is safe to keep iterating even
+                         * after deletion of an entry.
+                         */
+                        continue;
+                }
+        }
+}
+
+/* Chooses the tile size for the job based on MSAA, the number of color
+ * buffers, and the widest internal bpp of any render target.  The tile
+ * buffer is a fixed amount of memory, so heavier per-pixel usage means
+ * smaller tiles.
+ */
+static void
+vc5_job_set_tile_buffer_size(struct vc5_job *job)
+{
+        /* (width, height) pairs, from the largest tiles down to the
+         * smallest as tile buffer usage grows.
+         */
+        static const uint8_t tile_sizes[] = {
+                64, 64,
+                64, 32,
+                32, 32,
+                32, 16,
+                16, 16,
+        };
+        int tile_size_index = 0;
+        if (job->msaa)
+                tile_size_index += 2;
+
+        if (job->cbufs[3])
+                tile_size_index += 2;
+        else if (job->cbufs[2])
+                tile_size_index++;
+
+        int max_bpp = RENDER_TARGET_MAXIMUM_32BPP;
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (job->cbufs[i]) {
+                        struct vc5_surface *surf = vc5_surface(job->cbufs[i]);
+                        max_bpp = MAX2(max_bpp, surf->internal_bpp);
+                }
+        }
+        job->internal_bpp = max_bpp;
+        STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0);
+        tile_size_index += max_bpp;
+
+        /* tile_sizes[] holds (width, height) pairs, so bound the highest
+         * byte index we'll read, not the raw pair index against the byte
+         * length (which would let indices past the last pair through).
+         */
+        assert(tile_size_index * 2 + 1 < ARRAY_SIZE(tile_sizes));
+        job->tile_width = tile_sizes[tile_size_index * 2 + 0];
+        job->tile_height = tile_sizes[tile_size_index * 2 + 1];
+}
+
+/**
+ * Returns a vc5_job struture for tracking V3D rendering to a particular FBO.
+ *
+ * If we've already started rendering to this FBO, then return old same job,
+ * otherwise make a new one. If we're beginning rendering to an FBO, make
+ * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
+ * have been flushed.
+ */
+struct vc5_job *
+vc5_get_job(struct vc5_context *vc5,
+            struct pipe_surface **cbufs, struct pipe_surface *zsbuf)
+{
+        /* Return the existing job for this FBO if we have one */
+        struct vc5_job_key local_key = {
+                .cbufs = {
+                        cbufs[0],
+                        cbufs[1],
+                        cbufs[2],
+                        cbufs[3],
+                },
+                .zsbuf = zsbuf,
+        };
+        struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs,
+                                                           &local_key);
+        if (entry)
+                return entry->data;
+
+        /* Creating a new job. Make sure that any previous jobs reading or
+         * writing these buffers are flushed.
+         */
+        struct vc5_job *job = vc5_job_create(vc5);
+
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (cbufs[i]) {
+                        vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture);
+                        pipe_surface_reference(&job->cbufs[i], cbufs[i]);
+
+                        if (cbufs[i]->texture->nr_samples > 1)
+                                job->msaa = true;
+                }
+        }
+        if (zsbuf) {
+                vc5_flush_jobs_reading_resource(vc5, zsbuf->texture);
+                pipe_surface_reference(&job->zsbuf, zsbuf);
+                if (zsbuf->texture->nr_samples > 1)
+                        job->msaa = true;
+        }
+
+        /* Tile size depends on the buffers attached (MSAA, RT count, bpp). */
+        vc5_job_set_tile_buffer_size(job);
+
+        /* Mark this job as the writer of each attached resource. */
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (cbufs[i])
+                        _mesa_hash_table_insert(vc5->write_jobs,
+                                                cbufs[i]->texture, job);
+        }
+        if (zsbuf)
+                _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job);
+
+        /* The hash table stores a pointer to the key, so keep a copy of the
+         * key in the job itself, where it lives as long as the entry.
+         */
+        memcpy(&job->key, &local_key, sizeof(local_key));
+        _mesa_hash_table_insert(vc5->jobs, &job->key, job);
+
+        return job;
+}
+
+/* Returns the job for the context's current framebuffer state, creating and
+ * binding one (and resetting the dirty state) if none is active.
+ */
+struct vc5_job *
+vc5_get_job_for_fbo(struct vc5_context *vc5)
+{
+        if (vc5->job)
+                return vc5->job;
+
+        struct pipe_surface **cbufs = vc5->framebuffer.cbufs;
+        struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf;
+        struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf);
+
+        /* The dirty flags are tracking what's been updated while vc5->job has
+         * been bound, so set them all to ~0 when switching between jobs.  We
+         * also need to reset all state at the start of rendering.
+         */
+        vc5->dirty = ~0;
+
+        /* If we're binding to uninitialized buffers, no need to load their
+         * contents before drawing.  (Use VC5_MAX_DRAW_BUFFERS, not a magic
+         * 4, to match the sibling loops in vc5_get_job()/vc5_job_free().)
+         */
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (cbufs[i]) {
+                        struct vc5_resource *rsc = vc5_resource(cbufs[i]->texture);
+                        if (!rsc->writes)
+                                job->cleared |= PIPE_CLEAR_COLOR0 << i;
+                }
+        }
+
+        if (zsbuf) {
+                struct vc5_resource *rsc = vc5_resource(zsbuf->texture);
+                if (!rsc->writes)
+                        job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+        }
+
+        job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width,
+                                         job->tile_width);
+        job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height,
+                                         job->tile_height);
+
+        vc5->job = job;
+
+        return job;
+}
+
+/* CLIF-dump callback: translates a GPU address into a CPU pointer by
+ * finding the job BO containing it.  Returns false if no BO matches.
+ */
+static bool
+vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr)
+{
+        struct vc5_job *job = data;
+        struct set_entry *entry;
+
+        set_foreach(job->bos, entry) {
+                struct vc5_bo *bo = (void *)entry->key;
+
+                if (addr < bo->offset || addr >= bo->offset + bo->size)
+                        continue;
+
+                vc5_bo_map(bo);
+                *vaddr = bo->map + (addr - bo->offset);
+                return true;
+        }
+
+        return false;
+}
+
+/* Dumps the job's binner and render command lists to stderr when CL
+ * debugging is enabled via V3D_DEBUG.
+ */
+static void
+vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job)
+{
+        if (!(V3D_DEBUG & V3D_DEBUG_CL))
+                return;
+
+        /* NOTE(review): the clif context doesn't appear to be freed after
+         * use — possible leak if a destroy API exists; confirm.
+         */
+        struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo,
+                                                stderr, vc5_clif_dump_lookup,
+                                                job);
+
+        fprintf(stderr, "BCL: 0x%08x..0x%08x\n",
+                job->submit.bcl_start, job->submit.bcl_end);
+
+        clif_dump_add_cl(clif, job->submit.bcl_start);
+
+        fprintf(stderr, "RCL: 0x%08x..0x%08x\n",
+                job->submit.rcl_start, job->submit.rcl_end);
+        clif_dump_add_cl(clif, job->submit.rcl_start);
+}
+
+/**
+ * Submits the job to the kernel and then reinitializes it.
+ */
+void
+vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
+{
+        /* Nothing was drawn; skip straight to freeing the job. */
+        if (!job->needs_flush)
+                goto done;
+
+        /* The RCL setup would choke if the draw bounds cause no drawing, so
+         * just drop the drawing if that's the case.
+         */
+        if (job->draw_max_x <= job->draw_min_x ||
+            job->draw_max_y <= job->draw_min_y) {
+                goto done;
+        }
+
+        /* Build the render command list now that the full set of buffers
+         * and draw bounds are known.
+         */
+        vc5_emit_rcl(job);
+
+        if (cl_offset(&job->bcl) > 0) {
+                vc5_cl_ensure_space_with_branch(&job->bcl, 2);
+
+                /* Increment the semaphore indicating that binning is done and
+                 * unblocking the render thread.  Note that this doesn't act
+                 * until the FLUSH completes.
+                 */
+                cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
+
+                /* The FLUSH caps all of our bin lists with a
+                 * VC5_PACKET_RETURN.
+                 */
+                cl_emit(&job->bcl, FLUSH, flush);
+        }
+
+        job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
+        job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
+
+        vc5_clif_dump(vc5, job);
+
+        if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {
+                int ret;
+
+#ifndef USE_VC5_SIMULATOR
+                ret = drmIoctl(vc5->fd, DRM_IOCTL_VC5_SUBMIT_CL, &job->submit);
+#else
+                ret = vc5_simulator_flush(vc5, &job->submit, job);
+#endif
+                /* Only warn once, so a broken kernel interface doesn't spam
+                 * stderr on every draw.
+                 *
+                 * NOTE(review): the simulator path returns an error code
+                 * rather than setting errno, so strerror(errno) may be
+                 * stale there — confirm.
+                 */
+                static bool warned = false;
+                if (ret && !warned) {
+                        fprintf(stderr, "Draw call returned %s. "
+                                        "Expect corruption.\n", strerror(errno));
+                        warned = true;
+                }
+        }
+
+        /* Throttle: don't let the CPU queue more than ~5 jobs ahead of what
+         * the GPU has finished.
+         */
+        if (vc5->last_emit_seqno - vc5->screen->finished_seqno > 5) {
+                if (!vc5_wait_seqno(vc5->screen,
+                                    vc5->last_emit_seqno - 5,
+                                    PIPE_TIMEOUT_INFINITE,
+                                    "job throttling")) {
+                        fprintf(stderr, "Job throttling failed\n");
+                }
+        }
+
+done:
+        vc5_job_free(vc5, job);
+}
+
+/* Hash table key-equality callback: job keys are plain structs, so bytewise
+ * comparison is sufficient.
+ */
+static bool
+vc5_job_compare(const void *a, const void *b)
+{
+        return !memcmp(a, b, sizeof(struct vc5_job_key));
+}
+
+/* Hash table hash callback: hashes the raw bytes of the vc5_job_key. */
+static uint32_t
+vc5_job_hash(const void *key)
+{
+        return _mesa_hash_data(key, sizeof(struct vc5_job_key));
+}
+
+/* Sets up the context's job-tracking hash tables: jobs keyed by FBO state
+ * (vc5_job_key), and write_jobs mapping a pipe_resource to the job writing
+ * it.
+ */
+void
+vc5_job_init(struct vc5_context *vc5)
+{
+        vc5->jobs = _mesa_hash_table_create(vc5, vc5_job_hash,
+                                            vc5_job_compare);
+        vc5->write_jobs = _mesa_hash_table_create(vc5, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
+}
+
diff --git a/src/gallium/drivers/vc5/vc5_program.c b/src/gallium/drivers/vc5/vc5_program.c
new file mode 100644
index 00000000000..02625ed192b
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_program.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+#include "util/hash_table.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "nir/tgsi_to_nir.h"
+#include "compiler/v3d_compiler.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Precomputes transform feedback state at shader CSO creation.
+ *
+ * Builds the ordered list of coordinate-shader output slots to stream
+ * out (so->tf_outputs) and packs one TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC
+ * record per buffer that has outputs (so->tf_specs).
+ */
+static void
+vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
+                                   const struct pipe_stream_output_info *stream_output)
+{
+        if (!stream_output->num_outputs)
+                return;
+
+        struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4];
+        int slot_count = 0;
+
+        for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) {
+                /* Current write position in the buffer, in dwords. */
+                uint32_t buffer_offset = 0;
+                uint32_t vpm_start = slot_count;
+
+                for (int i = 0; i < stream_output->num_outputs; i++) {
+                        const struct pipe_stream_output *output =
+                                &stream_output->output[i];
+
+                        if (output->output_buffer != buffer)
+                                continue;
+
+                        /* We assume that the SO outputs appear in increasing
+                         * order in the buffer.
+                         */
+                        assert(output->dst_offset >= buffer_offset);
+
+                        /* Pad any undefined slots in the output */
+                        for (int j = buffer_offset; j < output->dst_offset; j++) {
+                                slots[slot_count] =
+                                        v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0);
+                                slot_count++;
+                        }
+
+                        /* Set the coordinate shader up to output the
+                         * components of this varying.
+                         */
+                        for (int j = 0; j < output->num_components; j++) {
+                                slots[slot_count] =
+                                        v3d_slot_from_slot_and_component(VARYING_SLOT_VAR0 +
+                                                                         output->register_index,
+                                                                         output->start_component + j);
+                                slot_count++;
+                        }
+
+                        /* Advance past this output.  Without this the
+                         * padding loop above would re-pad from dword 0 for
+                         * every later output in this buffer, and the
+                         * increasing-order assert would be vacuous.
+                         */
+                        buffer_offset = output->dst_offset +
+                                        output->num_components;
+                }
+
+                uint32_t vpm_size = slot_count - vpm_start;
+                if (!vpm_size)
+                        continue;
+
+                struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
+                        .first_shaded_vertex_value_to_output = vpm_start,
+                        .number_of_consecutive_vertex_values_to_output_as_32_bit_values = vpm_size,
+                        .output_buffer_to_write_to = buffer,
+                };
+                V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                                                               (void *)&so->tf_specs[so->num_tf_specs++],
+                                                               &unpacked);
+        }
+
+        so->num_tf_outputs = slot_count;
+        so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot,
+                                      slot_count);
+        memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count);
+}
+
+/* nir_lower_io callback: number of attribute slots a variable occupies. */
+static int
+type_size(const struct glsl_type *type)
+{
+        return glsl_count_attribute_slots(type, false);
+}
+
+/**
+ * pipe_context::create_{vs,fs}_state: wraps the given NIR or TGSI
+ * shader in a vc5_uncompiled_shader, lowering it to optimized NIR.
+ * Actual QPU compilation is deferred to vc5_get_compiled_shader(),
+ * keyed on draw-time state.
+ */
+static void *
+vc5_shader_state_create(struct pipe_context *pctx,
+                        const struct pipe_shader_state *cso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader);
+        if (!so)
+                return NULL;
+
+        so->program_id = vc5->next_uncompiled_program_id++;
+
+        nir_shader *s;
+
+        if (cso->type == PIPE_SHADER_IR_NIR) {
+                /* The backend takes ownership of the NIR shader on state
+                 * creation.
+                 */
+                s = cso->ir.nir;
+
+                NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size,
+                           (nir_lower_io_options)0);
+        } else {
+                assert(cso->type == PIPE_SHADER_IR_TGSI);
+
+                if (V3D_DEBUG & V3D_DEBUG_TGSI) {
+                        fprintf(stderr, "prog %d TGSI:\n",
+                                so->program_id);
+                        tgsi_dump(cso->tokens, 0);
+                        fprintf(stderr, "\n");
+                }
+                /* tgsi_to_nir() already does the I/O lowering. */
+                s = tgsi_to_nir(cso->tokens, &v3d_nir_options);
+        }
+
+        /* Lowering passes that must run before the optimization loop. */
+        NIR_PASS_V(s, nir_opt_global_to_local);
+        NIR_PASS_V(s, nir_lower_regs_to_ssa);
+        NIR_PASS_V(s, nir_normalize_cubemap_coords);
+
+        NIR_PASS_V(s, nir_lower_load_const_to_scalar);
+
+        v3d_optimize_nir(s);
+
+        NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local);
+
+        /* Garbage collect dead instructions */
+        nir_sweep(s);
+
+        so->base.type = PIPE_SHADER_IR_NIR;
+        so->base.ir.nir = s;
+
+        vc5_set_transform_feedback_outputs(so, &cso->stream_output);
+
+        if (V3D_DEBUG & (V3D_DEBUG_NIR |
+                         v3d_debug_flag_for_shader_stage(s->stage))) {
+                fprintf(stderr, "%s prog %d NIR:\n",
+                        gl_shader_stage_name(s->stage),
+                        so->program_id);
+                nir_print_shader(s, stderr);
+                fprintf(stderr, "\n");
+        }
+
+        return so;
+}
+
+/**
+ * Returns the compiled shader variant for the given key, compiling the
+ * NIR down to QPU instructions and caching the result on a miss.
+ *
+ * The per-stage cache (fs_cache/vs_cache) is keyed on the full
+ * v3d_{fs,vs}_key, which embeds the vc5_uncompiled_shader pointer.
+ */
+static struct vc5_compiled_shader *
+vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key)
+{
+        struct vc5_uncompiled_shader *shader_state = key->shader_state;
+        nir_shader *s = shader_state->base.ir.nir;
+
+        struct hash_table *ht;
+        uint32_t key_size;
+        if (s->stage == MESA_SHADER_FRAGMENT) {
+                ht = vc5->fs_cache;
+                key_size = sizeof(struct v3d_fs_key);
+        } else {
+                ht = vc5->vs_cache;
+                key_size = sizeof(struct v3d_vs_key);
+        }
+
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        if (entry)
+                return entry->data;
+
+        struct vc5_compiled_shader *shader =
+                rzalloc(NULL, struct vc5_compiled_shader);
+
+        /* IDs used only to label this variant in debug dumps. */
+        int program_id = shader_state->program_id;
+        int variant_id =
+                p_atomic_inc_return(&shader_state->compiled_variant_count);
+        uint64_t *qpu_insts;
+        uint32_t shader_size;
+
+        switch (s->stage) {
+        case MESA_SHADER_VERTEX:
+                shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data);
+
+                qpu_insts = v3d_compile_vs(vc5->screen->compiler,
+                                           (struct v3d_vs_key *)key,
+                                           shader->prog_data.vs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        case MESA_SHADER_FRAGMENT:
+                shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data);
+
+                qpu_insts = v3d_compile_fs(vc5->screen->compiler,
+                                           (struct v3d_fs_key *)key,
+                                           shader->prog_data.fs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        default:
+                unreachable("bad stage");
+        }
+
+        vc5_set_shader_uniform_dirty_flags(shader);
+
+        /* Upload the QPU code to a BO the hardware can execute from. */
+        shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader");
+        vc5_bo_map(shader->bo);
+        memcpy(shader->bo->map, qpu_insts, shader_size);
+
+        free(qpu_insts);
+
+        /* The caller's key lives on its stack, so duplicate it (owned by
+         * the compiled shader) for the cache entry.
+         */
+        struct vc5_key *dup_key;
+        dup_key = ralloc_size(shader, key_size);
+        memcpy(dup_key, key, key_size);
+        _mesa_hash_table_insert(ht, dup_key, shader);
+
+        return shader;
+}
+
+/**
+ * Fills in the parts of the shader compile key shared between the VS
+ * and FS: per-texture return size/channels and swizzles, plus user
+ * clip plane enables.
+ */
+static void
+vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key,
+                     struct vc5_texture_stateobj *texstate)
+{
+        for (int i = 0; i < texstate->num_textures; i++) {
+                struct pipe_sampler_view *sampler = texstate->textures[i];
+
+                /* Skip unbound texture units before touching any of
+                 * their state.
+                 */
+                if (!sampler)
+                        continue;
+
+                struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler);
+                struct pipe_sampler_state *sampler_state =
+                        texstate->samplers[i];
+
+                key->tex[i].return_size =
+                        vc5_get_tex_return_size(sampler->format);
+
+                /* For 16-bit, we set up the sampler to always return 2
+                 * channels (meaning no recompiles for most statechanges),
+                 * while for 32 we actually scale the returns with channels.
+                 */
+                if (key->tex[i].return_size == 16) {
+                        key->tex[i].return_channels = 2;
+                } else {
+                        key->tex[i].return_channels =
+                                vc5_get_tex_return_channels(sampler->format);
+                }
+
+                if (vc5_get_tex_return_size(sampler->format) == 32) {
+                        memcpy(key->tex[i].swizzle,
+                               vc5_sampler->swizzle,
+                               sizeof(vc5_sampler->swizzle));
+                } else {
+                        /* For 16-bit returns, we let the sampler state handle
+                         * the swizzle.
+                         */
+                        key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
+                        key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
+                        key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
+                        key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
+                }
+
+                if (sampler->texture->nr_samples > 1) {
+                        key->tex[i].msaa_width = sampler->texture->width0;
+                        key->tex[i].msaa_height = sampler->texture->height0;
+                } else {
+                        /* sampler is known non-NULL here, so the previous
+                         * "else if (sampler)" guard was redundant.
+                         */
+                        key->tex[i].compare_mode = sampler_state->compare_mode;
+                        key->tex[i].compare_func = sampler_state->compare_func;
+                        key->tex[i].wrap_s = sampler_state->wrap_s;
+                        key->tex[i].wrap_t = sampler_state->wrap_t;
+                }
+        }
+
+        key->ucp_enables = vc5->rasterizer->base.clip_plane_enable;
+}
+
+/**
+ * Rebuilds the FS compile key from current state and swaps in the
+ * matching compiled variant, setting VC5_DIRTY_COMPILED_FS (and the
+ * flat-shade/FS-inputs dirty flags) only when the variant changed.
+ */
+static void
+vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct vc5_job *job = vc5->job;
+        struct v3d_fs_key local_key;
+        struct v3d_fs_key *key = &local_key;
+
+        /* Skip the recompile check if no state feeding the key changed. */
+        if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                            VC5_DIRTY_BLEND |
+                            VC5_DIRTY_FRAMEBUFFER |
+                            VC5_DIRTY_ZSA |
+                            VC5_DIRTY_RASTERIZER |
+                            VC5_DIRTY_SAMPLE_MASK |
+                            VC5_DIRTY_FRAGTEX |
+                            VC5_DIRTY_UNCOMPILED_FS))) {
+                return;
+        }
+
+        /* The key is memcmp'd/hashed whole, so clear any padding first. */
+        memset(key, 0, sizeof(*key));
+        vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex);
+        key->base.shader_state = vc5->prog.bind_fs;
+        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
+        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
+                         prim_mode <= PIPE_PRIM_LINE_STRIP);
+        key->clamp_color = vc5->rasterizer->base.clamp_fragment_color;
+        if (vc5->blend->logicop_enable) {
+                key->logicop_func = vc5->blend->logicop_func;
+        } else {
+                key->logicop_func = PIPE_LOGICOP_COPY;
+        }
+        if (job->msaa) {
+                key->msaa = vc5->rasterizer->base.multisample;
+                key->sample_coverage = (vc5->rasterizer->base.multisample &&
+                                        vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1);
+                key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage;
+                key->sample_alpha_to_one = vc5->blend->alpha_to_one;
+        }
+
+        key->depth_enabled = (vc5->zsa->base.depth.enabled ||
+                              vc5->zsa->base.stencil[0].enabled);
+        if (vc5->zsa->base.alpha.enabled) {
+                key->alpha_test = true;
+                key->alpha_test_func = vc5->zsa->base.alpha.func;
+        }
+
+        if (vc5->framebuffer.cbufs[0]) {
+                struct pipe_surface *cbuf = vc5->framebuffer.cbufs[0];
+                const struct util_format_description *desc =
+                        util_format_description(cbuf->format);
+
+                /* BGRA-style formats need R/B swapped in the shader. */
+                key->swap_color_rb = desc->swizzle[0] == PIPE_SWIZZLE_Z;
+        }
+
+        if (key->is_points) {
+                key->point_sprite_mask =
+                        vc5->rasterizer->base.sprite_coord_enable;
+                key->point_coord_upper_left =
+                        (vc5->rasterizer->base.sprite_coord_mode ==
+                         PIPE_SPRITE_COORD_UPPER_LEFT);
+        }
+
+        key->light_twoside = vc5->rasterizer->base.light_twoside;
+
+        struct vc5_compiled_shader *old_fs = vc5->prog.fs;
+        vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base);
+        if (vc5->prog.fs == old_fs)
+                return;
+
+        vc5->dirty |= VC5_DIRTY_COMPILED_FS;
+
+        /* Flat-shade flags feed the VS/emit state, so flag them
+         * separately when they actually changed.
+         */
+        if (old_fs &&
+            (vc5->prog.fs->prog_data.fs->flat_shade_flags !=
+             old_fs->prog_data.fs->flat_shade_flags ||
+             (vc5->rasterizer->base.flatshade &&
+              vc5->prog.fs->prog_data.fs->color_inputs !=
+              old_fs->prog_data.fs->color_inputs))) {
+                vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+        }
+
+        /* The VS compile key copies the FS input slots, so a change here
+         * may force a VS recompile (see vc5_update_compiled_vs()).
+         */
+        if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots,
+                             old_fs->prog_data.fs->input_slots,
+                             sizeof(vc5->prog.fs->prog_data.fs->input_slots))) {
+                vc5->dirty |= VC5_DIRTY_FS_INPUTS;
+        }
+}
+
+/**
+ * Rebuilds the VS compile key and swaps in both vertex-stage variants:
+ * the full vertex shader (prog.vs) and the binning-mode coordinate
+ * shader (prog.cs), which only outputs position plus transform
+ * feedback varyings.
+ */
+static void
+vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct v3d_vs_key local_key;
+        struct v3d_vs_key *key = &local_key;
+
+        /* Skip the recompile check if no state feeding the key changed. */
+        if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                            VC5_DIRTY_RASTERIZER |
+                            VC5_DIRTY_VERTTEX |
+                            VC5_DIRTY_VTXSTATE |
+                            VC5_DIRTY_UNCOMPILED_VS |
+                            VC5_DIRTY_FS_INPUTS))) {
+                return;
+        }
+
+        /* The key is memcmp'd/hashed whole, so clear any padding first. */
+        memset(key, 0, sizeof(*key));
+        vc5_setup_shared_key(vc5, &key->base, &vc5->verttex);
+        key->base.shader_state = vc5->prog.bind_vs;
+        /* The VS must lay out its outputs to match the FS's inputs
+         * (requires vc5_update_compiled_fs() to have run first).
+         */
+        key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs;
+        STATIC_ASSERT(sizeof(key->fs_inputs) ==
+                      sizeof(vc5->prog.fs->prog_data.fs->input_slots));
+        memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots,
+               sizeof(key->fs_inputs));
+        key->clamp_color = vc5->rasterizer->base.clamp_vertex_color;
+
+        key->per_vertex_point_size =
+                (prim_mode == PIPE_PRIM_POINTS &&
+                 vc5->rasterizer->base.point_size_per_vertex);
+
+        struct vc5_compiled_shader *vs =
+                vc5_get_compiled_shader(vc5, &key->base);
+        if (vs != vc5->prog.vs) {
+                vc5->prog.vs = vs;
+                vc5->dirty |= VC5_DIRTY_COMPILED_VS;
+        }
+
+        /* Reuse the same key for the coordinate shader, replacing the FS
+         * inputs with the transform feedback outputs.
+         */
+        key->is_coord = true;
+        /* Coord shaders only output varyings used by transform feedback. */
+        struct vc5_uncompiled_shader *shader_state = key->base.shader_state;
+        memcpy(key->fs_inputs, shader_state->tf_outputs,
+               sizeof(*key->fs_inputs) * shader_state->num_tf_outputs);
+        /* Zero the tail so the key hashes/compares deterministically. */
+        if (shader_state->num_tf_outputs < key->num_fs_inputs) {
+                memset(&key->fs_inputs[shader_state->num_tf_outputs],
+                       0,
+                       sizeof(*key->fs_inputs) * (key->num_fs_inputs -
+                                                  shader_state->num_tf_outputs));
+        }
+        key->num_fs_inputs = shader_state->num_tf_outputs;
+
+        struct vc5_compiled_shader *cs =
+                vc5_get_compiled_shader(vc5, &key->base);
+        if (cs != vc5->prog.cs) {
+                vc5->prog.cs = cs;
+                vc5->dirty |= VC5_DIRTY_COMPILED_CS;
+        }
+}
+
+/**
+ * Updates the compiled shader variants for the current state before a
+ * draw.  The FS must be updated first: the VS key copies the FS's
+ * input slots (see VC5_DIRTY_FS_INPUTS).
+ */
+void
+vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        vc5_update_compiled_fs(vc5, prim_mode);
+        vc5_update_compiled_vs(vc5, prim_mode);
+}
+
+/* Hashes an entire v3d_fs_key for the FS variant cache. */
+static uint32_t
+fs_cache_hash(const void *key)
+{
+        const struct v3d_fs_key *fs_key = key;
+
+        return _mesa_hash_data(fs_key, sizeof(*fs_key));
+}
+
+/* Hashes an entire v3d_vs_key for the VS variant cache. */
+static uint32_t
+vs_cache_hash(const void *key)
+{
+        const struct v3d_vs_key *vs_key = key;
+
+        return _mesa_hash_data(vs_key, sizeof(*vs_key));
+}
+
+/* Returns true when two v3d_fs_keys are bitwise identical. */
+static bool
+fs_cache_compare(const void *key1, const void *key2)
+{
+        return !memcmp(key1, key2, sizeof(struct v3d_fs_key));
+}
+
+/* Returns true when two v3d_vs_keys are bitwise identical. */
+static bool
+vs_cache_compare(const void *key1, const void *key2)
+{
+        return !memcmp(key1, key2, sizeof(struct v3d_vs_key));
+}
+
+/* Drops a compiled-variant cache entry if it was produced from the
+ * uncompiled shader being deleted: releases the code BO, frees the
+ * variant, and clears the context's last-bound pointer if it pointed
+ * at it.
+ */
+static void
+delete_from_cache_if_matches(struct hash_table *ht,
+                             struct vc5_compiled_shader **last_compile,
+                             struct hash_entry *entry,
+                             struct vc5_uncompiled_shader *so)
+{
+        const struct v3d_key *key = entry->key;
+
+        if (key->shader_state != so)
+                return;
+
+        struct vc5_compiled_shader *shader = entry->data;
+        _mesa_hash_table_remove(ht, entry);
+        vc5_bo_unreference(&shader->bo);
+
+        if (shader == *last_compile)
+                *last_compile = NULL;
+
+        ralloc_free(shader);
+}
+
+/**
+ * pipe_context::delete_{vs,fs}_state: evicts every compiled variant of
+ * this shader from both caches, then frees the NIR and the CSO itself.
+ */
+static void
+vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_uncompiled_shader *so = hwcso;
+
+        /* hash_table_foreach tolerates removal of the current entry. */
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->fs_cache, entry) {
+                delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs,
+                                             entry, so);
+        }
+        hash_table_foreach(vc5->vs_cache, entry) {
+                delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs,
+                                             entry, so);
+        }
+
+        /* so itself was CALLOC'd, so plain free(); the NIR owns
+         * tf_outputs (ralloc'd off it).
+         */
+        ralloc_free(so->base.ir.nir);
+        free(so);
+}
+
+/* pipe_context::bind_fs_state: records the FS CSO and flags a
+ * recompile check for the next draw.
+ */
+static void
+vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5->prog.bind_fs = hwcso;
+        vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS;
+}
+
+/* pipe_context::bind_vs_state: records the VS CSO and flags a
+ * recompile check for the next draw.
+ */
+static void
+vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        vc5->prog.bind_vs = hwcso;
+        vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS;
+}
+
+/**
+ * Hooks up the shader CSO entry points on the context and creates the
+ * per-stage compiled-variant caches.
+ */
+void
+vc5_program_init(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        pctx->create_vs_state = vc5_shader_state_create;
+        pctx->delete_vs_state = vc5_shader_state_delete;
+
+        pctx->create_fs_state = vc5_shader_state_create;
+        pctx->delete_fs_state = vc5_shader_state_delete;
+
+        pctx->bind_fs_state = vc5_fp_state_bind;
+        pctx->bind_vs_state = vc5_vp_state_bind;
+
+        vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
+                                                fs_cache_compare);
+        vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
+                                                vs_cache_compare);
+}
+
+/**
+ * Frees every cached compiled shader variant at context teardown.
+ * (The tables themselves are ralloc children of the context and are
+ * freed with it.)
+ */
+void
+vc5_program_fini(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        /* hash_table_foreach tolerates removal of the current entry;
+         * the entry's key is a ralloc child of the shader being freed,
+         * but _mesa_hash_table_remove() doesn't dereference it.
+         */
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->fs_cache, entry) {
+                struct vc5_compiled_shader *shader = entry->data;
+                vc5_bo_unreference(&shader->bo);
+                ralloc_free(shader);
+                _mesa_hash_table_remove(vc5->fs_cache, entry);
+        }
+
+        hash_table_foreach(vc5->vs_cache, entry) {
+                struct vc5_compiled_shader *shader = entry->data;
+                vc5_bo_unreference(&shader->bo);
+                ralloc_free(shader);
+                _mesa_hash_table_remove(vc5->vs_cache, entry);
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_query.c b/src/gallium/drivers/vc5/vc5_query.c
new file mode 100644
index 00000000000..c114e76eef0
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_query.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Stub support for occlusion queries.
+ *
+ * Since we expose support for GL 2.0, we have to expose occlusion queries,
+ * but the spec allows you to expose 0 query counter bits, so we just return 0
+ * as the result of all our queries.
+ */
+#include "vc5_context.h"
+
+/* Dummy query object; no hardware counters are used (see file comment). */
+struct vc5_query
+{
+        uint8_t pad;
+};
+
+/* Allocates a dummy query; a NULL calloc result propagates as a NULL
+ * pipe_query, which callers treat as out-of-memory.
+ */
+static struct pipe_query *
+vc5_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+{
+        struct vc5_query *query = calloc(1, sizeof(*query));
+
+        /* Note that struct pipe_query isn't actually defined anywhere. */
+        return (struct pipe_query *)query;
+}
+
+/* Frees the dummy query allocated by vc5_create_query(). */
+static void
+vc5_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        free(query);
+}
+
+/* Stub: nothing to start, since all query results are reported as 0. */
+static boolean
+vc5_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        return true;
+}
+
+/* Stub: nothing to stop.  NOTE(review): returns "bool" while
+ * vc5_begin_query returns "boolean" — presumably matching the
+ * respective pipe_context hook signatures; confirm against
+ * pipe_context.h.
+ */
+static bool
+vc5_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        return true;
+}
+
+/* Always reports a zero result, which is legal when the driver exposes
+ * 0 query counter bits (see file comment).
+ */
+static boolean
+vc5_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
+                     boolean wait, union pipe_query_result *vresult)
+{
+        vresult->u64 = 0;
+
+        return true;
+}
+
+/* Stub: queries are never active, so there is no state to toggle. */
+static void
+vc5_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
+/* Hooks up the stub query entry points on the context. */
+void
+vc5_query_init(struct pipe_context *pctx)
+{
+        pctx->create_query = vc5_create_query;
+        pctx->destroy_query = vc5_destroy_query;
+        pctx->begin_query = vc5_begin_query;
+        pctx->end_query = vc5_end_query;
+        pctx->get_query_result = vc5_get_query_result;
+        pctx->set_active_query_state = vc5_set_active_query_state;
+}
+
diff --git a/src/gallium/drivers/vc5/vc5_rcl.c b/src/gallium/drivers/vc5/vc5_rcl.c
new file mode 100644
index 00000000000..287a35aa33e
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_rcl.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "vc5_context.h"
+#include "vc5_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Builds the render control list (RCL) for a job: the per-tile-buffer
+ * configuration followed by, for each tile touched by the draw, the
+ * load/branch-to-bin-list/store sequence that renders and resolves it.
+ */
+void
+vc5_emit_rcl(struct vc5_job *job)
+{
+        /* Tile bounds of the drawn region, inclusive. */
+        uint32_t min_x_tile = job->draw_min_x / job->tile_width;
+        uint32_t min_y_tile = job->draw_min_y / job->tile_height;
+        uint32_t max_x_tile = (job->draw_max_x - 1) / job->tile_width;
+        uint32_t max_y_tile = (job->draw_max_y - 1) / job->tile_height;
+
+        /* The RCL list should be empty. */
+        assert(!job->rcl.bo);
+
+        /* Worst-case space estimate: fixed-size config plus a bounded
+         * per-tile packet sequence.
+         */
+        vc5_cl_ensure_space(&job->rcl,
+                            256 +
+                            (64 *
+                             (max_x_tile - min_x_tile + 1) *
+                             (max_y_tile - min_y_tile + 1)), 1);
+
+        job->submit.rcl_start = job->rcl.bo->offset;
+        vc5_job_add_bo(job, job->rcl.bo);
+
+        /* Highest bound color buffer index + 1 (gaps allowed). */
+        int nr_cbufs = 0;
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (job->cbufs[i])
+                        nr_cbufs = i + 1;
+        }
+
+        /* Common config must be the first TILE_RENDERING_MODE_CONFIGURATION
+         * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are
+         * optional updates to the previous HW state.
+         */
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION,
+                config) {
+                config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH;
+                config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL;
+
+                config.early_z_disable = !job->uses_early_z;
+
+                config.image_width_pixels = job->draw_width;
+                config.image_height_pixels = job->draw_height;
+
+                config.number_of_render_targets_minus_1 =
+                        MAX2(nr_cbufs, 1) - 1;
+
+                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
+        }
+
+        for (int i = 0; i < nr_cbufs; i++) {
+                cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
+                        struct pipe_surface *psurf = job->cbufs[i];
+                        /* NOTE(review): this "continue" sits inside the
+                         * cl_emit() macro body; if cl_emit expands to a
+                         * loop, it skips the pack step rather than the
+                         * outer iteration — confirm against vc5_cl.h.
+                         */
+                        if (!psurf)
+                                continue;
+
+                        struct vc5_surface *surf = vc5_surface(psurf);
+                        struct vc5_resource *rsc = vc5_resource(psurf->texture);
+                        rt.address = cl_address(rsc->bo, surf->offset);
+                        rt.internal_type = surf->internal_type;
+                        rt.output_image_format = surf->format;
+                        rt.memory_format = surf->tiling;
+                        rt.internal_bpp = surf->internal_bpp;
+                        rt.render_target_number = i;
+
+                        /* Track that this job writes the resource. */
+                        if (job->resolve & PIPE_CLEAR_COLOR0 << i)
+                                rsc->writes++;
+                }
+        }
+
+        /* TODO: Don't bother emitting if we don't load/clear Z/S. */
+        if (job->zsbuf) {
+                struct pipe_surface *psurf = job->zsbuf;
+                struct vc5_surface *surf = vc5_surface(psurf);
+                struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+                cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) {
+                        zs.address = cl_address(rsc->bo, surf->offset);
+
+                        zs.internal_type = surf->internal_type;
+                        zs.output_image_format = surf->format;
+
+                        struct vc5_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
+                        /* XXX */
+                        zs.padded_height_of_output_image_in_uif_blocks =
+                                (slice->size / slice->stride) / (2 * vc5_utile_height(rsc->cpp));
+
+                        /* Z/S surfaces are always tiled. */
+                        assert(surf->tiling != VC5_TILING_RASTER);
+                        zs.memory_format = surf->tiling;
+                }
+
+                if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL)
+                        rsc->writes++;
+        }
+
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1,
+                clear) {
+                clear.clear_color_low_32_bits = job->clear_color[0];
+        };
+
+        /* Ends rendering mode config. */
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES,
+                clear) {
+                clear.z_s_clear_value = job->clear_zs;
+        };
+
+        /* Always set initial block size before the first branch, which needs
+         * to match the value from binning mode config.
+         */
+        cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+                init.use_auto_chained_tile_lists = true;
+                init.size_of_first_block_in_chained_tile_lists =
+                        TILE_ALLOCATION_BLOCK_SIZE_64B;
+        }
+
+        /* Wait for the semaphore the binner increments when it finishes. */
+        cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
+
+        /* Start by clearing the tile buffer. */
+        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
+                coords.tile_column_number = 0;
+                coords.tile_row_number = 0;
+        }
+
+        cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
+                store.buffer_to_store = NONE;
+        }
+
+        cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
+
+        const uint32_t pipe_clear_color_buffers = (PIPE_CLEAR_COLOR0 |
+                                                   PIPE_CLEAR_COLOR1 |
+                                                   PIPE_CLEAR_COLOR2 |
+                                                   PIPE_CLEAR_COLOR3);
+        const uint32_t first_color_buffer_bit = (ffs(PIPE_CLEAR_COLOR0) - 1);
+
+        for (int y = min_y_tile; y <= max_y_tile; y++) {
+                for (int x = min_x_tile; x <= max_x_tile; x++) {
+                        /* Buffers whose prior contents are needed but
+                         * weren't fast-cleared must be loaded.
+                         */
+                        uint32_t read_but_not_cleared = job->resolve & ~job->cleared;
+
+                        /* The initial reload will be queued until we get the
+                         * tile coordinates.
+                         */
+                        if (read_but_not_cleared) {
+                                cl_emit(&job->rcl, RELOAD_TILE_COLOUR_BUFFER, load) {
+                                        load.disable_colour_buffer_load =
+                                                (~read_but_not_cleared & pipe_clear_color_buffers) >>
+                                                first_color_buffer_bit;
+                                        load.enable_z_load =
+                                                read_but_not_cleared & PIPE_CLEAR_DEPTH;
+                                        load.enable_stencil_load =
+                                                read_but_not_cleared & PIPE_CLEAR_STENCIL;
+                                }
+                        }
+
+                        /* Tile Coordinates triggers the reload and sets where
+                         * the stores go. There must be one per store packet.
+                         */
+                        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
+                                coords.tile_column_number = x;
+                                coords.tile_row_number = y;
+                        }
+
+                        /* Run the bin list for this tile: bin lists are laid
+                         * out as a row-major grid of 64-byte-chained lists in
+                         * the tile_alloc BO.
+                         */
+                        cl_emit(&job->rcl, BRANCH_TO_AUTO_CHAINED_SUB_LIST, branch) {
+                                uint32_t bin_tile_stride =
+                                        (align(job->draw_width,
+                                               job->tile_width) /
+                                         job->tile_width);
+                                uint32_t bin_index =
+                                        (y * bin_tile_stride + x);
+                                branch.address = cl_address(job->tile_alloc,
+                                                            64 * bin_index);
+                        }
+
+                        cl_emit(&job->rcl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
+                                uint32_t color_write_enables =
+                                        job->resolve >> first_color_buffer_bit;
+
+                                store.disable_color_buffer_write = (~color_write_enables) & 0xf;
+                                store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
+                                store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
+
+                                /* Fast clears are applied on store, so only
+                                 * enable clear-on-write for cleared buffers.
+                                 */
+                                store.disable_colour_buffers_clear_on_write =
+                                        (job->cleared & pipe_clear_color_buffers) == 0;
+                                store.disable_z_buffer_clear_on_write =
+                                        !(job->cleared & PIPE_CLEAR_DEPTH);
+                                store.disable_stencil_buffer_clear_on_write =
+                                        !(job->cleared & PIPE_CLEAR_STENCIL);
+
+                                store.last_tile_of_frame = (x == max_x_tile &&
+                                                            y == max_y_tile);
+                        };
+                }
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_resource.c b/src/gallium/drivers/vc5/vc5_resource.c
new file mode 100644
index 00000000000..8dbdb71e735
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_resource.c
@@ -0,0 +1,758 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_blit.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+#include "util/u_upload_mgr.h"
+
+#include "drm_fourcc.h"
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "vc5_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#ifndef DRM_FORMAT_MOD_INVALID
+#define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
+#endif
+
+/**
+ * Allocates a fresh BO big enough to hold every layer of the resource and
+ * replaces rsc->bo with it.
+ *
+ * Slices are laid out smallest-mip-first, so slices[0].offset +
+ * slices[0].size is the full size of one mip tree (layer).
+ *
+ * Returns true on success; on failure the previous BO is left in place.
+ */
+static bool
+vc5_resource_bo_alloc(struct vc5_resource *rsc)
+{
+        struct pipe_resource *prsc = &rsc->base.b;
+        struct pipe_screen *pscreen = prsc->screen;
+        struct vc5_bo *bo;
+        /* 3D textures use depth0 as the layer count; arrays/cubes use
+         * array_size.
+         */
+        int layers = (prsc->target == PIPE_TEXTURE_3D ?
+                      prsc->depth0 : prsc->array_size);
+
+        /* NOTE(review): "cube_map_stride * layers - 1" parses as
+         * "(stride * layers) - 1"; if "stride * (layers - 1)" was intended
+         * this over-allocates by almost one layer.  It cannot
+         * under-allocate, but worth confirming.
+         */
+        bo = vc5_bo_alloc(vc5_screen(pscreen),
+                          rsc->slices[0].offset +
+                          rsc->slices[0].size +
+                          rsc->cube_map_stride * layers - 1,
+                          "resource");
+        if (bo) {
+                DBG(V3D_DEBUG_SURFACE, "alloc %p @ 0x%08x:\n", rsc, bo->offset);
+                vc5_bo_unreference(&rsc->bo);
+                rsc->bo = bo;
+                return true;
+        } else {
+                return false;
+        }
+}
+
+/**
+ * Finishes a transfer map.
+ *
+ * For tiled resources, on write the linear staging buffer is tiled back
+ * into the BO.  For MSAA maps, the single-sampled temporary is blitted
+ * back into the real multisampled resource.
+ */
+static void
+vc5_resource_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *ptrans)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_transfer *trans = vc5_transfer(ptrans);
+
+        if (trans->map) {
+                struct vc5_resource *rsc;
+                struct vc5_resource_slice *slice;
+                /* If we mapped through a single-sample temporary, the data
+                 * is stored to its level 0, not the real resource's level.
+                 */
+                if (trans->ss_resource) {
+                        rsc = vc5_resource(trans->ss_resource);
+                        slice = &rsc->slices[0];
+                } else {
+                        rsc = vc5_resource(ptrans->resource);
+                        slice = &rsc->slices[ptrans->level];
+                }
+
+                if (ptrans->usage & PIPE_TRANSFER_WRITE) {
+                        vc5_store_tiled_image(rsc->bo->map + slice->offset +
+                                              ptrans->box.z * rsc->cube_map_stride,
+                                              slice->stride,
+                                              trans->map, ptrans->stride,
+                                              slice->tiling, rsc->cpp,
+                                              rsc->base.b.height0,
+                                              &ptrans->box);
+                }
+                free(trans->map);
+        }
+
+        /* Resolve the single-sampled temporary back into the MSAA resource
+         * with a blit, then drop the temporary.
+         */
+        if (trans->ss_resource && (ptrans->usage & PIPE_TRANSFER_WRITE)) {
+                struct pipe_blit_info blit;
+                memset(&blit, 0, sizeof(blit));
+
+                blit.src.resource = trans->ss_resource;
+                blit.src.format = trans->ss_resource->format;
+                blit.src.box.width = trans->ss_box.width;
+                blit.src.box.height = trans->ss_box.height;
+                blit.src.box.depth = 1;
+
+                blit.dst.resource = ptrans->resource;
+                blit.dst.format = ptrans->resource->format;
+                blit.dst.level = ptrans->level;
+                blit.dst.box = trans->ss_box;
+
+                blit.mask = util_format_get_mask(ptrans->resource->format);
+                blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+                pctx->blit(pctx, &blit);
+
+                pipe_resource_reference(&trans->ss_resource, NULL);
+        }
+
+        pipe_resource_reference(&ptrans->resource, NULL);
+        slab_free(&vc5->transfer_pool, ptrans);
+}
+
+/**
+ * Creates a throwaway resource matching prsc's target/format, sized to
+ * just the transfer box, used as the single-sampled staging surface when
+ * mapping MSAA resources.
+ */
+static struct pipe_resource *
+vc5_get_temp_resource(struct pipe_context *pctx,
+                      struct pipe_resource *prsc,
+                      const struct pipe_box *box)
+{
+        struct pipe_resource temp_setup;
+
+        memset(&temp_setup, 0, sizeof(temp_setup));
+        temp_setup.target = prsc->target;
+        temp_setup.format = prsc->format;
+        temp_setup.width0 = box->width;
+        temp_setup.height0 = box->height;
+        temp_setup.depth0 = 1;
+        temp_setup.array_size = 1;
+
+        /* nr_samples is left 0, so the temporary is single-sampled. */
+        return pctx->screen->resource_create(pctx->screen, &temp_setup);
+}
+
+/**
+ * Maps a range of the resource for CPU access.
+ *
+ * Tiled resources are staged through a malloced linear buffer
+ * (detiled on READ, retiled at unmap on WRITE); MSAA resources are
+ * first resolved into a single-sampled temporary.  Raster resources
+ * are mapped directly in the BO.
+ *
+ * Returns the CPU pointer, or NULL on failure (in which case *pptrans
+ * must not be used by the caller).
+ */
+static void *
+vc5_resource_transfer_map(struct pipe_context *pctx,
+                          struct pipe_resource *prsc,
+                          unsigned level, unsigned usage,
+                          const struct pipe_box *box,
+                          struct pipe_transfer **pptrans)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        struct vc5_transfer *trans;
+        struct pipe_transfer *ptrans;
+        enum pipe_format format = prsc->format;
+        char *buf;
+
+        /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
+         * being mapped.
+         */
+        if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+            !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) &&
+            prsc->last_level == 0 &&
+            prsc->width0 == box->width &&
+            prsc->height0 == box->height &&
+            prsc->depth0 == box->depth &&
+            prsc->array_size == 1 &&
+            rsc->bo->private) {
+                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+        }
+
+        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+                if (vc5_resource_bo_alloc(rsc)) {
+                        /* If it might be bound as one of our vertex buffers
+                         * or UBOs, make sure we re-emit vertex buffer state
+                         * or uniforms.
+                         */
+                        if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                                vc5->dirty |= VC5_DIRTY_VTXBUF;
+                        if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER)
+                                vc5->dirty |= VC5_DIRTY_CONSTBUF;
+                } else {
+                        /* If we failed to reallocate, flush users so that we
+                         * don't violate any syncing requirements.
+                         */
+                        vc5_flush_jobs_reading_resource(vc5, prsc);
+                }
+        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+                /* If we're writing and the buffer is being used by the CL, we
+                 * have to flush the CL first.  If we're only reading, we need
+                 * to flush if the CL has written our buffer.
+                 */
+                if (usage & PIPE_TRANSFER_WRITE)
+                        vc5_flush_jobs_reading_resource(vc5, prsc);
+                else
+                        vc5_flush_jobs_writing_resource(vc5, prsc);
+        }
+
+        if (usage & PIPE_TRANSFER_WRITE) {
+                rsc->writes++;
+                rsc->initialized_buffers = ~0;
+        }
+
+        trans = slab_alloc(&vc5->transfer_pool);
+        if (!trans)
+                return NULL;
+
+        /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */
+
+        /* slab_alloc_st() doesn't zero: */
+        memset(trans, 0, sizeof(*trans));
+        ptrans = &trans->base;
+
+        pipe_resource_reference(&ptrans->resource, prsc);
+        ptrans->level = level;
+        ptrans->usage = usage;
+        ptrans->box = *box;
+
+        /* If the resource is multisampled, we need to resolve to single
+         * sample.  This seems like it should be handled at a higher layer.
+         */
+        if (prsc->nr_samples > 1) {
+                trans->ss_resource = vc5_get_temp_resource(pctx, prsc, box);
+                if (!trans->ss_resource)
+                        goto fail;
+                assert(!trans->ss_resource->nr_samples);
+
+                /* The ptrans->box gets modified for tile alignment, so save
+                 * the original box for unmap time.
+                 */
+                trans->ss_box = *box;
+
+                if (usage & PIPE_TRANSFER_READ) {
+                        struct pipe_blit_info blit;
+                        memset(&blit, 0, sizeof(blit));
+
+                        blit.src.resource = ptrans->resource;
+                        blit.src.format = ptrans->resource->format;
+                        blit.src.level = ptrans->level;
+                        blit.src.box = trans->ss_box;
+
+                        blit.dst.resource = trans->ss_resource;
+                        blit.dst.format = trans->ss_resource->format;
+                        blit.dst.box.width = trans->ss_box.width;
+                        blit.dst.box.height = trans->ss_box.height;
+                        blit.dst.box.depth = 1;
+
+                        blit.mask = util_format_get_mask(prsc->format);
+                        blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+                        pctx->blit(pctx, &blit);
+                        vc5_flush_jobs_writing_resource(vc5, blit.dst.resource);
+                }
+
+                /* The rest of the mapping process should use our temporary. */
+                prsc = trans->ss_resource;
+                rsc = vc5_resource(prsc);
+                ptrans->box.x = 0;
+                ptrans->box.y = 0;
+                ptrans->box.z = 0;
+        }
+
+        /* Note that the current kernel implementation is synchronous, so no
+         * need to do syncing stuff here yet.
+         */
+
+        if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+                buf = vc5_bo_map_unsynchronized(rsc->bo);
+        else
+                buf = vc5_bo_map(rsc->bo);
+        if (!buf) {
+                fprintf(stderr, "Failed to map bo\n");
+                goto fail;
+        }
+
+        *pptrans = ptrans;
+
+        struct vc5_resource_slice *slice = &rsc->slices[level];
+        if (rsc->tiled) {
+                /* No direct mappings of tiled, since we need to manually
+                 * tile/untile.  Go through the fail path so the transfer
+                 * and resource reference are released (a bare return NULL
+                 * here used to leak them).
+                 */
+                if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+                        goto fail;
+
+                ptrans->stride = ptrans->box.width * rsc->cpp;
+                ptrans->layer_stride = ptrans->stride * ptrans->box.height;
+
+                trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
+                /* The staging buffer is dereferenced by the load below and
+                 * by the store at unmap; bail cleanly if it failed.
+                 */
+                if (!trans->map)
+                        goto fail;
+
+                if (usage & PIPE_TRANSFER_READ) {
+                        vc5_load_tiled_image(trans->map, ptrans->stride,
+                                             buf + slice->offset +
+                                             ptrans->box.z * rsc->cube_map_stride,
+                                             slice->stride,
+                                             slice->tiling, rsc->cpp,
+                                             rsc->base.b.height0,
+                                             &ptrans->box);
+                }
+                return trans->map;
+        } else {
+                ptrans->stride = slice->stride;
+                ptrans->layer_stride = ptrans->stride;
+
+                return buf + slice->offset +
+                        ptrans->box.y / util_format_get_blockheight(format) * ptrans->stride +
+                        ptrans->box.x / util_format_get_blockwidth(format) * rsc->cpp +
+                        ptrans->box.z * rsc->cube_map_stride;
+        }
+
+
+fail:
+        vc5_resource_transfer_unmap(pctx, ptrans);
+        return NULL;
+}
+
+/* Releases the resource's BO reference and frees the CPU-side struct. */
+static void
+vc5_resource_destroy(struct pipe_screen *pscreen,
+                     struct pipe_resource *prsc)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        vc5_bo_unreference(&rsc->bo);
+        free(rsc);
+}
+
+/**
+ * Exports the resource's BO through the requested winsys handle type
+ * (flink name, raw GEM handle, or dma-buf fd), reporting the level-0
+ * stride alongside.
+ */
+static boolean
+vc5_resource_get_handle(struct pipe_screen *pscreen,
+                        struct pipe_resource *prsc,
+                        struct winsys_handle *whandle)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        struct vc5_bo *bo = rsc->bo;
+
+        whandle->stride = rsc->slices[0].stride;
+
+        /* If we're passing some reference to our BO out to some other part of
+         * the system, then we can't do any optimizations about only us being
+         * the ones seeing it (like BO caching).
+         */
+        bo->private = false;
+
+        switch (whandle->type) {
+        case DRM_API_HANDLE_TYPE_SHARED:
+                return vc5_bo_flink(bo, &whandle->handle);
+        case DRM_API_HANDLE_TYPE_KMS:
+                whandle->handle = bo->handle;
+                return TRUE;
+        case DRM_API_HANDLE_TYPE_FD:
+                whandle->handle = vc5_bo_get_dmabuf(bo);
+                /* -1 indicates the dma-buf export failed. */
+                return whandle->handle != -1;
+        }
+
+        return FALSE;
+}
+
+/* Dispatch table consumed by the u_resource/u_transfer helper wrappers
+ * installed in vc5_resource_screen_init()/vc5_resource_context_init().
+ */
+static const struct u_resource_vtbl vc5_resource_vtbl = {
+        .resource_get_handle = vc5_resource_get_handle,
+        .resource_destroy = vc5_resource_destroy,
+        .transfer_map = vc5_resource_transfer_map,
+        .transfer_flush_region = u_default_transfer_flush_region,
+        .transfer_unmap = vc5_resource_transfer_unmap,
+};
+
+/**
+ * Computes the per-miplevel offset/stride/size and tiling mode for the
+ * resource, filling in rsc->slices[] and rsc->cube_map_stride.
+ *
+ * Levels are laid out in memory from the smallest mip (last_level) at
+ * offset 0 up to level 0 at the end, so level 0 can be page-aligned.
+ *
+ * @param caller  Label ("create"/"import") used only for debug output.
+ */
+static void
+vc5_setup_slices(struct vc5_resource *rsc, const char *caller)
+{
+        struct pipe_resource *prsc = &rsc->base.b;
+        uint32_t width = prsc->width0;
+        uint32_t height = prsc->height0;
+        /* Levels >= 2 are sized from the next power of two of the base
+         * level (presumably matching the HW mipmap layout -- confirm
+         * against the V3D spec).
+         */
+        uint32_t pot_width = util_next_power_of_two(width);
+        uint32_t pot_height = util_next_power_of_two(height);
+        uint32_t offset = 0;
+        /* utile dimensions depend on bytes per pixel; a UIF block is
+         * 2x2 utiles.
+         */
+        uint32_t utile_w = vc5_utile_width(rsc->cpp);
+        uint32_t utile_h = vc5_utile_height(rsc->cpp);
+        uint32_t uif_block_w = utile_w * 2;
+        uint32_t uif_block_h = utile_h * 2;
+        bool uif_top = false;
+
+        for (int i = prsc->last_level; i >= 0; i--) {
+                struct vc5_resource_slice *slice = &rsc->slices[i];
+
+                uint32_t level_width, level_height;
+                if (i < 2) {
+                        level_width = u_minify(width, i);
+                        level_height = u_minify(height, i);
+                } else {
+                        level_width = u_minify(pot_width, i);
+                        level_height = u_minify(pot_height, i);
+                }
+
+                if (!rsc->tiled) {
+                        slice->tiling = VC5_TILING_RASTER;
+                        if (prsc->nr_samples > 1) {
+                                /* MSAA (4x) surfaces are stored as raw tile buffer contents. */
+                                level_width = align(level_width, 32);
+                                level_height = align(level_height, 32);
+                        }
+                } else {
+                        /* Pick the densest tiling mode the level fits in:
+                         * LT for tiny levels, then 1- or 2-UIF-block-wide
+                         * columns, then full UIF.
+                         */
+                        if ((i != 0 || !uif_top) &&
+                            (level_width <= utile_w ||
+                             level_height <= utile_h)) {
+                                slice->tiling = VC5_TILING_LINEARTILE;
+                                level_width = align(level_width, utile_w);
+                                level_height = align(level_height, utile_h);
+                        } else if ((i != 0 || !uif_top) &&
+                                   level_width <= uif_block_w) {
+                                slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN;
+                                level_width = align(level_width, uif_block_w);
+                                level_height = align(level_height, uif_block_h);
+                        } else if ((i != 0 || !uif_top) &&
+                                   level_width <= 2 * uif_block_w) {
+                                slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN;
+                                level_width = align(level_width, 2 * uif_block_w);
+                                level_height = align(level_height, uif_block_h);
+                        } else {
+                                slice->tiling = VC5_TILING_UIF_NO_XOR;
+
+                                level_width = align(level_width,
+                                                    4 * uif_block_w);
+                                level_height = align(level_height,
+                                                     4 * uif_block_h);
+                        }
+                }
+
+                slice->offset = offset;
+                slice->stride = (level_width * rsc->cpp *
+                                 MAX2(prsc->nr_samples, 1));
+                slice->size = level_height * slice->stride;
+
+                offset += slice->size;
+
+                if (V3D_DEBUG & V3D_DEBUG_SURFACE) {
+                        static const char *const tiling_descriptions[] = {
+                                [VC5_TILING_RASTER] = "R",
+                                [VC5_TILING_LINEARTILE] = "LT",
+                                [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1",
+                                [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2",
+                                [VC5_TILING_UIF_NO_XOR] = "UIF",
+                                [VC5_TILING_UIF_XOR] = "UIF^",
+                        };
+
+                        fprintf(stderr,
+                                "rsc %s %p (format %s), %dx%d: "
+                                "level %d (%s) %dx%d -> %dx%d, stride %d@0x%08x\n",
+                                caller, rsc,
+                                util_format_short_name(prsc->format),
+                                prsc->width0, prsc->height0,
+                                i, tiling_descriptions[slice->tiling],
+                                u_minify(prsc->width0, i),
+                                u_minify(prsc->height0, i),
+                                level_width, level_height,
+                                slice->stride, slice->offset);
+                }
+        }
+
+        /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only
+         * needs to be aligned to utile boundaries.  Since tiles are laid out
+         * from small to big in memory, we need to align the later UIF slices
+         * to UIF blocks, if they were preceded by non-UIF-block-aligned LT
+         * slices.
+         *
+         * We additionally align to 4k, which improves UIF XOR performance.
+         */
+        uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) -
+                                      rsc->slices[0].offset);
+        if (page_align_offset) {
+                for (int i = 0; i <= prsc->last_level; i++)
+                        rsc->slices[i].offset += page_align_offset;
+        }
+
+        /* Arrays, cubes, and 3D textures have a stride which is the distance
+         * from one full mipmap tree to the next (64b aligned).
+         */
+        rsc->cube_map_stride = align(rsc->slices[0].offset +
+                                     rsc->slices[0].size, 64);
+}
+
+/**
+ * Common allocation/initialization shared by resource_create and
+ * resource_from_handle: copies the template, takes the initial reference,
+ * and derives cpp.
+ *
+ * Returns NULL on allocation failure -- callers must check before
+ * dereferencing.
+ */
+static struct vc5_resource *
+vc5_resource_setup(struct pipe_screen *pscreen,
+                   const struct pipe_resource *tmpl)
+{
+        struct vc5_resource *rsc = CALLOC_STRUCT(vc5_resource);
+        if (!rsc)
+                return NULL;
+        struct pipe_resource *prsc = &rsc->base.b;
+
+        *prsc = *tmpl;
+
+        pipe_reference_init(&prsc->reference, 1);
+        prsc->screen = pscreen;
+
+        rsc->base.vtbl = &vc5_resource_vtbl;
+        /* MSAA surfaces are stored as raw 32bpp tile-buffer contents
+         * (see vc5_setup_slices), so force cpp to 4 regardless of format.
+         */
+        if (prsc->nr_samples <= 1)
+                rsc->cpp = util_format_get_blocksize(tmpl->format);
+        else
+                rsc->cpp = sizeof(uint32_t);
+
+        assert(rsc->cpp);
+
+        return rsc;
+}
+
+/* Returns whether `needle` appears in the `haystack` modifier list. */
+static bool
+find_modifier(uint64_t needle, const uint64_t *haystack, int count)
+{
+        int i;
+
+        for (i = 0; i < count; i++) {
+                if (haystack[i] == needle)
+                        return true;
+        }
+
+        return false;
+}
+
+/**
+ * Creates a new resource, choosing a tiled or linear layout constrained
+ * by the caller's acceptable modifier list (DRM_FORMAT_MOD_INVALID alone
+ * means "driver's choice").
+ *
+ * Returns NULL on allocation failure or if no acceptable modifier can be
+ * satisfied.
+ */
+static struct pipe_resource *
+vc5_resource_create_with_modifiers(struct pipe_screen *pscreen,
+                                   const struct pipe_resource *tmpl,
+                                   const uint64_t *modifiers,
+                                   int count)
+{
+        bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+        struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+        struct pipe_resource *prsc;
+        /* Use a tiled layout if we can, for better 3D performance. */
+        bool should_tile = true;
+
+        /* vc5_resource_setup() returns NULL on allocation failure; check
+         * before taking the address of any member.
+         */
+        if (!rsc)
+                return NULL;
+        prsc = &rsc->base.b;
+
+        /* VBOs/PBOs are untiled (and 1 height). */
+        if (tmpl->target == PIPE_BUFFER)
+                should_tile = false;
+
+        /* Cursors are always linear, and the user can request linear as well.
+         */
+        if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR))
+                should_tile = false;
+
+        /* Scanout BOs for simulator need to be linear for interaction with
+         * i965.
+         */
+        if (using_vc5_simulator &&
+            tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
+                should_tile = false;
+
+        /* No user-specified modifier; determine our own. */
+        if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) {
+                linear_ok = true;
+                rsc->tiled = should_tile;
+        } else if (should_tile &&
+                   find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+                                 modifiers, count)) {
+                rsc->tiled = true;
+        } else if (linear_ok) {
+                rsc->tiled = false;
+        } else {
+                fprintf(stderr, "Unsupported modifier requested\n");
+                /* Use the common fail path; a bare return NULL here used
+                 * to leak rsc.
+                 */
+                goto fail;
+        }
+
+        if (tmpl->target != PIPE_BUFFER)
+                rsc->tex_format = vc5_get_tex_format(prsc->format);
+
+        vc5_setup_slices(rsc, "create");
+        if (!vc5_resource_bo_alloc(rsc))
+                goto fail;
+
+        return prsc;
+fail:
+        vc5_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+/**
+ * Standard resource_create hook: equivalent to create_with_modifiers with
+ * no user-specified modifier, letting the driver pick the layout.
+ */
+struct pipe_resource *
+vc5_resource_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *tmpl)
+{
+        const uint64_t mod = DRM_FORMAT_MOD_INVALID;
+        return vc5_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
+}
+
+/**
+ * Wraps a BO imported from another process/driver (flink name or dma-buf
+ * fd) in a new pipe_resource.
+ *
+ * Only the linear modifier is currently supported, and the importer's
+ * stride must match the stride we would have computed ourselves.
+ * Returns NULL on any failure.
+ */
+static struct pipe_resource *
+vc5_resource_from_handle(struct pipe_screen *pscreen,
+                         const struct pipe_resource *tmpl,
+                         struct winsys_handle *whandle,
+                         unsigned usage)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+        struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+        struct pipe_resource *prsc;
+        struct vc5_resource_slice *slice;
+
+        /* Check the allocation before taking the address of any member:
+         * the old code computed &rsc->base.b and &rsc->slices[0] before
+         * the NULL check, which is undefined behavior on a NULL pointer.
+         */
+        if (!rsc)
+                return NULL;
+        prsc = &rsc->base.b;
+        slice = &rsc->slices[0];
+
+        switch (whandle->modifier) {
+        case DRM_FORMAT_MOD_LINEAR:
+                rsc->tiled = false;
+                break;
+        /* XXX: UIF */
+        default:
+                fprintf(stderr,
+                        "Attempt to import unsupported modifier 0x%llx\n",
+                        (long long)whandle->modifier);
+                goto fail;
+        }
+
+        if (whandle->offset != 0) {
+                fprintf(stderr,
+                        "Attempt to import unsupported winsys offset %u\n",
+                        whandle->offset);
+                goto fail;
+        }
+
+        switch (whandle->type) {
+        case DRM_API_HANDLE_TYPE_SHARED:
+                rsc->bo = vc5_bo_open_name(screen,
+                                           whandle->handle, whandle->stride);
+                break;
+        case DRM_API_HANDLE_TYPE_FD:
+                rsc->bo = vc5_bo_open_dmabuf(screen,
+                                             whandle->handle, whandle->stride);
+                break;
+        default:
+                fprintf(stderr,
+                        "Attempt to import unsupported handle type %d\n",
+                        whandle->type);
+                goto fail;
+        }
+
+        if (!rsc->bo)
+                goto fail;
+
+        vc5_setup_slices(rsc, "import");
+
+        rsc->tex_format = vc5_get_tex_format(prsc->format);
+
+        DBG(V3D_DEBUG_SURFACE,
+            "rsc import %p (format %s), %dx%d: "
+            "level 0 (R) -> stride %d@0x%08x\n",
+            rsc, util_format_short_name(prsc->format),
+            prsc->width0, prsc->height0,
+            slice->stride, slice->offset);
+
+        /* We can't map at an offset/stride other than our own layout, so
+         * reject mismatched strides (once, to avoid log spam).
+         */
+        if (whandle->stride != slice->stride) {
+                static bool warned = false;
+                if (!warned) {
+                        warned = true;
+                        fprintf(stderr,
+                                "Attempting to import %dx%d %s with "
+                                "unsupported stride %d instead of %d\n",
+                                prsc->width0, prsc->height0,
+                                util_format_short_name(prsc->format),
+                                whandle->stride,
+                                slice->stride);
+                }
+                goto fail;
+        }
+
+        return prsc;
+
+fail:
+        vc5_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+/**
+ * Creates a pipe_surface view of a single miplevel/layer of a texture,
+ * precomputing the offset, tiling, and tile-buffer internal type/bpp
+ * needed for TILE_RENDERING_MODE_CONFIGURATION.
+ */
+static struct pipe_surface *
+vc5_create_surface(struct pipe_context *pctx,
+                   struct pipe_resource *ptex,
+                   const struct pipe_surface *surf_tmpl)
+{
+        struct vc5_surface *surface = CALLOC_STRUCT(vc5_surface);
+        struct vc5_resource *rsc = vc5_resource(ptex);
+
+        if (!surface)
+                return NULL;
+
+        /* Only single-layer surfaces are supported. */
+        assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
+        struct pipe_surface *psurf = &surface->base;
+        unsigned level = surf_tmpl->u.tex.level;
+
+        pipe_reference_init(&psurf->reference, 1);
+        pipe_resource_reference(&psurf->texture, ptex);
+
+        psurf->context = pctx;
+        psurf->format = surf_tmpl->format;
+        psurf->width = u_minify(ptex->width0, level);
+        psurf->height = u_minify(ptex->height0, level);
+        psurf->u.tex.level = level;
+        psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+        psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+
+        surface->offset = (rsc->slices[level].offset +
+                           psurf->u.tex.first_layer * rsc->cube_map_stride);
+        surface->tiling = rsc->slices[level].tiling;
+        surface->format = vc5_get_rt_format(psurf->format);
+
+        if (util_format_is_depth_or_stencil(psurf->format)) {
+                switch (psurf->format) {
+                case PIPE_FORMAT_Z16_UNORM:
+                        surface->internal_type = INTERNAL_TYPE_DEPTH_16;
+                        break;
+                case PIPE_FORMAT_Z32_FLOAT:
+                case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+                        surface->internal_type = INTERNAL_TYPE_DEPTH_32F;
+                        break;
+                default:
+                        surface->internal_type = INTERNAL_TYPE_DEPTH_24;
+                }
+        } else {
+                /* Color: derive tile-buffer internal type/bpp from the
+                 * render-target output format.
+                 */
+                uint32_t bpp, type;
+                vc5_get_internal_type_bpp_for_output_format(surface->format,
+                                                            &type, &bpp);
+                surface->internal_type = type;
+                surface->internal_bpp = bpp;
+        }
+
+        return &surface->base;
+}
+
+/* Drops the surface's texture reference and frees the surface. */
+static void
+vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
+{
+        pipe_resource_reference(&psurf->texture, NULL);
+        FREE(psurf);
+}
+
+/* No-op flush_resource hook. */
+static void
+vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource)
+{
+        /* All calls to flush_resource are followed by a flush of the context,
+         * so there's nothing to do.
+         */
+}
+
+/* Installs the resource-related screen hooks; get_handle/destroy are
+ * routed through the u_resource vtbl wrappers.
+ */
+void
+vc5_resource_screen_init(struct pipe_screen *pscreen)
+{
+        pscreen->resource_create_with_modifiers =
+                vc5_resource_create_with_modifiers;
+        pscreen->resource_create = vc5_resource_create;
+        pscreen->resource_from_handle = vc5_resource_from_handle;
+        pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+        pscreen->resource_destroy = u_resource_destroy_vtbl;
+}
+
+/* Installs the resource-related context hooks; transfers are routed
+ * through the u_transfer vtbl wrappers.
+ */
+void
+vc5_resource_context_init(struct pipe_context *pctx)
+{
+        pctx->transfer_map = u_transfer_map_vtbl;
+        pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
+        pctx->transfer_unmap = u_transfer_unmap_vtbl;
+        pctx->buffer_subdata = u_default_buffer_subdata;
+        pctx->texture_subdata = u_default_texture_subdata;
+        pctx->create_surface = vc5_create_surface;
+        pctx->surface_destroy = vc5_surface_destroy;
+        pctx->resource_copy_region = util_resource_copy_region;
+        pctx->blit = vc5_blit;
+        pctx->flush_resource = vc5_flush_resource;
+}
diff --git a/src/gallium/drivers/vc5/vc5_resource.h b/src/gallium/drivers/vc5/vc5_resource.h
new file mode 100644
index 00000000000..3440fdc947b
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_resource.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_RESOURCE_H
+#define VC5_RESOURCE_H
+
+#include "vc5_screen.h"
+#include "util/u_transfer.h"
+
+/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These
+ * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB
+ * page. Those pages are then arranged left-to-right, top-to-bottom, to cover
+ * an image.
+ *
+ * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte
+ * utiles. Utiles may be 8x8 (8bpp), 8x4(16bpp) or 4x4 (32bpp).
+ */
+
+/**
+ * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory
+ * Format field of render target and Z/Stencil config.
+ */
+enum vc5_tiling_mode {
+        /* Untiled resources.  Not valid as texture inputs. */
+        VC5_TILING_RASTER,
+
+        /* Single line of u-tiles. */
+        VC5_TILING_LINEARTILE,
+
+        /* Departure from standard 4-UIF block column format: a single
+         * UIF-block-wide column.
+         */
+        VC5_TILING_UBLINEAR_1_COLUMN,
+
+        /* Departure from standard 4-UIF block column format: two
+         * UIF-block-wide columns.
+         */
+        VC5_TILING_UBLINEAR_2_COLUMN,
+
+        /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+         * split 2x2 into utiles.
+         */
+        VC5_TILING_UIF_NO_XOR,
+
+        /* Like UIF_NO_XOR, but with the XOR address swizzle applied
+         * (presumably to spread accesses across memory banks -- confirm
+         * against the V3D spec).
+         */
+        VC5_TILING_UIF_XOR,
+};
+
+struct vc5_transfer {
+        struct pipe_transfer base;
+        /* Malloced linear staging buffer for tiled mappings, or NULL when
+         * the BO is mapped directly.
+         */
+        void *map;
+
+        /* Single-sampled temporary used when mapping an MSAA resource. */
+        struct pipe_resource *ss_resource;
+        /* Original transfer box, saved for the resolve blit at unmap time
+         * (base.box gets modified for tile alignment).
+         */
+        struct pipe_box ss_box;
+};
+
+/* Layout of one miplevel within a layer of the resource. */
+struct vc5_resource_slice {
+        /* Byte offset of this level from the start of the layer. */
+        uint32_t offset;
+        /* Bytes between successive rows of the (padded) level. */
+        uint32_t stride;
+        /* Total bytes of this level within one layer. */
+        uint32_t size;
+        enum vc5_tiling_mode tiling;
+};
+
+struct vc5_surface {
+        struct pipe_surface base;
+        /* Byte offset of the viewed level/layer within the BO. */
+        uint32_t offset;
+        /* Tiling mode of the viewed miplevel. */
+        enum vc5_tiling_mode tiling;
+        /**
+         * Output image format for TILE_RENDERING_MODE_CONFIGURATION
+         */
+        uint8_t format;
+
+        /**
+         * Internal format of the tile buffer for
+         * TILE_RENDERING_MODE_CONFIGURATION.
+         */
+        uint8_t internal_type;
+
+        /**
+         * internal bpp value (0=32bpp, 2=128bpp) for color buffers in
+         * TILE_RENDERING_MODE_CONFIGURATION.
+         */
+        uint8_t internal_bpp;
+};
+
+struct vc5_resource {
+        struct u_resource base;
+        struct vc5_bo *bo;
+        struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS];
+        /* Bytes between one full mipmap tree (array layer / cube face /
+         * 3D slice) and the next.
+         */
+        uint32_t cube_map_stride;
+        /* Bytes per pixel (forced to 4 for MSAA surfaces). */
+        int cpp;
+        /* Whether the resource uses a tiled layout (see vc5_tiling_mode)
+         * rather than raster order.
+         */
+        bool tiled;
+        /** One of V3D_TEXTURE_DATA_FORMAT_* */
+        uint8_t tex_format;
+
+        /**
+         * Number of times the resource has been written to.
+         *
+         * This is used to track whether we need to load the surface on first
+         * rendering.
+         */
+        uint64_t writes;
+
+        /**
+         * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL
+         * for which parts of the resource are defined.
+         *
+         * Used for avoiding fallback to quad clears for clearing just depth,
+         * when the stencil contents have never been initialized.  Note that
+         * we're lazy and fields not present in the buffer (DEPTH in a color
+         * buffer) may get marked.
+         */
+        uint32_t initialized_buffers;
+};
+
+/* Cast wrappers from the generic gallium types to our subclasses. */
+static inline struct vc5_resource *
+vc5_resource(struct pipe_resource *prsc)
+{
+        return (struct vc5_resource *)prsc;
+}
+
+static inline struct vc5_surface *
+vc5_surface(struct pipe_surface *psurf)
+{
+        return (struct vc5_surface *)psurf;
+}
+
+static inline struct vc5_transfer *
+vc5_transfer(struct pipe_transfer *ptrans)
+{
+        return (struct vc5_transfer *)ptrans;
+}
+
+void vc5_resource_screen_init(struct pipe_screen *pscreen);
+void vc5_resource_context_init(struct pipe_context *pctx);
+struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmpl);
+
+#endif /* VC5_RESOURCE_H */
diff --git a/src/gallium/drivers/vc5/vc5_screen.c b/src/gallium/drivers/vc5/vc5_screen.c
new file mode 100644
index 00000000000..d3c9f0962e3
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_screen.c
@@ -0,0 +1,620 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "os/os_misc.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_hash_table.h"
+#include "util/ralloc.h"
+
+#include <xf86drm.h>
+#include "vc5_drm.h"
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "compiler/v3d_compiler.h"
+
+/**
+ * Returns the renderer name string, e.g. "VC5 V3D 3.3".
+ *
+ * Built lazily on first use; the string is ralloc-parented to the screen
+ * so it is freed along with it.
+ */
+static const char *
+vc5_screen_get_name(struct pipe_screen *pscreen)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+
+        if (!screen->name) {
+                screen->name = ralloc_asprintf(screen,
+                                               "VC5 V3D %d.%d",
+                                               screen->devinfo.ver / 10,
+                                               screen->devinfo.ver % 10);
+        }
+
+        return screen->name;
+}
+
+/** Vendor string, used for both GL_VENDOR and the device vendor. */
+static const char *
+vc5_screen_get_vendor(struct pipe_screen *pscreen)
+{
+        return "Broadcom";
+}
+
+/**
+ * Tears down the screen: frees the BO handle table, BO cache and transfer
+ * pool, shuts down the simulator (if in use) and the compiler, and closes
+ * the DRM fd the screen owns.
+ */
+static void
+vc5_screen_destroy(struct pipe_screen *pscreen)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+
+        util_hash_table_destroy(screen->bo_handles);
+        vc5_bufmgr_destroy(pscreen);
+        slab_destroy_parent(&screen->transfer_pool);
+
+        if (using_vc5_simulator)
+                vc5_simulator_destroy(screen);
+
+        v3d_compiler_free(screen->compiler);
+
+        close(screen->fd);
+        /* The screen was rzalloc'd with a NULL context; pscreen aliases it
+         * (base is the first member), so this frees the whole screen.
+         */
+        ralloc_free(pscreen);
+}
+
+/**
+ * pipe_screen::get_param implementation: reports integer/boolean
+ * capabilities to the state tracker.  Grouped into supported booleans,
+ * numeric limits, and unsupported features; unknown caps are logged and
+ * reported as 0.
+ */
+static int
+vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+        switch (param) {
+                /* Supported features (boolean caps). */
+        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+        case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+        case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+        case PIPE_CAP_NPOT_TEXTURES:
+        case PIPE_CAP_SHAREABLE_SHADERS:
+        case PIPE_CAP_USER_CONSTANT_BUFFERS:
+        case PIPE_CAP_TEXTURE_SHADOW_MAP:
+        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+        case PIPE_CAP_TWO_SIDED_STENCIL:
+        case PIPE_CAP_TEXTURE_MULTISAMPLE:
+        case PIPE_CAP_TEXTURE_SWIZZLE:
+        case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+        case PIPE_CAP_TGSI_INSTANCEID:
+        case PIPE_CAP_SM3:
+        case PIPE_CAP_INDEP_BLEND_ENABLE: /* XXX */
+        case PIPE_CAP_TEXTURE_QUERY_LOD:
+        case PIPE_CAP_PRIMITIVE_RESTART:
+        case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+        case PIPE_CAP_OCCLUSION_QUERY:
+        case PIPE_CAP_POINT_SPRITE:
+        case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+        case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
+        case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+        case PIPE_CAP_COMPUTE:
+        case PIPE_CAP_DRAW_INDIRECT:
+                return 1;
+
+        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+                return 256;
+
+        case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+                return 4;
+
+        case PIPE_CAP_GLSL_FEATURE_LEVEL:
+                return 400;
+
+        case PIPE_CAP_MAX_VIEWPORTS:
+                return 1;
+
+        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+                return 1;
+
+        case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+        case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+                return 1;
+
+
+        /* Stream output. */
+        case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+                return 4;
+        case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+        case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+                return 64;
+
+        case PIPE_CAP_MIN_TEXEL_OFFSET:
+        case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+                return -8;
+        case PIPE_CAP_MAX_TEXEL_OFFSET:
+        case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+                return 7;
+
+        /* Unsupported features. */
+        case PIPE_CAP_ANISOTROPIC_FILTER:
+        case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+        case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+        case PIPE_CAP_CUBE_MAP_ARRAY:
+        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+        case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+        case PIPE_CAP_SEAMLESS_CUBE_MAP:
+        case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+        case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_START_INSTANCE:
+        case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+        case PIPE_CAP_SHADER_STENCIL_EXPORT:
+        case PIPE_CAP_TGSI_TEXCOORD:
+        case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+        case PIPE_CAP_CONDITIONAL_RENDER:
+        case PIPE_CAP_TEXTURE_BARRIER:
+        case PIPE_CAP_INDEP_BLEND_FUNC:
+        case PIPE_CAP_DEPTH_CLIP_DISABLE:
+        case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+        case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+        case PIPE_CAP_USER_VERTEX_BUFFERS:
+        case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+        case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+        case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+        case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
+        case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+        case PIPE_CAP_TEXTURE_GATHER_SM5:
+        case PIPE_CAP_FAKE_SW_MSAA:
+        case PIPE_CAP_SAMPLE_SHADING:
+        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+        case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+        case PIPE_CAP_MAX_VERTEX_STREAMS:
+        case PIPE_CAP_MULTI_DRAW_INDIRECT:
+        case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+        case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+        case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+        case PIPE_CAP_SAMPLER_VIEW_TARGET:
+        case PIPE_CAP_CLIP_HALFZ:
+        case PIPE_CAP_VERTEXID_NOBASE:
+        case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+        case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+        case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+        case PIPE_CAP_DEPTH_BOUNDS_TEST:
+        case PIPE_CAP_TGSI_TXQS:
+        case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+        case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+        case PIPE_CAP_CLEAR_TEXTURE:
+        case PIPE_CAP_DRAW_PARAMETERS:
+        case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+        case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+        case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+        case PIPE_CAP_INVALIDATE_BUFFER:
+        case PIPE_CAP_GENERATE_MIPMAP:
+        case PIPE_CAP_STRING_MARKER:
+        case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+        case PIPE_CAP_QUERY_BUFFER_OBJECT:
+        case PIPE_CAP_QUERY_MEMORY_INFO:
+        case PIPE_CAP_PCI_GROUP:
+        case PIPE_CAP_PCI_BUS:
+        case PIPE_CAP_PCI_DEVICE:
+        case PIPE_CAP_PCI_FUNCTION:
+        case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+        case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+        case PIPE_CAP_CULL_DISTANCE:
+        case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+        case PIPE_CAP_TGSI_VOTE:
+        case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+        case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+        case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+        case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+        case PIPE_CAP_TGSI_FS_FBFETCH:
+        case PIPE_CAP_INT64:
+        case PIPE_CAP_INT64_DIVMOD:
+        case PIPE_CAP_DOUBLES:
+        case PIPE_CAP_BINDLESS_TEXTURE:
+        case PIPE_CAP_POST_DEPTH_COVERAGE:
+        case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
+        case PIPE_CAP_TGSI_BALLOT:
+        case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
+        case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
+        case PIPE_CAP_TGSI_CLOCK:
+        case PIPE_CAP_TGSI_TEX_TXF_LZ:
+        case PIPE_CAP_NATIVE_FENCE_FD:
+        case PIPE_CAP_TGSI_MUL_ZERO_WINS:
+        case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+        case PIPE_CAP_QUERY_SO_OVERFLOW:
+        case PIPE_CAP_MEMOBJ:
+        case PIPE_CAP_LOAD_CONSTBUF:
+        case PIPE_CAP_TILE_RASTER_ORDER:
+                return 0;
+
+        /* Geometry shader output, unsupported. */
+        case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+        case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+                return 0;
+
+        /* Texturing. */
+        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+                return VC5_MAX_MIP_LEVELS;
+        case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+                return 256;
+        case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+                return 2048;
+
+        /* Render targets. */
+        case PIPE_CAP_MAX_RENDER_TARGETS:
+                return 4;
+
+        /* Queries. */
+        case PIPE_CAP_QUERY_TIME_ELAPSED:
+        case PIPE_CAP_QUERY_TIMESTAMP:
+                return 0;
+
+        case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+                return 2048;
+
+        case PIPE_CAP_ENDIANNESS:
+                return PIPE_ENDIAN_LITTLE;
+
+        case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+                return 64;
+
+        case PIPE_CAP_VENDOR_ID:
+                return 0x14E4;
+        /* Not a PCI device, so no meaningful device ID to report. */
+        case PIPE_CAP_DEVICE_ID:
+                return 0xFFFFFFFF;
+        case PIPE_CAP_ACCELERATED:
+                return 1;
+        case PIPE_CAP_VIDEO_MEMORY: {
+                uint64_t system_memory;
+
+                if (!os_get_total_physical_memory(&system_memory))
+                        return 0;
+
+                /* Report total system RAM in MB: unified memory GPU. */
+                return (int)(system_memory >> 20);
+        }
+        case PIPE_CAP_UMA:
+                return 1;
+
+        default:
+                fprintf(stderr, "unknown param %d\n", param);
+                return 0;
+        }
+}
+
+/**
+ * pipe_screen::get_paramf implementation: floating-point capability
+ * limits (line/point widths, LOD bias, guard band).  Unknown caps are
+ * logged and reported as 0.
+ */
+static float
+vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+        switch (param) {
+        case PIPE_CAPF_MAX_LINE_WIDTH:
+        case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+                return 32;
+
+        case PIPE_CAPF_MAX_POINT_WIDTH:
+        case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+                return 512.0f;
+
+        case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+                return 0.0f;
+        case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+                return 0.0f;
+        case PIPE_CAPF_GUARD_BAND_LEFT:
+        case PIPE_CAPF_GUARD_BAND_TOP:
+        case PIPE_CAPF_GUARD_BAND_RIGHT:
+        case PIPE_CAPF_GUARD_BAND_BOTTOM:
+                return 0.0f;
+        default:
+                fprintf(stderr, "unknown paramf %d\n", param);
+                return 0;
+        }
+}
+
+/**
+ * pipe_screen::get_shader_param implementation: per-stage shader limits.
+ *
+ * Only vertex and fragment stages are supported; every other stage
+ * reports 0 for all caps.  Unknown caps are logged and reported as 0.
+ */
+static int
+vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+                            enum pipe_shader_cap param)
+{
+        if (shader != PIPE_SHADER_VERTEX &&
+            shader != PIPE_SHADER_FRAGMENT) {
+                return 0;
+        }
+
+        /* this is probably not totally correct.. but it's a start: */
+        switch (param) {
+        case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+                return 16384;
+
+        case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+                return UINT_MAX;
+
+        case PIPE_SHADER_CAP_MAX_INPUTS:
+                if (shader == PIPE_SHADER_FRAGMENT)
+                        return VC5_MAX_FS_INPUTS / 4;
+                else
+                        return 16;
+        case PIPE_SHADER_CAP_MAX_OUTPUTS:
+                return shader == PIPE_SHADER_FRAGMENT ? 4 : 8;
+        case PIPE_SHADER_CAP_MAX_TEMPS:
+                return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
+        case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+                return 16 * 1024 * sizeof(float);
+        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+                return 16;
+        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+                return 0;
+        case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+                return 0;
+        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+                return 1;
+        case PIPE_SHADER_CAP_SUBROUTINES:
+                return 0;
+        case PIPE_SHADER_CAP_INTEGERS:
+                return 1;
+        case PIPE_SHADER_CAP_FP16:
+        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+        case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+                return 0;
+        case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+        case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+        case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+        case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+                return VC5_MAX_TEXTURE_SAMPLERS;
+        case PIPE_SHADER_CAP_PREFERRED_IR:
+                return PIPE_SHADER_IR_NIR;
+        case PIPE_SHADER_CAP_SUPPORTED_IRS:
+                return 0;
+        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+                return 32;
+        case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+        case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+                return 0;
+        default:
+                fprintf(stderr, "unknown shader param %d\n", param);
+                return 0;
+        }
+        /* All switch paths return, so no trailing return is needed. */
+}
+
+/**
+ * pipe_screen::is_format_supported implementation.
+ *
+ * Accumulates the supported subset of the requested usage bits into
+ * @retval and succeeds only when every requested bit is supported.
+ */
+static boolean
+vc5_screen_is_format_supported(struct pipe_screen *pscreen,
+                               enum pipe_format format,
+                               enum pipe_texture_target target,
+                               unsigned sample_count,
+                               unsigned usage)
+{
+        unsigned retval = 0;
+
+        /* Only non-MSAA or exactly VC5_MAX_SAMPLES surfaces are handled. */
+        if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES)
+                return FALSE;
+
+        if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+            !util_format_is_supported(format, usage)) {
+                return FALSE;
+        }
+
+        if (usage & PIPE_BIND_VERTEX_BUFFER) {
+                switch (format) {
+                case PIPE_FORMAT_R32G32B32A32_FLOAT:
+                case PIPE_FORMAT_R32G32B32_FLOAT:
+                case PIPE_FORMAT_R32G32_FLOAT:
+                case PIPE_FORMAT_R32_FLOAT:
+                case PIPE_FORMAT_R32G32B32A32_SNORM:
+                case PIPE_FORMAT_R32G32B32_SNORM:
+                case PIPE_FORMAT_R32G32_SNORM:
+                case PIPE_FORMAT_R32_SNORM:
+                case PIPE_FORMAT_R32G32B32A32_SSCALED:
+                case PIPE_FORMAT_R32G32B32_SSCALED:
+                case PIPE_FORMAT_R32G32_SSCALED:
+                case PIPE_FORMAT_R32_SSCALED:
+                case PIPE_FORMAT_R16G16B16A16_UNORM:
+                case PIPE_FORMAT_R16G16B16_UNORM:
+                case PIPE_FORMAT_R16G16_UNORM:
+                case PIPE_FORMAT_R16_UNORM:
+                case PIPE_FORMAT_R16G16B16A16_SNORM:
+                case PIPE_FORMAT_R16G16B16_SNORM:
+                case PIPE_FORMAT_R16G16_SNORM:
+                case PIPE_FORMAT_R16_SNORM:
+                case PIPE_FORMAT_R16G16B16A16_USCALED:
+                case PIPE_FORMAT_R16G16B16_USCALED:
+                case PIPE_FORMAT_R16G16_USCALED:
+                case PIPE_FORMAT_R16_USCALED:
+                case PIPE_FORMAT_R16G16B16A16_SSCALED:
+                case PIPE_FORMAT_R16G16B16_SSCALED:
+                case PIPE_FORMAT_R16G16_SSCALED:
+                case PIPE_FORMAT_R16_SSCALED:
+                case PIPE_FORMAT_R8G8B8A8_UNORM:
+                case PIPE_FORMAT_R8G8B8_UNORM:
+                case PIPE_FORMAT_R8G8_UNORM:
+                case PIPE_FORMAT_R8_UNORM:
+                case PIPE_FORMAT_R8G8B8A8_SNORM:
+                case PIPE_FORMAT_R8G8B8_SNORM:
+                case PIPE_FORMAT_R8G8_SNORM:
+                case PIPE_FORMAT_R8_SNORM:
+                case PIPE_FORMAT_R8G8B8A8_USCALED:
+                case PIPE_FORMAT_R8G8B8_USCALED:
+                case PIPE_FORMAT_R8G8_USCALED:
+                case PIPE_FORMAT_R8_USCALED:
+                case PIPE_FORMAT_R8G8B8A8_SSCALED:
+                case PIPE_FORMAT_R8G8B8_SSCALED:
+                case PIPE_FORMAT_R8G8_SSCALED:
+                case PIPE_FORMAT_R8_SSCALED:
+                        retval |= PIPE_BIND_VERTEX_BUFFER;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        if ((usage & PIPE_BIND_RENDER_TARGET) &&
+            vc5_rt_format_supported(format)) {
+                retval |= PIPE_BIND_RENDER_TARGET;
+        }
+
+        if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+            vc5_tex_format_supported(format)) {
+                retval |= PIPE_BIND_SAMPLER_VIEW;
+        }
+
+        if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+            (format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
+             format == PIPE_FORMAT_X8Z24_UNORM ||
+             format == PIPE_FORMAT_Z16_UNORM ||
+             format == PIPE_FORMAT_Z32_FLOAT ||
+             format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+                retval |= PIPE_BIND_DEPTH_STENCIL;
+        }
+
+        if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+            (format == PIPE_FORMAT_I8_UINT ||
+             format == PIPE_FORMAT_I16_UINT ||
+             format == PIPE_FORMAT_I32_UINT)) {
+                retval |= PIPE_BIND_INDEX_BUFFER;
+        }
+
+#if 0
+        if (retval != usage) {
+                fprintf(stderr,
+                        "not supported: format=%s, target=%d, sample_count=%d, "
+                        "usage=0x%x, retval=0x%x\n", util_format_name(format),
+                        target, sample_count, usage, retval);
+        }
+#endif
+
+        return retval == usage;
+}
+
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+/* Hash callback for the bo_handles table: GEM handles are already
+ * well-distributed small ints, so the key itself is the hash.
+ */
+static unsigned handle_hash(void *key)
+{
+        return PTR_TO_UINT(key);
+}
+
+/* Comparison callback for the bo_handles table: returns 0 on equality,
+ * per util_hash_table's memcmp-style contract.
+ */
+static int handle_compare(void *key1, void *key2)
+{
+        return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
+}
+
+/**
+ * Queries the V3D core IDENT registers via DRM_IOCTL_VC5_GET_PARAM and
+ * fills in screen->devinfo.ver.
+ *
+ * Returns false (and logs to stderr) on ioctl failure or when the
+ * hardware version is anything other than the supported 3.3.
+ */
+static bool
+vc5_get_device_info(struct vc5_screen *screen)
+{
+        struct drm_vc5_get_param ident0 = {
+                .param = DRM_VC5_PARAM_V3D_CORE0_IDENT0,
+        };
+        struct drm_vc5_get_param ident1 = {
+                .param = DRM_VC5_PARAM_V3D_CORE0_IDENT1,
+        };
+        int ret;
+
+        ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM, &ident0);
+        if (ret != 0) {
+                fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n",
+                        strerror(errno));
+                return false;
+        }
+        ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM, &ident1);
+        if (ret != 0) {
+                fprintf(stderr, "Couldn't get V3D core IDENT1: %s\n",
+                        strerror(errno));
+                return false;
+        }
+
+        /* Major version lives in IDENT0[31:24], minor in IDENT1[3:0];
+         * combined as a two-digit decimal (e.g. 33 for V3D 3.3).
+         */
+        uint32_t major = (ident0.value >> 24) & 0xff;
+        uint32_t minor = (ident1.value >> 0) & 0xf;
+        screen->devinfo.ver = major * 10 + minor;
+
+        if (screen->devinfo.ver != 33) {
+                fprintf(stderr,
+                        "V3D %d.%d not supported by this version of Mesa.\n",
+                        screen->devinfo.ver / 10,
+                        screen->devinfo.ver % 10);
+                return false;
+        }
+
+        return true;
+}
+
+/** Hands the shared v3d NIR compiler options to the state tracker. */
+static const void *
+vc5_screen_get_compiler_options(struct pipe_screen *pscreen,
+                                enum pipe_shader_ir ir, unsigned shader)
+{
+        return &v3d_nir_options;
+}
+
+/**
+ * Creates the vc5 pipe_screen for the given DRM fd.
+ *
+ * Takes ownership of @fd: it is closed on failure and on screen
+ * destruction.  Returns NULL on failure (unsupported or unreadable V3D
+ * hardware version).
+ */
+struct pipe_screen *
+vc5_screen_create(int fd)
+{
+        struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen);
+        struct pipe_screen *pscreen;
+
+        pscreen = &screen->base;
+
+        pscreen->destroy = vc5_screen_destroy;
+        pscreen->get_param = vc5_screen_get_param;
+        pscreen->get_paramf = vc5_screen_get_paramf;
+        pscreen->get_shader_param = vc5_screen_get_shader_param;
+        pscreen->context_create = vc5_context_create;
+        pscreen->is_format_supported = vc5_screen_is_format_supported;
+
+        screen->fd = fd;
+        list_inithead(&screen->bo_cache.time_list);
+        (void)mtx_init(&screen->bo_handles_mutex, mtx_plain);
+        screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+
+#if defined(USE_VC5_SIMULATOR)
+        vc5_simulator_init(screen);
+#endif
+
+        if (!vc5_get_device_info(screen))
+                goto fail;
+
+        slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16);
+
+        vc5_fence_init(screen);
+
+        v3d_process_debug_variable();
+
+        vc5_resource_screen_init(pscreen);
+
+        screen->compiler = v3d_compiler_init(&screen->devinfo);
+
+        pscreen->get_name = vc5_screen_get_name;
+        pscreen->get_vendor = vc5_screen_get_vendor;
+        pscreen->get_device_vendor = vc5_screen_get_vendor;
+        pscreen->get_compiler_options = vc5_screen_get_compiler_options;
+
+        return pscreen;
+
+fail:
+        /* Unwind the state set up before the device-info check so we don't
+         * leak the handle table, its mutex, or the simulator mappings.
+         */
+#if defined(USE_VC5_SIMULATOR)
+        vc5_simulator_destroy(screen);
+#endif
+        util_hash_table_destroy(screen->bo_handles);
+        mtx_destroy(&screen->bo_handles_mutex);
+        close(fd);
+        ralloc_free(pscreen);
+        return NULL;
+}
diff --git a/src/gallium/drivers/vc5/vc5_screen.h b/src/gallium/drivers/vc5/vc5_screen.h
new file mode 100644
index 00000000000..d804efa1bb7
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_screen.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_SCREEN_H
+#define VC5_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "os/os_thread.h"
+#include "state_tracker/drm_driver.h"
+#include "util/list.h"
+#include "util/slab.h"
+#include "broadcom/common/v3d_debug.h"
+#include "broadcom/common/v3d_device_info.h"
+
+struct vc5_bo;
+
+#define VC5_MAX_MIP_LEVELS 12
+#define VC5_MAX_TEXTURE_SAMPLERS 32
+#define VC5_MAX_SAMPLES 4
+#define VC5_MAX_DRAW_BUFFERS 4
+
+struct vc5_simulator_file;
+
+/** Driver-private screen state, embedding the Gallium pipe_screen. */
+struct vc5_screen {
+        struct pipe_screen base;
+        /* DRM fd owned by the screen; closed at destroy time. */
+        int fd;
+
+        /* Hardware identification (version) read from the kernel. */
+        struct v3d_device_info devinfo;
+
+        /* Lazily-built renderer name string (ralloc'd off the screen). */
+        const char *name;
+
+        /** The last seqno we've completed a wait for.
+         *
+         * This lets us slightly optimize our waits by skipping wait syscalls
+         * if we know the job's already done.
+         */
+        uint64_t finished_seqno;
+
+        /* Parent pool for per-context vc5_transfer slab allocators. */
+        struct slab_parent_pool transfer_pool;
+
+        /* Cache of freed BOs, reusable by size to avoid kernel round-trips. */
+        struct vc5_bo_cache {
+                /** List of struct vc5_bo freed, by age. */
+                struct list_head time_list;
+                /** List of struct vc5_bo freed, per size, by age. */
+                struct list_head *size_list;
+                uint32_t size_list_size;
+
+                mtx_t lock;
+
+                /* Total bytes/count of BOs currently held in the cache. */
+                uint32_t bo_size;
+                uint32_t bo_count;
+        } bo_cache;
+
+        const struct v3d_compiler *compiler;
+
+        /* Map of GEM handle -> vc5_bo, guarded by bo_handles_mutex. */
+        struct util_hash_table *bo_handles;
+        mtx_t bo_handles_mutex;
+
+        /* Total bytes/count of live BOs.  NOTE(review): presumably
+         * maintained by the bufmgr for stats -- confirm there.
+         */
+        uint32_t bo_size;
+        uint32_t bo_count;
+
+        /* Simulator per-fd state; only used under USE_VC5_SIMULATOR. */
+        struct vc5_simulator_file *sim_file;
+};
+
+/** Downcasts a pipe_screen to the driver's vc5_screen wrapper. */
+static inline struct vc5_screen *
+vc5_screen(struct pipe_screen *screen)
+{
+        return (struct vc5_screen *)screen;
+}
+
+struct pipe_screen *vc5_screen_create(int fd);
+
+void
+vc5_fence_init(struct vc5_screen *screen);
+
+struct vc5_fence *
+vc5_fence_create(struct vc5_screen *screen, uint64_t seqno);
+
+#endif /* VC5_SCREEN_H */
diff --git a/src/gallium/drivers/vc5/vc5_simulator.c b/src/gallium/drivers/vc5/vc5_simulator.c
new file mode 100644
index 00000000000..3f783ea5b13
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_simulator.c
@@ -0,0 +1,736 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_simulator.c
+ *
+ * Implements VC5 simulation on top of a non-VC5 GEM fd.
+ *
+ * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on
+ * top of the simpenrose software simulator. Generally, VC5 driver BOs have a
+ * GEM-side copy of their contents and a simulator-side memory area that the
+ * GEM contents get copied into during simulation. Once simulation is done,
+ * the simulator's data is copied back out to the GEM BOs, so that rendering
+ * appears on the screen as if actual hardware rendering had been done.
+ *
+ * One of the limitations of this code is that we shouldn't really need a
+ * GEM-side BO for non-window-system BOs. However, we do need unique BO
+ * handles for each of our GEM bos so that this file can look up its state
+ * from the handle passed in at submit ioctl time (also, a couple of places
+ * outside of this file still call ioctls directly on the fd).
+ *
+ * Another limitation is that BO import doesn't work unless the underlying
+ * window system's BO size matches what VC5 is going to use, which of course
+ * doesn't work out in practice. This means that for now, only DRI3 (VC5
+ * makes the winsys BOs) is supported, not DRI2 (window system makes the winsys
+ * BOs).
+ */
+
+#ifdef USE_VC5_SIMULATOR
+
+#include <sys/mman.h>
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "util/u_memory.h"
+#include "util/u_mm.h"
+
+#define HW_REGISTER_RO(x) (x)
+#define HW_REGISTER_RW(x) (x)
+#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
+
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#define V3D_TECH_VERSION 3
+#define V3D_REVISION 3
+#define V3D_SUB_REV 0
+#define V3D_HIDDEN_REV 0
+#undef unreachable
+#include "v3d_hw_auto.h"
+
+/** Global (across GEM fds) state for the simulator */
+static struct vc5_simulator_state {
+        /* Guards the allocator, the hash tables, and refcount. */
+        mtx_t mutex;
+
+        struct v3d_hw *v3d;
+
+        /* Base virtual address of the heap. */
+        void *mem;
+        /* Base hardware address of the heap. */
+        uint32_t mem_base;
+        /* Size of the heap. */
+        size_t mem_size;
+
+        /* Allocator handing out mem_blocks from the heap. */
+        struct mem_block *heap;
+        /* Reserved heap area for binner memory overflow. */
+        struct mem_block *overflow;
+
+        /** Mapping from GEM fd to struct vc5_simulator_file * */
+        struct hash_table *fd_map;
+
+        /* Number of open fds sharing this global state.  NOTE(review):
+         * refcounting site not visible in this chunk -- confirm.
+         */
+        int refcount;
+} sim_state = {
+        .mutex = _MTX_INITIALIZER_NP,
+};
+
+/** Per-GEM-fd state for the simulator. */
+struct vc5_simulator_file {
+        int fd;
+
+        /** Mapping from GEM handle to struct vc5_simulator_bo * */
+        struct hash_table *bo_map;
+
+        /* GMP (memory protection) table: its heap block and CPU mapping,
+         * written by set_gmp_flags().
+         */
+        struct mem_block *gmp;
+        void *gmp_vaddr;
+};
+
+/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */
+struct vc5_simulator_bo {
+        struct vc5_simulator_file *file;
+
+        /** Area for this BO within sim_state->mem */
+        struct mem_block *block;
+        uint32_t size;
+        /* CPU address of the BO's storage in simulator memory. */
+        void *vaddr;
+
+        /* Window-system mapping/stride when this BO is a scanout buffer. */
+        void *winsys_map;
+        uint32_t winsys_stride;
+
+        /* GEM handle, or 0 for internal allocations not in bo_map. */
+        int handle;
+};
+
+/* Converts an int to the void* key type used by the hash tables. */
+static void *
+int_to_key(int key)
+{
+        return (void *)(uintptr_t)key;
+}
+
+/* Looks up the per-fd simulator state.  Keys are offset by 1 --
+ * presumably so fd 0 doesn't collide with the table's reserved key.
+ */
+static struct vc5_simulator_file *
+vc5_get_simulator_file_for_fd(int fd)
+{
+        struct hash_entry *entry = _mesa_hash_table_search(sim_state.fd_map,
+                                                           int_to_key(fd + 1));
+        return entry ? entry->data : NULL;
+}
+
+/* A marker placed just after each BO, then checked after rendering to make
+ * sure it's still there.
+ */
+#define BO_SENTINEL 0xfedcba98
+
+/* 128kb */
+#define GMP_ALIGN2 17
+
+/**
+ * Sets the range of GPU virtual address space to have the given GMP
+ * permissions (bit 0 = read, bit 1 = write, write-only forbidden).
+ */
+static void
+set_gmp_flags(struct vc5_simulator_file *file,
+              uint32_t offset, uint32_t size, uint32_t flag)
+{
+        assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0);
+        int gmp_offset = offset >> GMP_ALIGN2;
+        int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2;
+        uint32_t *gmp = file->gmp_vaddr;
+
+        assert(flag <= 0x3);
+
+        /* The table packs sixteen 2-bit entries per 32-bit word. */
+        for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) {
+                int32_t bitshift = (i % 16) * 2;
+                gmp[i / 16] &= ~(0x3 << bitshift);
+                gmp[i / 16] |= flag << bitshift;
+        }
+}
+
+/**
+ * Allocates space in simulator memory and returns a tracking struct for
+ * it, registered in the file's handle lookup table.
+ */
+static struct vc5_simulator_bo *
+vc5_create_simulator_bo(int fd, int handle, unsigned size)
+{
+        struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+        struct vc5_simulator_bo *sim_bo = rzalloc(file,
+                                                  struct vc5_simulator_bo);
+        size = align(size, 4096);
+
+        sim_bo->file = file;
+        sim_bo->handle = handle;
+
+        /* +4 bytes of allocation so the BO_SENTINEL word fits after the
+         * BO's contents.
+         */
+        mtx_lock(&sim_state.mutex);
+        sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0);
+        mtx_unlock(&sim_state.mutex);
+        assert(sim_bo->block);
+
+        set_gmp_flags(file, sim_bo->block->ofs, size, 0x3);
+
+        sim_bo->size = size;
+        sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base;
+        /* Poison-fill so reads of uninitialized contents are obvious. */
+        memset(sim_bo->vaddr, 0xd0, size);
+
+        *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL;
+
+        /* A handle of 0 is used for vc5_gem.c internal allocations that
+         * don't need to go in the lookup table.
+         */
+        if (handle != 0) {
+                mtx_lock(&sim_state.mutex);
+                _mesa_hash_table_insert(file->bo_map, int_to_key(handle),
+                                        sim_bo);
+                mtx_unlock(&sim_state.mutex);
+        }
+
+        return sim_bo;
+}
+
+/**
+ * Releases a simulator BO: unmaps any winsys mapping, revokes its GMP
+ * permissions, returns its heap block, and drops it from the handle map.
+ */
+static void
+vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo)
+{
+        struct vc5_simulator_file *sim_file = sim_bo->file;
+
+        if (sim_bo->winsys_map)
+                munmap(sim_bo->winsys_map, sim_bo->size);
+
+        set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0);
+
+        mtx_lock(&sim_state.mutex);
+        u_mmFreeMem(sim_bo->block);
+        /* Handle 0 marks internal BOs that were never in the map. */
+        if (sim_bo->handle) {
+                struct hash_entry *entry =
+                        _mesa_hash_table_search(sim_file->bo_map,
+                                                int_to_key(sim_bo->handle));
+                _mesa_hash_table_remove(sim_file->bo_map, entry);
+        }
+        mtx_unlock(&sim_state.mutex);
+        ralloc_free(sim_bo);
+}
+
+/* Looks up the simulator BO for a GEM handle; NULL if not tracked. */
+static struct vc5_simulator_bo *
+vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle)
+{
+        mtx_lock(&sim_state.mutex);
+        struct hash_entry *entry =
+                _mesa_hash_table_search(file->bo_map, int_to_key(gem_handle));
+        mtx_unlock(&sim_state.mutex);
+
+        return entry ? entry->data : NULL;
+}
+
+/**
+ * Copies the contents of each of the job's GEM BOs into its
+ * simulator-side storage before execution.  Always returns 0.
+ */
+static int
+vc5_simulator_pin_bos(int fd, struct vc5_job *job)
+{
+        struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+        struct set_entry *entry;
+
+        set_foreach(job->bos, entry) {
+                struct vc5_bo *bo = (struct vc5_bo *)entry->key;
+                struct vc5_simulator_bo *sim_bo =
+                        vc5_get_simulator_bo(file, bo->handle);
+
+                vc5_bo_map(bo);
+                memcpy(sim_bo->vaddr, bo->map, bo->size);
+        }
+
+        return 0;
+}
+
+/**
+ * Copies each of the job's BOs back from simulator memory to the GEM
+ * side after execution, asserting the trailing sentinel wasn't
+ * overwritten (i.e. no out-of-bounds writes).  Always returns 0.
+ */
+static int
+vc5_simulator_unpin_bos(int fd, struct vc5_job *job)
+{
+        struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+        struct set_entry *entry;
+
+        set_foreach(job->bos, entry) {
+                struct vc5_bo *bo = (struct vc5_bo *)entry->key;
+                struct vc5_simulator_bo *sim_bo =
+                        vc5_get_simulator_bo(file, bo->handle);
+
+                assert(*(uint32_t *)(sim_bo->vaddr +
+                                     sim_bo->size) == BO_SENTINEL);
+
+                vc5_bo_map(bo);
+                memcpy(bo->map, sim_bo->vaddr, bo->size);
+        }
+
+        return 0;
+}
+
+#if 0
+/* Disabled bring-up debug helper: serializes a hang-state-style dump of
+ * all the job's BOs to "vc5-dri-N.dump" when VC5_DEBUG_DUMP is set.
+ * NOTE(review): references struct vc5_exec_info, which doesn't exist in
+ * the live code above -- would need updating before re-enabling.
+ */
+static void
+vc5_dump_to_file(struct vc5_exec_info *exec)
+{
+        static int dumpno = 0;
+        struct drm_vc5_get_hang_state *state;
+        struct drm_vc5_get_hang_state_bo *bo_state;
+        unsigned int dump_version = 0;
+
+        if (!(vc5_debug & VC5_DEBUG_DUMP))
+                return;
+
+        state = calloc(1, sizeof(*state));
+
+        int unref_count = 0;
+        list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                unref_count++;
+        }
+
+        /* Add one more for the overflow area that isn't wrapped in a BO. */
+        state->bo_count = exec->bo_count + unref_count + 1;
+        bo_state = calloc(state->bo_count, sizeof(*bo_state));
+
+        char *filename = NULL;
+        asprintf(&filename, "vc5-dri-%d.dump", dumpno++);
+        FILE *f = fopen(filename, "w+");
+        if (!f) {
+                fprintf(stderr, "Couldn't open %s: %s", filename,
+                        strerror(errno));
+                return;
+        }
+
+        fwrite(&dump_version, sizeof(dump_version), 1, f);
+
+        state->ct0ca = exec->ct0ca;
+        state->ct0ea = exec->ct0ea;
+        state->ct1ca = exec->ct1ca;
+        state->ct1ea = exec->ct1ea;
+        state->start_bin = exec->ct0ca;
+        state->start_render = exec->ct1ca;
+        fwrite(state, sizeof(*state), 1, f);
+
+        int i;
+        for (i = 0; i < exec->bo_count; i++) {
+                struct drm_gem_cma_object *cma_bo = exec->bo[i];
+                bo_state[i].handle = i; /* Not used by the parser. */
+                bo_state[i].paddr = cma_bo->paddr;
+                bo_state[i].size = cma_bo->base.size;
+        }
+
+        list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                struct drm_gem_cma_object *cma_bo = &bo->base;
+                bo_state[i].handle = 0;
+                bo_state[i].paddr = cma_bo->paddr;
+                bo_state[i].size = cma_bo->base.size;
+                i++;
+        }
+
+        /* Add the static overflow memory area. */
+        bo_state[i].handle = exec->bo_count;
+        bo_state[i].paddr = sim_state.overflow->ofs;
+        bo_state[i].size = sim_state.overflow->size;
+        i++;
+
+        fwrite(bo_state, sizeof(*bo_state), state->bo_count, f);
+
+        for (int i = 0; i < exec->bo_count; i++) {
+                struct drm_gem_cma_object *cma_bo = exec->bo[i];
+                fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+        }
+
+        list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                struct drm_gem_cma_object *cma_bo = &bo->base;
+                fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+        }
+
+        void *overflow = calloc(1, sim_state.overflow->size);
+        fwrite(overflow, 1, sim_state.overflow->size, f);
+        free(overflow);
+
+        free(state);
+        free(bo_state);
+        fclose(f);
+}
+#endif
+
+/* Register access shorthands for the simulated V3D. */
+#define V3D_WRITE(reg, val) v3d_hw_write_reg(sim_state.v3d, reg, val)
+#define V3D_READ(reg) v3d_hw_read_reg(sim_state.v3d, reg)
+
+/* Flushes the L3 via the GCA, when the simulated part has one. */
+static void
+vc5_flush_l3(void)
+{
+        if (!v3d_hw_has_gca(sim_state.v3d))
+                return;
+
+        uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
+
+        /* Pulse the flush bit: set it, then clear it back. */
+        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
+        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
+}
+
/* Invalidates the L2 cache.  This is a read-only cache, so a single clear
 * (L2CCLR) plus re-enable (L2CENA) write is all that's needed.
 */
static void
vc5_flush_l2(void)
{
        V3D_WRITE(V3D_CTL_0_L2CACTL,
                  V3D_CTL_0_L2CACTL_L2CCLR_SET |
                  V3D_CTL_0_L2CACTL_L2CENA_SET);
}
+
/* Invalidates texture L2 cachelines */
static void
vc5_flush_l2t(void)
{
        /* Set the flush range to cover the whole address space, then request
         * an L2T flush (L2TFLS) with flush mode 0.
         */
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
}
+
/* Invalidates the slice caches.  These are read-only caches; writing all
 * ones to SLCACTL invalidates every one of them.
 */
static void
vc5_flush_slices(void)
{
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}
+
/* Invalidates/flushes all of the caches in front of memory before running a
 * job, so stale data from a previous run isn't read.
 */
static void
vc5_flush_caches(void)
{
        vc5_flush_l3();
        vc5_flush_l2();
        vc5_flush_l2t();
        vc5_flush_slices();
}
+
/**
 * Synchronously executes a job's bin and render command lists on the
 * simulated V3D hardware, standing in for the real submit ioctl.
 *
 * Pins the job's BOs, resets the GMP, invalidates the caches, then kicks
 * the binner (CT0) and waits for it before kicking the render CL (CT1).
 * When the color buffer is shared with the winsys, its contents are copied
 * into simulator memory before the run and back out afterward.
 *
 * Returns 0 on success, or the error from pinning/unpinning BOs.
 */
int
vc5_simulator_flush(struct vc5_context *vc5,
                    struct drm_vc5_submit_cl *submit, struct vc5_job *job)
{
        struct vc5_screen *screen = vc5->screen;
        int fd = screen->fd;
        struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
        struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]);
        struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL;
        struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL;
        uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0;
        uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
        uint32_t row_len = MIN2(sim_stride, winsys_stride);
        int ret;

        /* Copy the winsys color buffer contents in row by row, since the
         * winsys and simulator strides may differ.
         */
        if (ctex && csim_bo->winsys_map) {
#if 0
                fprintf(stderr, "%dx%d %d %d %d\n",
                        ctex->base.b.width0, ctex->base.b.height0,
                        winsys_stride,
                        sim_stride,
                        ctex->bo->size);
#endif

                for (int y = 0; y < ctex->base.b.height0; y++) {
                        memcpy(ctex->bo->map + y * sim_stride,
                               csim_bo->winsys_map + y * winsys_stride,
                               row_len);
                }
        }

        ret = vc5_simulator_pin_bos(fd, job);
        if (ret)
                return ret;

        //vc5_dump_to_file(&exec);

        /* Completely reset the GMP. */
        v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CFG,
                         V3D_GMP_0_CFG_PROTENABLE_SET);
        v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_TABLE_ADDR, file->gmp->ofs);
        v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CLEAR_LOAD, ~0);
        while (v3d_hw_read_reg(sim_state.v3d, V3D_GMP_0_STATUS) &
               V3D_GMP_0_STATUS_CFG_BUSY_SET) {
                ;
        }

        vc5_flush_caches();

        /* Kick off the binner by setting CT0's queue base/end addresses. */
        v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QBA, submit->bcl_start);
        v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render, as it seems the
         * simulator doesn't implement the semaphores.
         */
        while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0CA) !=
               v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0EA)) {
                v3d_hw_tick(sim_state.v3d);
        }

        v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QBA, submit->rcl_start);
        v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QEA, submit->rcl_end);

        /* Tick until the render CL on both CLE instances has run to its end
         * address.
         */
        while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1CA) !=
               v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1EA) ||
               v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1CA) !=
               v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1EA)) {
                v3d_hw_tick(sim_state.v3d);
        }

        ret = vc5_simulator_unpin_bos(fd, job);
        if (ret)
                return ret;

        /* Copy the rendered color buffer back out to the winsys BO. */
        if (ctex && csim_bo->winsys_map) {
                for (int y = 0; y < ctex->base.b.height0; y++) {
                        memcpy(csim_bo->winsys_map + y * winsys_stride,
                               ctex->bo->map + y * sim_stride,
                               row_len);
                }
        }

        return 0;
}
+
/**
 * Map the underlying GEM object from the real hardware GEM handle.
 *
 * Uses the dumb-buffer MAP ioctl to get a fake mmap offset for the handle
 * and maps the BO read/write.  Aborts on failure, since the simulator can't
 * usefully continue without access to the winsys memory.
 */
static void *
vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo)
{
        int ret;
        void *map;

        struct drm_mode_map_dumb map_dumb = {
                .handle = sim_bo->handle,
        };
        ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb);
        if (ret != 0) {
                fprintf(stderr, "map ioctl failure\n");
                abort();
        }

        map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                   fd, map_dumb.offset);
        if (map == MAP_FAILED) {
                fprintf(stderr,
                        "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
                        sim_bo->handle, (long long)map_dumb.offset,
                        (int)sim_bo->size);
                abort();
        }

        return map;
}
+
/**
 * Do fixups after a BO has been opened from a handle.
 *
 * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE
 * time, but we're still using drmPrimeFDToHandle() so we have this helper to
 * be called afterward instead.
 *
 * The winsys stride and mapping are recorded on the simulator BO so the
 * frontbuffer copy in vc5_simulator_flush() can use them.
 */
void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride,
                                    int handle, uint32_t size)
{
        struct vc5_simulator_bo *sim_bo =
                vc5_create_simulator_bo(fd, handle, size);

        sim_bo->winsys_stride = winsys_stride;
        sim_bo->winsys_map = vc5_simulator_map_winsys_bo(fd, sim_bo);
}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation.
+ *
+ * Making a VC5 BO is just a matter of making a corresponding BO on the host.
+ */
+static int
+vc5_simulator_create_bo_ioctl(int fd, struct drm_vc5_create_bo *args)
+{
+ int ret;
+ struct drm_mode_create_dumb create = {
+ .width = 128,
+ .bpp = 8,
+ .height = (args->size + 127) / 128,
+ };
+
+ ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+ assert(create.size >= args->size);
+
+ args->handle = create.handle;
+
+ struct vc5_simulator_bo *sim_bo =
+ vc5_create_simulator_bo(fd, create.handle, args->size);
+
+ args->offset = sim_bo->block->ofs;
+
+ return ret;
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation.
+ *
+ * We just pass this straight through to dumb mmap.
+ */
+static int
+vc5_simulator_mmap_bo_ioctl(int fd, struct drm_vc5_mmap_bo *args)
+{
+ int ret;
+ struct drm_mode_map_dumb map = {
+ .handle = args->handle,
+ };
+
+ ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map);
+ args->offset = map.offset;
+
+ return ret;
+}
+
/* Simulated GEM close: drops the simulator's per-BO state, then closes the
 * real host GEM BO.
 */
static int
vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args)
{
        /* Free the simulator's internal tracking. */
        struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
        struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file,
                                                               args->handle);

        vc5_free_simulator_bo(sim_bo);

        /* Pass the call on down. */
        return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args);
}
+
+static int
+vc5_simulator_get_param_ioctl(int fd, struct drm_vc5_get_param *args)
+{
+ static const uint32_t reg_map[] = {
+ [DRM_VC5_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
+ [DRM_VC5_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
+ [DRM_VC5_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
+ [DRM_VC5_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
+ [DRM_VC5_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
+ [DRM_VC5_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
+ [DRM_VC5_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
+ };
+
+ if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
+ args->value = v3d_hw_read_reg(sim_state.v3d,
+ reg_map[args->param]);
+ return 0;
+ }
+
+ fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n",
+ (long long)args->value);
+ abort();
+}
+
/**
 * Entry point for simulated DRM ioctls: dispatches each request to its
 * simulated implementation above, or passes it through to the host kernel
 * where that's the right thing.
 */
int
vc5_simulator_ioctl(int fd, unsigned long request, void *args)
{
        switch (request) {
        case DRM_IOCTL_VC5_CREATE_BO:
                return vc5_simulator_create_bo_ioctl(fd, args);
        case DRM_IOCTL_VC5_MMAP_BO:
                return vc5_simulator_mmap_bo_ioctl(fd, args);

        case DRM_IOCTL_VC5_WAIT_BO:
        case DRM_IOCTL_VC5_WAIT_SEQNO:
                /* We do all of the vc5 rendering synchronously, so we just
                 * return immediately on the wait ioctls.  This ignores any
                 * native rendering to the host BO, so it does mean we race on
                 * front buffer rendering.
                 */
                return 0;

        case DRM_IOCTL_VC5_GET_PARAM:
                return vc5_simulator_get_param_ioctl(fd, args);

        case DRM_IOCTL_GEM_CLOSE:
                return vc5_simulator_gem_close_ioctl(fd, args);

        case DRM_IOCTL_GEM_OPEN:
        case DRM_IOCTL_GEM_FLINK:
                return drmIoctl(fd, request, args);
        default:
                fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request);
                abort();
        }
}
+
+static void
+vc5_simulator_init_global(void)
+{
+ mtx_lock(&sim_state.mutex);
+ if (sim_state.refcount++) {
+ mtx_unlock(&sim_state.mutex);
+ return;
+ }
+
+ sim_state.v3d = v3d_hw_auto_new(NULL);
+ v3d_hw_alloc_mem(sim_state.v3d, 256 * 1024 * 1024);
+ sim_state.mem_base =
+ v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size,
+ &sim_state.mem);
+
+ sim_state.heap = u_mmInit(0, sim_state.mem_size);
+
+ /* Make a block of 0xd0 at address 0 to make sure we don't screw up
+ * and land there.
+ */
+ struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0);
+ memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096);
+
+ mtx_unlock(&sim_state.mutex);
+
+ sim_state.fd_map =
+ _mesa_hash_table_create(NULL,
+ _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+}
+
/**
 * Creates the per-screen simulator state: the handle -> simulator BO map
 * and the screen's GMP table allocation, and registers the screen's fd in
 * the global fd map (offset by 1 so fd 0 doesn't map to a NULL key).
 */
void
vc5_simulator_init(struct vc5_screen *screen)
{
        vc5_simulator_init_global();

        screen->sim_file = rzalloc(screen, struct vc5_simulator_file);
        struct vc5_simulator_file *sim_file = screen->sim_file;

        screen->sim_file->bo_map =
                _mesa_hash_table_create(screen->sim_file,
                                        _mesa_hash_pointer,
                                        _mesa_key_pointer_equal);

        mtx_lock(&sim_state.mutex);
        _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1),
                                screen->sim_file);
        mtx_unlock(&sim_state.mutex);

        /* NOTE(review): 8096 looks like a typo for 8192 -- confirm the
         * intended GMP table size.
         */
        sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0);
        sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs -
                               sim_state.mem_base);
}
+
/**
 * Drops a screen's reference on the global simulator state, tearing the
 * process-wide state down when the last screen goes away.
 */
void
vc5_simulator_destroy(struct vc5_screen *screen)
{
        mtx_lock(&sim_state.mutex);
        if (!--sim_state.refcount) {
                _mesa_hash_table_destroy(sim_state.fd_map, NULL);
                u_mmDestroy(sim_state.heap);
                /* No memsetting the struct, because it contains the mutex. */
                sim_state.mem = NULL;
        }
        mtx_unlock(&sim_state.mutex);
}
+
+#endif /* USE_VC5_SIMULATOR */
diff --git a/src/gallium/drivers/vc5/vc5_state.c b/src/gallium/drivers/vc5/vc5_state.c
new file mode 100644
index 00000000000..b289d20cb62
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_state.c
@@ -0,0 +1,663 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_half.h"
+#include "util/u_helpers.h"
+
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
/* Generic CSO create for state objects that are plain copies of the gallium
 * state struct, with no driver-specific data attached.  Returns NULL on
 * allocation failure.
 */
static void *
vc5_generic_cso_state_create(const void *src, uint32_t size)
{
        void *copy = calloc(1, size);

        if (copy)
                memcpy(copy, src, size);

        return copy;
}
+
/* Generic CSO delete for state objects that are bare heap allocations with
 * no resource references to drop.
 */
static void
vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso)
{
        free(hwcso);
}
+
/* Stores the new blend color, also pre-converting each channel to half
 * float for later emission.
 */
static void
vc5_set_blend_color(struct pipe_context *pctx,
                    const struct pipe_blend_color *blend_color)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->blend_color.f = *blend_color;
        for (int i = 0; i < 4; i++) {
                vc5->blend_color.hf[i] =
                        util_float_to_half(blend_color->color[i]);
        }
        vc5->dirty |= VC5_DIRTY_BLEND_COLOR;
}
+
/* Stores the stencil reference values and flags them for re-emission. */
static void
vc5_set_stencil_ref(struct pipe_context *pctx,
                    const struct pipe_stencil_ref *stencil_ref)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->stencil_ref = *stencil_ref;
        vc5->dirty |= VC5_DIRTY_STENCIL_REF;
}
+
/* Stores the user clip planes and flags them for re-emission. */
static void
vc5_set_clip_state(struct pipe_context *pctx,
                   const struct pipe_clip_state *clip)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->clip = *clip;
        vc5->dirty |= VC5_DIRTY_CLIP;
}
+
/* Stores the sample mask, dropping bits beyond the supported sample count. */
static void
vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1);
        vc5->dirty |= VC5_DIRTY_SAMPLE_MASK;
}
+
/* Converts a float to the HW's 1-8-7 (sign/exponent/mantissa) half-float
 * format by truncating the low 16 bits of the IEEE 32-bit representation.
 */
static uint16_t
float_to_187_half(float f)
{
        return fui(f) >> 16;
}
+
+static void *
+vc5_create_rasterizer_state(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct vc5_rasterizer_state *so;
+
+ so = CALLOC_STRUCT(vc5_rasterizer_state);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
+ * BCM21553).
+ */
+ so->point_size = MAX2(cso->point_size, .125f);
+
+ if (cso->offset_tri) {
+ so->offset_units = float_to_187_half(cso->offset_units);
+ so->offset_factor = float_to_187_half(cso->offset_scale);
+ }
+
+ return so;
+}
+
/* Blend state is baked into shaders, so this CSO is just a copy of the
 * gallium state with nothing precomputed.
 */
static void *
vc5_create_blend_state(struct pipe_context *pctx,
                       const struct pipe_blend_state *cso)
{
        return vc5_generic_cso_state_create(cso, sizeof(*cso));
}
+
/* Creates the depth/stencil/alpha CSO, precomputing whether early Z can be
 * enabled for this state combination.
 */
static void *
vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx,
                                     const struct pipe_depth_stencil_alpha_state *cso)
{
        struct vc5_depth_stencil_alpha_state *so;

        so = CALLOC_STRUCT(vc5_depth_stencil_alpha_state);
        if (!so)
                return NULL;

        so->base = *cso;

        if (cso->depth.enabled) {
                /* We only handle early Z in the < direction because otherwise
                 * we'd have to runtime guess which direction to set in the
                 * render config.
                 *
                 * Early Z is also disallowed when a stencil fail/zfail could
                 * modify the stencil buffer (any non-KEEP zfail op).
                 */
                so->early_z_enable =
                        ((cso->depth.func == PIPE_FUNC_LESS ||
                          cso->depth.func == PIPE_FUNC_LEQUAL) &&
                         (!cso->stencil[0].enabled ||
                          (cso->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP &&
                           (!cso->stencil[1].enabled ||
                            cso->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP))));
        }

        return so;
}
+
/* Stores the polygon stipple pattern and flags it for re-emission. */
static void
vc5_set_polygon_stipple(struct pipe_context *pctx,
                        const struct pipe_poly_stipple *stipple)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->stipple = *stipple;
        vc5->dirty |= VC5_DIRTY_STIPPLE;
}
+
/* Stores the scissor rectangle.  start_slot/num_scissors are ignored: only
 * a single scissor is supported.
 */
static void
vc5_set_scissor_states(struct pipe_context *pctx,
                       unsigned start_slot,
                       unsigned num_scissors,
                       const struct pipe_scissor_state *scissor)
{
        struct vc5_context *vc5 = vc5_context(pctx);

        vc5->scissor = *scissor;
        vc5->dirty |= VC5_DIRTY_SCISSOR;
}
+
/* Stores the viewport transform.  start_slot/num_viewports are ignored:
 * only a single viewport is supported.
 */
static void
vc5_set_viewport_states(struct pipe_context *pctx,
                        unsigned start_slot,
                        unsigned num_viewports,
                        const struct pipe_viewport_state *viewport)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->viewport = *viewport;
        vc5->dirty |= VC5_DIRTY_VIEWPORT;
}
+
/* Binds a range of vertex buffers, using the shared gallium helper to
 * maintain the enabled mask and references, then recomputes the count from
 * the highest enabled bit.
 */
static void
vc5_set_vertex_buffers(struct pipe_context *pctx,
                       unsigned start_slot, unsigned count,
                       const struct pipe_vertex_buffer *vb)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_vertexbuf_stateobj *so = &vc5->vertexbuf;

        util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb,
                                     start_slot, count);
        so->count = util_last_bit(so->enabled_mask);

        vc5->dirty |= VC5_DIRTY_VTXBUF;
}
+
/* Binds a blend CSO and flags it for re-emission. */
static void
vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->blend = hwcso;
        vc5->dirty |= VC5_DIRTY_BLEND;
}
+
+static void
+vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_rasterizer_state *rast = hwcso;
+
+ if (vc5->rasterizer && rast &&
+ vc5->rasterizer->base.flatshade != rast->base.flatshade) {
+ vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+ }
+
+ vc5->rasterizer = hwcso;
+ vc5->dirty |= VC5_DIRTY_RASTERIZER;
+}
+
/* Binds a depth/stencil/alpha CSO and flags it for re-emission. */
static void
vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->zsa = hwcso;
        vc5->dirty |= VC5_DIRTY_ZSA;
}
+
+static void *
+vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj);
+
+ if (!so)
+ return NULL;
+
+ memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
+ so->num_elements = num_elements;
+
+ return so;
+}
+
/* Binds a vertex elements CSO and flags it for re-emission. */
static void
vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->vtx = hwcso;
        vc5->dirty |= VC5_DIRTY_VTXSTATE;
}
+
/* Binds (or unbinds, when cb is NULL) a constant buffer for a shader stage,
 * tracking which slots are enabled and which need re-upload.
 */
static void
vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
                        const struct pipe_constant_buffer *cb)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_constbuf_stateobj *so = &vc5->constbuf[shader];

        util_copy_constant_buffer(&so->cb[index], cb);

        /* Note that the state tracker can unbind constant buffers by
         * passing NULL here.
         */
        if (unlikely(!cb)) {
                /* Unbinding only clears the masks; VC5_DIRTY_CONSTBUF is
                 * deliberately not set on this path.
                 */
                so->enabled_mask &= ~(1 << index);
                so->dirty_mask &= ~(1 << index);
                return;
        }

        so->enabled_mask |= 1 << index;
        so->dirty_mask |= 1 << index;
        vc5->dirty |= VC5_DIRTY_CONSTBUF;
}
+
/* Binds a new framebuffer state, updating the surface references and
 * dropping the current job pointer.
 */
static void
vc5_set_framebuffer_state(struct pipe_context *pctx,
                          const struct pipe_framebuffer_state *framebuffer)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct pipe_framebuffer_state *cso = &vc5->framebuffer;
        unsigned i;

        /* The current job was for the old FBO; clear the pointer so the
         * next draw looks up (or creates) the job for the new one.  Note
         * that nothing is flushed here.
         */
        vc5->job = NULL;

        for (i = 0; i < framebuffer->nr_cbufs; i++)
                pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
        /* Drop references to any previously-bound cbufs beyond the new count. */
        for (; i < vc5->framebuffer.nr_cbufs; i++)
                pipe_surface_reference(&cso->cbufs[i], NULL);

        cso->nr_cbufs = framebuffer->nr_cbufs;

        pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);

        cso->width = framebuffer->width;
        cso->height = framebuffer->height;

        vc5->dirty |= VC5_DIRTY_FRAMEBUFFER;
}
+
+static struct vc5_texture_stateobj *
+vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader)
+{
+ switch (shader) {
+ case PIPE_SHADER_FRAGMENT:
+ vc5->dirty |= VC5_DIRTY_FRAGTEX;
+ return &vc5->fragtex;
+ break;
+ case PIPE_SHADER_VERTEX:
+ vc5->dirty |= VC5_DIRTY_VERTTEX;
+ return &vc5->verttex;
+ break;
+ default:
+ fprintf(stderr, "Unknown shader target %d\n", shader);
+ abort();
+ }
+}
+
+static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest)
+{
+ switch (pipe_wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return 0;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return 1;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return 2;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return 3;
+ case PIPE_TEX_WRAP_CLAMP:
+ return (using_nearest ? 1 : 3);
+ default:
+ unreachable("Unknown wrap mode");
+ }
+}
+
+
/* Creates a sampler CSO, pre-packing the texture-uniform P0 word and the
 * sampler-dependent fields of TEXTURE_SHADER_STATE.
 */
static void *
vc5_create_sampler_state(struct pipe_context *pctx,
                         const struct pipe_sampler_state *cso)
{
        struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state);

        if (!so)
                return NULL;

        memcpy(so, cso, sizeof(*cso));

        /* The GL CLAMP wrap mode translation depends on the filter mode
         * (see translate_wrap()).
         */
        bool either_nearest =
                (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
                 cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);

        struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = {
                .s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest),
                .t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest),
                .r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest),
        };
        V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
                                                         (uint8_t *)&so->p0,
                                                         &p0_unpacked);

        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
                cl_packet_header(TEXTURE_SHADER_STATE),

                .min_level_of_detail = MAX2(cso->min_lod, 0.0),
                .depth_compare_function = cso->compare_func,
                .fixed_bias = cso->lod_bias,
        };
        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
                      cl_packet_length(TEXTURE_SHADER_STATE));
        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
                                             &state_unpacked);

        return so;
}
+
+static void
+vc5_sampler_states_bind(struct pipe_context *pctx,
+ enum pipe_shader_type shader, unsigned start,
+ unsigned nr, void **hwcso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader);
+
+ assert(start == 0);
+ unsigned i;
+ unsigned new_nr = 0;
+
+ for (i = 0; i < nr; i++) {
+ if (hwcso[i])
+ new_nr = i + 1;
+ stage_tex->samplers[i] = hwcso[i];
+ }
+
+ for (; i < stage_tex->num_samplers; i++) {
+ stage_tex->samplers[i] = NULL;
+ }
+
+ stage_tex->num_samplers = new_nr;
+}
+
+static uint32_t
+translate_swizzle(unsigned char pipe_swizzle)
+{
+ switch (pipe_swizzle) {
+ case PIPE_SWIZZLE_0:
+ return 0;
+ case PIPE_SWIZZLE_1:
+ return 1;
+ case PIPE_SWIZZLE_X:
+ case PIPE_SWIZZLE_Y:
+ case PIPE_SWIZZLE_Z:
+ case PIPE_SWIZZLE_W:
+ return 2 + pipe_swizzle;
+ default:
+ unreachable("unknown swizzle");
+ }
+}
+
/* Creates a sampler view for a resource, pre-packing the texture-uniform P1
 * word and as much of TEXTURE_SHADER_STATE as depends only on the view.
 */
static struct pipe_sampler_view *
vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                        const struct pipe_sampler_view *cso)
{
        struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view);
        struct vc5_resource *rsc = vc5_resource(prsc);

        if (!so)
                return NULL;

        so->base = *cso;

        pipe_reference(NULL, &prsc->reference);

        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
        };

        /* Request only as many return words from the TMU as the format's
         * return size and channel count need.
         */
        unpacked.return_word_0_of_texture_data = true;
        if (vc5_get_tex_return_size(cso->format) == 16) {
                unpacked.return_word_1_of_texture_data = true;
        } else {
                int chans = vc5_get_tex_return_channels(cso->format);

                if (chans > 1)
                        unpacked.return_word_1_of_texture_data = true;
                if (chans > 2)
                        unpacked.return_word_2_of_texture_data = true;
                if (chans > 3)
                        unpacked.return_word_3_of_texture_data = true;
        }

        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL,
                                                         (uint8_t *)&so->p1,
                                                         &unpacked);

        /* Compute the sampler view's swizzle up front. This will be plugged
         * into either the sampler (for 16-bit returns) or the shader's
         * texture key (for 32)
         */
        uint8_t view_swizzle[4] = {
                cso->swizzle_r,
                cso->swizzle_g,
                cso->swizzle_b,
                cso->swizzle_a
        };
        const uint8_t *fmt_swizzle = vc5_get_format_swizzle(so->base.format);
        util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle);

        so->base.texture = prsc;
        so->base.reference.count = 1;
        so->base.context = pctx;

        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
                cl_packet_header(TEXTURE_SHADER_STATE),

                .image_width = prsc->width0,
                .image_height = prsc->height0,
                .image_depth = prsc->depth0,

                .texture_type = rsc->tex_format,
                .srgb = util_format_is_srgb(cso->format),

                .base_level = cso->u.tex.first_level,
                .array_stride_64_byte_aligned = rsc->cube_map_stride / 64,
        };

        /* Note: Contrary to the docs, the swizzle still applies even
         * if the return size is 32. It's just that you probably want
         * to swizzle in the shader, because you need the Y/Z/W
         * channels to be defined.
         */
        if (vc5_get_tex_return_size(cso->format) != 32) {
                state_unpacked.swizzle_r = translate_swizzle(so->swizzle[0]);
                state_unpacked.swizzle_g = translate_swizzle(so->swizzle[1]);
                state_unpacked.swizzle_b = translate_swizzle(so->swizzle[2]);
                state_unpacked.swizzle_a = translate_swizzle(so->swizzle[3]);
        } else {
                state_unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
                state_unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
                state_unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
                state_unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
        }

        /* XXX: While we need to use this flag to enable tiled
         * resource sharing (even a small shared buffer should be UIF,
         * not UBLINEAR or raster), this is also at the moment
         * patching up the fact that our resource layout's decisions
         * about XOR don't quite match the HW's.
         */
        switch (rsc->slices[0].tiling) {
        case VC5_TILING_UIF_NO_XOR:
        case VC5_TILING_UIF_XOR:
                state_unpacked.level_0_is_strictly_uif = true;
                state_unpacked.level_0_xor_enable = false;
                break;
        default:
                break;
        }

        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
                      cl_packet_length(TEXTURE_SHADER_STATE));
        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
                                             &state_unpacked);

        return &so->base;
}
+
/* Destroys a sampler view, dropping its texture reference. */
static void
vc5_sampler_view_destroy(struct pipe_context *pctx,
                         struct pipe_sampler_view *view)
{
        pipe_resource_reference(&view->texture, NULL);
        free(view);
}
+
/* Binds a range of sampler views starting at slot 0, releasing references
 * in slots past the new range and updating the texture count to the highest
 * non-NULL slot.
 */
static void
vc5_set_sampler_views(struct pipe_context *pctx,
                      enum pipe_shader_type shader,
                      unsigned start, unsigned nr,
                      struct pipe_sampler_view **views)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader);
        unsigned i;
        unsigned new_nr = 0;

        assert(start == 0);

        for (i = 0; i < nr; i++) {
                if (views[i])
                        new_nr = i + 1;
                pipe_sampler_view_reference(&stage_tex->textures[i], views[i]);
        }

        for (; i < stage_tex->num_textures; i++) {
                pipe_sampler_view_reference(&stage_tex->textures[i], NULL);
        }

        stage_tex->num_textures = new_nr;
}
+
+static struct pipe_stream_output_target *
+vc5_create_stream_output_target(struct pipe_context *pctx,
+ struct pipe_resource *prsc,
+ unsigned buffer_offset,
+ unsigned buffer_size)
+{
+ struct pipe_stream_output_target *target;
+
+ target = CALLOC_STRUCT(pipe_stream_output_target);
+ if (!target)
+ return NULL;
+
+ pipe_reference_init(&target->reference, 1);
+ pipe_resource_reference(&target->buffer, prsc);
+
+ target->context = pctx;
+ target->buffer_offset = buffer_offset;
+ target->buffer_size = buffer_size;
+
+ return target;
+}
+
/* Destroys a stream output target, dropping its buffer reference. */
static void
vc5_stream_output_target_destroy(struct pipe_context *pctx,
                                 struct pipe_stream_output_target *target)
{
        pipe_resource_reference(&target->buffer, NULL);
        free(target);
}
+
/* Binds the set of stream output targets, releasing references to any
 * previously-bound targets beyond the new count.
 */
static void
vc5_set_stream_output_targets(struct pipe_context *pctx,
                              unsigned num_targets,
                              struct pipe_stream_output_target **targets,
                              const unsigned *offsets)
{
        struct vc5_context *ctx = vc5_context(pctx);
        struct vc5_streamout_stateobj *so = &ctx->streamout;
        unsigned i;

        assert(num_targets <= ARRAY_SIZE(so->targets));

        for (i = 0; i < num_targets; i++)
                pipe_so_target_reference(&so->targets[i], targets[i]);

        for (; i < so->num_targets; i++)
                pipe_so_target_reference(&so->targets[i], NULL);

        so->num_targets = num_targets;

        ctx->dirty |= VC5_DIRTY_STREAMOUT;
}
+
/* Wires up all of the context's state-management entry points to the
 * implementations in this file.
 */
void
vc5_state_init(struct pipe_context *pctx)
{
        pctx->set_blend_color = vc5_set_blend_color;
        pctx->set_stencil_ref = vc5_set_stencil_ref;
        pctx->set_clip_state = vc5_set_clip_state;
        pctx->set_sample_mask = vc5_set_sample_mask;
        pctx->set_constant_buffer = vc5_set_constant_buffer;
        pctx->set_framebuffer_state = vc5_set_framebuffer_state;
        pctx->set_polygon_stipple = vc5_set_polygon_stipple;
        pctx->set_scissor_states = vc5_set_scissor_states;
        pctx->set_viewport_states = vc5_set_viewport_states;

        pctx->set_vertex_buffers = vc5_set_vertex_buffers;

        pctx->create_blend_state = vc5_create_blend_state;
        pctx->bind_blend_state = vc5_blend_state_bind;
        pctx->delete_blend_state = vc5_generic_cso_state_delete;

        pctx->create_rasterizer_state = vc5_create_rasterizer_state;
        pctx->bind_rasterizer_state = vc5_rasterizer_state_bind;
        pctx->delete_rasterizer_state = vc5_generic_cso_state_delete;

        pctx->create_depth_stencil_alpha_state = vc5_create_depth_stencil_alpha_state;
        pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind;
        pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete;

        pctx->create_vertex_elements_state = vc5_vertex_state_create;
        pctx->delete_vertex_elements_state = vc5_generic_cso_state_delete;
        pctx->bind_vertex_elements_state = vc5_vertex_state_bind;

        pctx->create_sampler_state = vc5_create_sampler_state;
        pctx->delete_sampler_state = vc5_generic_cso_state_delete;
        pctx->bind_sampler_states = vc5_sampler_states_bind;

        pctx->create_sampler_view = vc5_create_sampler_view;
        pctx->sampler_view_destroy = vc5_sampler_view_destroy;
        pctx->set_sampler_views = vc5_set_sampler_views;

        pctx->create_stream_output_target = vc5_create_stream_output_target;
        pctx->stream_output_target_destroy = vc5_stream_output_target_destroy;
        pctx->set_stream_output_targets = vc5_set_stream_output_targets;
}
diff --git a/src/gallium/drivers/vc5/vc5_tiling.c b/src/gallium/drivers/vc5/vc5_tiling.c
new file mode 100644
index 00000000000..279774e5553
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_tiling.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_tiling.c
+ *
+ * Handles information about the VC5 tiling formats, and loading and storing
+ * from them.
+ */
+
+#include <stdint.h>
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_tiling.h"
+
/* Per-bpp geometry of a UIF macroblock (one 256-byte UIF block, i.e. a 2x2
 * grouping of 64-byte utiles), used by the UIF address computation.
 */
struct mb_layout {
        /** Height, in pixels, of a macroblock (2x2 utiles, a UIF block). */
        uint32_t height;
        /** Width, in pixels, of a macroblock (2x2 utiles, a UIF block). */
        uint32_t width;
        /* Pixels per row within one utile of the macroblock.
         * NOTE(review): asserted equal to vc5_utile_width(cpp) in
         * get_mb_layout().
         */
        uint32_t tile_row_stride;
};

/* Indices into mb_layouts[]: ffs(cpp) - 1 maps cpp = 1/2/4/8/16 onto these
 * entries in order (see get_mb_layout()).
 */
enum {
        MB_LAYOUT_8BPP,
        MB_LAYOUT_16BPP,
        MB_LAYOUT_32BPP,
        MB_LAYOUT_64BPP,
        MB_LAYOUT_128BPP,
};

/* Macroblock dimensions per bytes-per-pixel.  All entries hold 256 bytes
 * (width * height * cpp == 256).
 */
static const struct mb_layout mb_layouts[] = {
        [MB_LAYOUT_8BPP] = { .height = 16, .width = 16, .tile_row_stride = 8 },
        [MB_LAYOUT_16BPP] = { .height = 8, .width = 16, .tile_row_stride = 8 },
        [MB_LAYOUT_32BPP] = { .height = 8, .width = 8, .tile_row_stride = 4 },
        [MB_LAYOUT_64BPP] = { .height = 4, .width = 8, .tile_row_stride = 4 },
        [MB_LAYOUT_128BPP] = { .height = 4, .width = 4, .tile_row_stride = 2 },
};
+
+static const struct mb_layout *
+get_mb_layout(int cpp)
+{
+ const struct mb_layout *layout = &mb_layouts[ffs(cpp) - 1];
+
+ /* Sanity check the table. XXX: We should de-duplicate. */
+ assert(layout->width == vc5_utile_width(cpp) * 2);
+ assert(layout->tile_row_stride == vc5_utile_width(cpp));
+
+ return layout;
+}
+
/** Return the width in pixels of a 64-byte microtile.
 *
 * A utile always holds 64 bytes, so its pixel width shrinks as the
 * bytes-per-pixel grows.
 */
uint32_t
vc5_utile_width(int cpp)
{
        if (cpp == 1 || cpp == 2)
                return 8;
        if (cpp == 4 || cpp == 8)
                return 4;
        assert(cpp == 16);
        return 2;
}
+
/** Return the height in pixels of a 64-byte microtile.
 *
 * Together with vc5_utile_width(), this keeps width * height * cpp == 64.
 */
uint32_t
vc5_utile_height(int cpp)
{
        if (cpp == 1)
                return 8;
        if (cpp == 2 || cpp == 4)
                return 4;
        assert(cpp == 8 || cpp == 16);
        return 2;
}
+
/**
 * Returns the byte address for a given pixel within a utile.
 *
 * Pixels are stored in raster order within the 64-byte utile (so a 32bpp
 * utile is a 4x4 pixel arrangement).
 */
static inline uint32_t
vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
{
        const uint32_t w = vc5_utile_width(cpp);

        assert(x < w && y < vc5_utile_height(cpp));

        return (y * w + x) * cpp;
}
+
/**
 * Returns the byte offset for a given pixel in a LINEARTILE layout.
 *
 * LINEARTILE is a single row or a single column of utiles, so exactly one
 * of the utile indices is always zero.
 */
static inline uint32_t
vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
{
        uint32_t w = vc5_utile_width(cpp);
        uint32_t h = vc5_utile_height(cpp);
        uint32_t ux = x / w;
        uint32_t uy = y / h;

        assert(ux == 0 || uy == 0);

        /* Each utile is 64 bytes; one of ux/uy is 0, so their sum indexes
         * along the line of utiles.
         */
        uint32_t utile_base = 64 * (ux + uy);

        return utile_base + vc5_get_utile_pixel_offset(cpp, x % w, y % h);
}
+
/**
 * Returns the byte offset for a given pixel in a UBLINEAR layout.
 *
 * UBLINEAR arranges pixels in 256-byte UIF blocks (2x2 utiles), with the
 * UIF blocks laid out in \p ublinear_number columns in raster order.
 */
static inline uint32_t
vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
                              int ublinear_number)
{
        uint32_t utile_w = vc5_utile_width(cpp);
        uint32_t utile_h = vc5_utile_height(cpp);
        uint32_t ub_w = utile_w * 2;
        uint32_t ub_h = utile_h * 2;
        uint32_t ub_x = x / ub_w;
        uint32_t ub_y = y / ub_h;

        /* 256 bytes per UIF block, then 64/128-byte steps select which of
         * the 2x2 utiles within the block (utile_w/utile_h are powers of
         * two, so "x & utile_w" tests the right/bottom half).
         */
        return (256 * (ub_y * ublinear_number +
                       ub_x) +
                ((x & utile_w) ? 64 : 0) +
                ((y & utile_h) ? 128 : 0) +
                vc5_get_utile_pixel_offset(cpp,
                                           x & (utile_w - 1),
                                           y & (utile_h - 1)));
}
+
/* UBLINEAR with two columns of UIF blocks; image_h is unused here. */
static inline uint32_t
vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        return vc5_get_ublinear_pixel_offset(cpp, x, y, 2 /* columns */);
}
+
/* UBLINEAR with a single column of UIF blocks; image_h is unused here. */
static inline uint32_t
vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        return vc5_get_ublinear_pixel_offset(cpp, x, y, 1 /* column */);
}
+
/**
 * Returns the byte offset for a given pixel in a UIF layout.
 *
 * UIF is the general VC5 tiling layout shared across 3D, media, and scanout.
 * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in
 * 4x4 groups, and those 4x4 groups are then stored in raster order.
 */
static inline uint32_t
vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
{
        const struct mb_layout *layout = get_mb_layout(cpp);
        uint32_t mb_width = layout->width;
        uint32_t mb_height = layout->height;
        /* MB dimensions are powers of two, so ffs() - 1 gives log2. */
        uint32_t log2_mb_width = ffs(mb_width) - 1;
        uint32_t log2_mb_height = ffs(mb_height) - 1;

        /* Macroblock X, y */
        uint32_t mb_x = x >> log2_mb_width;
        uint32_t mb_y = y >> log2_mb_height;
        /* X, y within the macroblock */
        uint32_t mb_pixel_x = x - (mb_x << log2_mb_width);
        uint32_t mb_pixel_y = y - (mb_y << log2_mb_height);

        /* Image height in macroblocks, rounded up. */
        uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height;
        /* Linear MB index.  NOTE(review): this encodes the 4-wide column
         * grouping of UIF blocks described above -- confirm against the HW
         * address-translation docs.
         */
        uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4;

        /* Each macroblock is one 256-byte UIF block. */
        uint32_t mb_base_addr = mb_id * 256;

        /* Which of the 2x2 utiles within the MB this pixel falls in. */
        bool top = mb_pixel_y < mb_height / 2;
        bool left = mb_pixel_x < mb_width / 2;

        /* Docs have this in pixels, we do bytes here. */
        uint32_t mb_tile_offset = (!top * 128 + !left * 64);

        /* Coordinates within the selected utile (mask off the half-MB bit;
         * mb_height/2 and mb_width/2 are single power-of-two bits).
         */
        uint32_t mb_tile_y = mb_pixel_y & ~(mb_height / 2);
        uint32_t mb_tile_x = mb_pixel_x & ~(mb_width / 2);
        uint32_t mb_tile_pixel_id = (mb_tile_y *
                                     layout->tile_row_stride +
                                     mb_tile_x);

        uint32_t mb_tile_addr = mb_tile_pixel_id * cpp;

        uint32_t mb_pixel_address = (mb_base_addr +
                                     mb_tile_offset +
                                     mb_tile_addr);

        return mb_pixel_address;
}
+
+static inline void
+vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, uint32_t image_h,
+ const struct pipe_box *box,
+ uint32_t (*get_pixel_offset)(uint32_t cpp,
+ uint32_t image_h,
+ uint32_t x, uint32_t y),
+ bool is_load)
+{
+ for (uint32_t y = 0; y < box->height; y++) {
+ void *cpu_row = cpu + y * cpu_stride;
+
+ for (int x = 0; x < box->width; x++) {
+ uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
+ box->x + x,
+ box->y + y);
+
+ if (false) {
+ fprintf(stderr, "%3d,%3d -> %d\n",
+ box->x + x, box->y + y,
+ pixel_offset);
+ }
+
+ if (is_load) {
+ memcpy(cpu_row + x * cpp,
+ gpu + pixel_offset,
+ cpp);
+ } else {
+ memcpy(gpu + pixel_offset,
+ cpu_row + x * cpp,
+ cpp);
+ }
+ }
+ }
+}
+
/* Dispatches to vc5_move_pixels_general_percpp() with a literal cpp for
 * each supported bytes-per-pixel, so the compiler can specialize (and
 * inline the offset math in) each branch rather than looping on a
 * runtime-variable cpp.
 */
static inline void
vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
                        void *cpu, uint32_t cpu_stride,
                        int cpp, uint32_t image_h,
                        const struct pipe_box *box,
                        uint32_t (*get_pixel_offset)(uint32_t cpp,
                                                     uint32_t image_h,
                                                     uint32_t x, uint32_t y),
                        bool is_load)
{
        switch (cpp) {
        case 1:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               1, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 2:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               2, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 4:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               4, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 8:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               8, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        case 16:
                vc5_move_pixels_general_percpp(gpu, gpu_stride,
                                               cpu, cpu_stride,
                                               16, image_h, box,
                                               get_pixel_offset,
                                               is_load);
                break;
        }
}
+
+static inline void
+vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ enum vc5_tiling_mode tiling_format,
+ int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box,
+ bool is_load)
+{
+ switch (tiling_format) {
+ case VC5_TILING_UIF_NO_XOR:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_uif_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_UBLINEAR_2_COLUMN:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_ublinear_2_column_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_UBLINEAR_1_COLUMN:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_ublinear_1_column_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_LINEARTILE:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_lt_pixel_offset,
+ is_load);
+ break;
+ default:
+ unreachable("Unsupported tiling format");
+ break;
+ }
+}
+
+/**
+ * Loads pixel data from the start (microtile-aligned) box in \p src to the
+ * start of \p dst according to the given tiling format.
+ */
+void
+vc5_load_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum vc5_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box)
+{
+ vc5_move_tiled_image(src, src_stride,
+ dst, dst_stride,
+ tiling_format,
+ cpp,
+ image_h,
+ box,
+ true);
+}
+
+/**
+ * Stores pixel data from the start of \p src into a (microtile-aligned) box in
+ * \p dst according to the given tiling format.
+ */
+void
+vc5_store_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum vc5_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box)
+{
+ vc5_move_tiled_image(dst, dst_stride,
+ src, src_stride,
+ tiling_format,
+ cpp,
+ image_h,
+ box,
+ false);
+}
diff --git a/src/gallium/drivers/vc5/vc5_tiling.h b/src/gallium/drivers/vc5/vc5_tiling.h
new file mode 100644
index 00000000000..d3cf48c4527
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_tiling.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
#ifndef VC5_TILING_H
#define VC5_TILING_H

/* Helpers for translating between linear and VC5 tiled pixel layouts
 * (implemented in vc5_tiling.c).
 *
 * NOTE(review): this header relies on the includer having already provided
 * uint32_t/bool, ATTRIBUTE_CONST, enum vc5_tiling_mode and struct pipe_box
 * -- confirm include order at the call sites.
 */

/* Dimensions, in pixels, of a 64-byte microtile for the given cpp. */
uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST;
uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST;
bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
/* NOTE(review): vc5_load_utile/vc5_store_utile are not defined in
 * vc5_tiling.c as visible here -- verify their implementation exists.
 */
void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp);
void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp);
/* Copies a microtile-aligned box from tiled src into linear dst. */
void vc5_load_tiled_image(void *dst, uint32_t dst_stride,
                          void *src, uint32_t src_stride,
                          enum vc5_tiling_mode tiling_format, int cpp,
                          uint32_t image_h,
                          const struct pipe_box *box);
/* Copies linear src into a microtile-aligned box of tiled dst. */
void vc5_store_tiled_image(void *dst, uint32_t dst_stride,
                           void *src, uint32_t src_stride,
                           enum vc5_tiling_mode tiling_format, int cpp,
                           uint32_t image_h,
                           const struct pipe_box *box);

#endif /* VC5_TILING_H */
diff --git a/src/gallium/drivers/vc5/vc5_uniforms.c b/src/gallium/drivers/vc5/vc5_uniforms.c
new file mode 100644
index 00000000000..dc444fe92a4
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_uniforms.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_pack_color.h"
+#include "util/format_srgb.h"
+
+#include "vc5_context.h"
+#include "compiler/v3d_compiler.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
#if 0
/* NOTE(review): disabled border-color support carried over from the vc4
 * driver; QUNIFORM_TEXTURE_BORDER_COLOR is currently a no-op in
 * vc5_write_uniforms().  Kept for reference until border colors are wired
 * up for vc5.
 */

#define SWIZ(x,y,z,w) { \
        PIPE_SWIZZLE_##x, \
        PIPE_SWIZZLE_##y, \
        PIPE_SWIZZLE_##z, \
        PIPE_SWIZZLE_##w \
}

/* Packs the sampler's border color into the layout the hardware would see
 * for the texture's storage format, and emits it into the uniform stream.
 */
static void
write_texture_border_color(struct vc5_job *job,
                           struct vc5_cl_out **uniforms,
                           struct vc5_texture_stateobj *texstate,
                           uint32_t unit)
{
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc5_resource *rsc = vc5_resource(texture->texture);
        union util_color uc;

        const struct util_format_description *tex_format_desc =
                util_format_description(texture->format);

        /* sRGB textures store the border color in sRGB space. */
        float border_color[4];
        for (int i = 0; i < 4; i++)
                border_color[i] = sampler->border_color.f[i];
        if (util_format_is_srgb(texture->format)) {
                for (int i = 0; i < 3; i++)
                        border_color[i] =
                                util_format_linear_to_srgb_float(border_color[i]);
        }

        /* Turn the border color into the layout of channels that it would
         * have when stored as texture contents.
         */
        float storage_color[4];
        util_format_unswizzle_4f(storage_color,
                                 border_color,
                                 tex_format_desc->swizzle);

        /* Now, pack so that when the vc5_format-sampled texture contents are
         * replaced with our border color, the vc5_get_format_swizzle()
         * swizzling will get the right channels.
         */
        if (util_format_is_depth_or_stencil(texture->format)) {
                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                       sampler->border_color.f[0]) << 8;
        } else {
                switch (rsc->vc5_format) {
                default:
                case VC5_TEXTURE_TYPE_RGBA8888:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_RGBA4444:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_RGB565:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_ALPHA:
                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
                        break;
                case VC5_TEXTURE_TYPE_LUMALPHA:
                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
                                    (float_to_ubyte(storage_color[0]) << 0));
                        break;
                }
        }

        cl_aligned_u32(uniforms, uc.ui[0]);
}
#endif
+
+static uint32_t
+get_texrect_scale(struct vc5_texture_stateobj *texstate,
+ enum quniform_contents contents,
+ uint32_t data)
+{
+ struct pipe_sampler_view *texture = texstate->textures[data];
+ uint32_t dim;
+
+ if (contents == QUNIFORM_TEXRECT_SCALE_X)
+ dim = texture->texture->width0;
+ else
+ dim = texture->texture->height0;
+
+ return fui(1.0f / dim);
+}
+
+static uint32_t
+get_texture_size(struct vc5_texture_stateobj *texstate,
+ enum quniform_contents contents,
+ uint32_t data)
+{
+ struct pipe_sampler_view *texture = texstate->textures[data];
+
+ switch (contents) {
+ case QUNIFORM_TEXTURE_WIDTH:
+ return u_minify(texture->texture->width0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_HEIGHT:
+ return u_minify(texture->texture->height0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_DEPTH:
+ return u_minify(texture->texture->depth0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_ARRAY_SIZE:
+ return texture->texture->array_size;
+ case QUNIFORM_TEXTURE_LEVELS:
+ return (texture->u.tex.last_level -
+ texture->u.tex.first_level) + 1;
+ default:
+ unreachable("Bad texture size field");
+ }
+}
+
/* Uploads the indirectly-addressed ranges of the gallium constant buffer
 * into a fresh BO laid out as the compiler's UBO expects.
 *
 * Returns NULL when the shader has no UBO accesses; otherwise the caller
 * owns the returned reference (vc5_write_uniforms() unreferences it).
 */
static struct vc5_bo *
vc5_upload_ubo(struct vc5_context *vc5,
               struct vc5_compiled_shader *shader,
               const uint32_t *gallium_uniforms)
{
        if (!shader->prog_data.base->ubo_size)
                return NULL;

        struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen,
                                          shader->prog_data.base->ubo_size,
                                          "ubo");
        void *data = vc5_bo_map(ubo);
        /* Copy each compiler-identified range from its source offset in the
         * user constant buffer to its destination offset in the UBO.
         */
        for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) {
                memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset,
                       ((const void *)gallium_uniforms +
                        shader->prog_data.base->ubo_ranges[i].src_offset),
                       shader->prog_data.base->ubo_ranges[i].size);
        }

        return ubo;
}
+
+/**
+ * Writes the P0 (CFG_MODE=1) texture parameter.
+ *
+ * Some bits of this field are dependent on the type of sample being done by
+ * the shader, while other bits are dependent on the sampler state. We OR the
+ * two together here.
+ */
+static void
+write_texture_p0(struct vc5_job *job,
+ struct vc5_cl_out **uniforms,
+ struct vc5_texture_stateobj *texstate,
+ uint32_t unit,
+ uint32_t shader_data)
+{
+ struct pipe_sampler_state *psampler = texstate->samplers[unit];
+ struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+
+ cl_aligned_u32(uniforms, shader_data | sampler->p0);
+}
+
/* Writes the P1 (CFG_MODE=1) texture parameter: the packed texture state
 * record address ORed with the sampler view's precomputed p1 bits.
 */
static void
write_texture_p1(struct vc5_job *job,
                 struct vc5_cl_out **uniforms,
                 struct vc5_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *psview = texstate->textures[unit];
        struct vc5_sampler_view *sview = vc5_sampler_view(psview);

        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
                .texture_state_record_base_address = texstate->texture_state[unit],
        };

        /* Pack relative to the job's indirect CL so the address is emitted
         * as a relocation.
         */
        uint32_t packed;
        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect,
                                                         (uint8_t *)&packed,
                                                         &unpacked);

        cl_aligned_u32(uniforms, packed | sview->p1);
}
+
/* Emits the uniform stream for one compiled shader stage into the job's
 * indirect CL, resolving each compiler-recorded uniform to its current
 * state value.  Returns a reference to the stream's location (the caller
 * receives ownership of the BO reference taken below).
 */
struct vc5_cl_reloc
vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader,
                   struct vc5_constbuf_stateobj *cb,
                   struct vc5_texture_stateobj *texstate)
{
        struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms;
        struct vc5_job *job = vc5->job;
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
        /* Upload the UBO first; we drop our reference at the end. */
        struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms);

        /* We always need to return some space for uniforms, because the HW
         * will be prefetching, even if we don't read any in the program.
         */
        vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4);

        struct vc5_cl_reloc uniform_stream =
                cl_address(job->indirect.bo, cl_offset(&job->indirect));
        vc5_bo_reference(uniform_stream.bo);

        struct vc5_cl_out *uniforms =
                cl_start(&job->indirect);

        /* Each uniform is written in order as one 32-bit word; the stream
         * layout must match what the compiler recorded in uinfo.
         */
        for (int i = 0; i < uinfo->count; i++) {

                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        cl_aligned_u32(&uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        cl_aligned_u32(&uniforms,
                                       gallium_uniforms[uinfo->data[i]]);
                        break;
                case QUNIFORM_VIEWPORT_X_SCALE:
                        /* NOTE(review): the 256.0 factor presumably converts
                         * to the HW's subpixel units -- confirm against the
                         * packet spec.
                         */
                        cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_aligned_f(&uniforms, vc5->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_aligned_f(&uniforms, vc5->viewport.scale[2]);
                        break;

                case QUNIFORM_USER_CLIP_PLANE:
                        /* data[i] encodes plane * 4 + component. */
                        cl_aligned_f(&uniforms,
                                     vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(job, &uniforms, texstate,
                                         uinfo->data[i]);
                        break;

#if 0
                case QUNIFORM_TEXTURE_FIRST_LEVEL:
                        write_texture_first_level(job, &uniforms, texstate,
                                                  uinfo->data[i]);
                        break;
#endif

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        cl_aligned_u32(&uniforms,
                                       get_texrect_scale(texstate,
                                                         uinfo->contents[i],
                                                         uinfo->data[i]));
                        break;

                case QUNIFORM_TEXTURE_WIDTH:
                case QUNIFORM_TEXTURE_HEIGHT:
                case QUNIFORM_TEXTURE_DEPTH:
                case QUNIFORM_TEXTURE_ARRAY_SIZE:
                case QUNIFORM_TEXTURE_LEVELS:
                        cl_aligned_u32(&uniforms,
                                       get_texture_size(texstate,
                                                        uinfo->contents[i],
                                                        uinfo->data[i]));
                        break;

                case QUNIFORM_STENCIL:
                        /* Faces 0/1 get the stencil ref in bits 8..15. */
                        cl_aligned_u32(&uniforms,
                                       vc5->zsa->stencil_uniforms[uinfo->data[i]] |
                                       (uinfo->data[i] <= 1 ?
                                        (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                        0));
                        break;

                case QUNIFORM_ALPHA_REF:
                        cl_aligned_f(&uniforms,
                                     vc5->zsa->base.alpha.ref_value);
                        break;

                case QUNIFORM_SAMPLE_MASK:
                        cl_aligned_u32(&uniforms, vc5->sample_mask);
                        break;

                case QUNIFORM_UBO_ADDR:
                        /* UBO 0 is the one we uploaded above; others come
                         * from the bound constant buffers.
                         */
                        if (uinfo->data[i] == 0) {
                                cl_aligned_reloc(&job->indirect, &uniforms,
                                                 ubo, 0);
                        } else {
                                int ubo_index = uinfo->data[i];
                                struct vc5_resource *rsc =
                                        vc5_resource(cb->cb[ubo_index].buffer);

                                cl_aligned_reloc(&job->indirect, &uniforms,
                                                 rsc->bo,
                                                 cb->cb[ubo_index].buffer_offset);
                        }
                        break;

                case QUNIFORM_TEXTURE_FIRST_LEVEL:
                case QUNIFORM_TEXTURE_MSAA_ADDR:
                case QUNIFORM_TEXTURE_BORDER_COLOR:
                        /* XXX */
                        break;

                default:
                        assert(quniform_contents_is_texture_p0(uinfo->contents[i]));

                        write_texture_p0(job, &uniforms, texstate,
                                         uinfo->contents[i] -
                                         QUNIFORM_TEXTURE_CONFIG_P0_0,
                                         uinfo->data[i]);
                        break;

                }
#if 0
                uint32_t written_val = *((uint32_t *)uniforms - 1);
                fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n",
                        shader, i, __gen_address_offset(&uniform_stream) + i * 4,
                        written_val, uif(written_val));
#endif
        }

        cl_end(&job->indirect, uniforms);

        vc5_bo_unreference(&ubo);

        return uniform_stream;
}
+
+void
+vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader)
+{
+ uint32_t dirty = 0;
+
+ for (int i = 0; i < shader->prog_data.base->uniforms.count; i++) {
+ switch (shader->prog_data.base->uniforms.contents[i]) {
+ case QUNIFORM_CONSTANT:
+ break;
+ case QUNIFORM_UNIFORM:
+ case QUNIFORM_UBO_ADDR:
+ dirty |= VC5_DIRTY_CONSTBUF;
+ break;
+
+ case QUNIFORM_VIEWPORT_X_SCALE:
+ case QUNIFORM_VIEWPORT_Y_SCALE:
+ case QUNIFORM_VIEWPORT_Z_OFFSET:
+ case QUNIFORM_VIEWPORT_Z_SCALE:
+ dirty |= VC5_DIRTY_VIEWPORT;
+ break;
+
+ case QUNIFORM_USER_CLIP_PLANE:
+ dirty |= VC5_DIRTY_CLIP;
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P1:
+ case QUNIFORM_TEXTURE_BORDER_COLOR:
+ case QUNIFORM_TEXTURE_FIRST_LEVEL:
+ case QUNIFORM_TEXTURE_MSAA_ADDR:
+ case QUNIFORM_TEXRECT_SCALE_X:
+ case QUNIFORM_TEXRECT_SCALE_Y:
+ case QUNIFORM_TEXTURE_WIDTH:
+ case QUNIFORM_TEXTURE_HEIGHT:
+ case QUNIFORM_TEXTURE_DEPTH:
+ case QUNIFORM_TEXTURE_ARRAY_SIZE:
+ case QUNIFORM_TEXTURE_LEVELS:
+ /* We could flag this on just the stage we're
+ * compiling for, but it's not passed in.
+ */
+ dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
+ break;
+
+ case QUNIFORM_STENCIL:
+ case QUNIFORM_ALPHA_REF:
+ dirty |= VC5_DIRTY_ZSA;
+ break;
+
+ case QUNIFORM_SAMPLE_MASK:
+ dirty |= VC5_DIRTY_SAMPLE_MASK;
+ break;
+
+ default:
+ assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
+ dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
+ break;
+ }
+ }
+
+ shader->uniform_dirty_bits = dirty;
+}