diff options
Diffstat (limited to 'src/gallium/drivers/vc5')
29 files changed, 8953 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc5/.editorconfig b/src/gallium/drivers/vc5/.editorconfig new file mode 100644 index 00000000000..f3d8c479154 --- /dev/null +++ b/src/gallium/drivers/vc5/.editorconfig @@ -0,0 +1,3 @@ +[*.{c,h}] +indent_style = space +indent_size = 8 diff --git a/src/gallium/drivers/vc5/Automake.inc b/src/gallium/drivers/vc5/Automake.inc new file mode 100644 index 00000000000..57c8a28efed --- /dev/null +++ b/src/gallium/drivers/vc5/Automake.inc @@ -0,0 +1,14 @@ +if HAVE_GALLIUM_VC5 + +TARGET_DRIVERS += vc5 +TARGET_CPPFLAGS += -DGALLIUM_VC5 +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/winsys/vc5/drm/libvc5drm.la \ + $(top_builddir)/src/gallium/drivers/vc5/libvc5.la \ + $(top_builddir)/src/broadcom/libbroadcom.la + +if !HAVE_GALLIUM_VC4 +TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la +endif + +endif diff --git a/src/gallium/drivers/vc5/Makefile.am b/src/gallium/drivers/vc5/Makefile.am new file mode 100644 index 00000000000..42d4be73d26 --- /dev/null +++ b/src/gallium/drivers/vc5/Makefile.am @@ -0,0 +1,40 @@ +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/broadcom \ + $(LIBDRM_CFLAGS) \ + $(VC5_SIMULATOR_CFLAGS) \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $() + +noinst_LTLIBRARIES = libvc5.la + +libvc5_la_SOURCES = $(C_SOURCES) + +libvc5_la_LDFLAGS = \ + $(VC5_SIMULATOR_LIBS) \ + $(NULL) diff --git a/src/gallium/drivers/vc5/Makefile.sources b/src/gallium/drivers/vc5/Makefile.sources new file mode 100644 index 00000000000..0d54f830bb2 --- /dev/null +++ b/src/gallium/drivers/vc5/Makefile.sources @@ -0,0 +1,26 @@ +C_SOURCES := \ + vc5_blit.c \ + vc5_bufmgr.c \ + vc5_bufmgr.h \ + vc5_cl.c \ + vc5_cl.h \ + vc5_context.c \ + vc5_context.h \ + vc5_draw.c \ + vc5_emit.c \ + vc5_fence.c \ + vc5_formats.c \ + vc5_job.c \ + vc5_program.c \ + vc5_query.c \ + vc5_rcl.c \ + vc5_resource.c \ + vc5_resource.h \ + vc5_screen.c \ + vc5_screen.h \ + vc5_simulator.c \ + vc5_state.c \ + vc5_tiling.c \ + vc5_tiling.h \ + vc5_uniforms.c \ + $() diff --git a/src/gallium/drivers/vc5/vc5_blit.c b/src/gallium/drivers/vc5/vc5_blit.c new file mode 100644 index 00000000000..64811416e50 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_blit.c @@ -0,0 +1,226 @@ +/* + * Copyright © 2015-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, 
subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_surface.h" +#include "util/u_blitter.h" +#include "vc5_context.h" + +#if 0 +static struct pipe_surface * +vc5_get_blit_surface(struct pipe_context *pctx, + struct pipe_resource *prsc, unsigned level) +{ + struct pipe_surface tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.format = prsc->format; + tmpl.u.tex.level = level; + tmpl.u.tex.first_layer = 0; + tmpl.u.tex.last_layer = 0; + + return pctx->create_surface(pctx, prsc, &tmpl); +} + +static bool +is_tile_unaligned(unsigned size, unsigned tile_size) +{ + return size & (tile_size - 1); +} + +static bool +vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(pctx); + bool msaa = (info->src.resource->nr_samples > 1 || + info->dst.resource->nr_samples > 1); + int tile_width = msaa ? 32 : 64; + int tile_height = msaa ? 
32 : 64; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) + return false; + + if (info->scissor_enable) + return false; + + if ((info->mask & PIPE_MASK_RGBA) == 0) + return false; + + if (info->dst.box.x != info->src.box.x || + info->dst.box.y != info->src.box.y || + info->dst.box.width != info->src.box.width || + info->dst.box.height != info->src.box.height) { + return false; + } + + int dst_surface_width = u_minify(info->dst.resource->width0, + info->dst.level); + int dst_surface_height = u_minify(info->dst.resource->height0, + info->dst.level); + if (is_tile_unaligned(info->dst.box.x, tile_width) || + is_tile_unaligned(info->dst.box.y, tile_height) || + (is_tile_unaligned(info->dst.box.width, tile_width) && + info->dst.box.x + info->dst.box.width != dst_surface_width) || + (is_tile_unaligned(info->dst.box.height, tile_height) && + info->dst.box.y + info->dst.box.height != dst_surface_height)) { + return false; + } + + /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the + * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our + * destination surface) to determine the stride. This may be wrong + * when reading from texture miplevels > 0, which are stored in + * POT-sized areas. For MSAA, the tile addresses are computed + * explicitly by the RCL, but still use the destination width to + * determine the stride (which could be fixed by explicitly supplying + * it in the ABI). 
+ */ + struct vc5_resource *rsc = vc5_resource(info->src.resource); + + uint32_t stride; + + if (info->src.resource->nr_samples > 1) + stride = align(dst_surface_width, 32) * 4 * rsc->cpp; + /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T) + stride = align(dst_surface_width * rsc->cpp, 128); */ + else + stride = align(dst_surface_width * rsc->cpp, 16); + + if (stride != rsc->slices[info->src.level].stride) + return false; + + if (info->dst.resource->format != info->src.resource->format) + return false; + + if (false) { + fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", + info->src.box.x, + info->src.box.y, + info->dst.box.x, + info->dst.box.y, + info->dst.box.width, + info->dst.box.height); + } + + struct pipe_surface *dst_surf = + vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level); + struct pipe_surface *src_surf = + vc5_get_blit_surface(pctx, info->src.resource, info->src.level); + + vc5_flush_jobs_reading_resource(vc5, info->src.resource); + + struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL); + pipe_surface_reference(&job->color_read, src_surf); + + /* If we're resolving from MSAA to single sample, we still need to run + * the engine in MSAA mode for the load. 
+ */ + if (!job->msaa && info->src.resource->nr_samples > 1) { + job->msaa = true; + job->tile_width = 32; + job->tile_height = 32; + } + + job->draw_min_x = info->dst.box.x; + job->draw_min_y = info->dst.box.y; + job->draw_max_x = info->dst.box.x + info->dst.box.width; + job->draw_max_y = info->dst.box.y + info->dst.box.height; + job->draw_width = dst_surf->width; + job->draw_height = dst_surf->height; + + job->tile_width = tile_width; + job->tile_height = tile_height; + job->msaa = msaa; + job->needs_flush = true; + job->resolve |= PIPE_CLEAR_COLOR; + + vc5_job_submit(vc5, job); + + pipe_surface_reference(&dst_surf, NULL); + pipe_surface_reference(&src_surf, NULL); + + return true; +} +#endif + +void +vc5_blitter_save(struct vc5_context *vc5) +{ + util_blitter_save_fragment_constant_buffer_slot(vc5->blitter, + vc5->constbuf[PIPE_SHADER_FRAGMENT].cb); + util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb); + util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx); + util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs); + util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, + vc5->streamout.targets); + util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer); + util_blitter_save_viewport(vc5->blitter, &vc5->viewport); + util_blitter_save_scissor(vc5->blitter, &vc5->scissor); + util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs); + util_blitter_save_blend(vc5->blitter, vc5->blend); + util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa); + util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref); + util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask); + util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer); + util_blitter_save_fragment_sampler_states(vc5->blitter, + vc5->fragtex.num_samplers, + (void **)vc5->fragtex.samplers); + util_blitter_save_fragment_sampler_views(vc5->blitter, + vc5->fragtex.num_textures, vc5->fragtex.textures); + 
util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, + vc5->streamout.targets); +} + +static bool +vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(ctx); + + if (!util_blitter_is_blit_supported(vc5->blitter, info)) { + fprintf(stderr, "blit unsupported %s -> %s\n", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + return false; + } + + vc5_blitter_save(vc5); + util_blitter_blit(vc5->blitter, info); + + return true; +} + +/* Optimal hardware path for blitting pixels. + * Scaling, format conversion, up- and downsampling (resolve) are allowed. + */ +void +vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) +{ + struct pipe_blit_info info = *blit_info; + +#if 0 + if (vc5_tile_blit(pctx, blit_info)) + return; +#endif + + vc5_render_blit(pctx, &info); +} diff --git a/src/gallium/drivers/vc5/vc5_bufmgr.c b/src/gallium/drivers/vc5/vc5_bufmgr.c new file mode 100644 index 00000000000..c6c06dcfda7 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_bufmgr.c @@ -0,0 +1,580 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <err.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "util/u_hash_table.h" +#include "util/u_memory.h" +#include "util/ralloc.h" + +#include "vc5_context.h" +#include "vc5_screen.h" + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +static bool dump_stats = false; + +static void +vc5_bo_cache_free_all(struct vc5_bo_cache *cache); + +static void +vc5_bo_dump_stats(struct vc5_screen *screen) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + + fprintf(stderr, " BOs allocated: %d\n", screen->bo_count); + fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024); + fprintf(stderr, " BOs cached: %d\n", cache->bo_count); + fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024); + + if (!list_empty(&cache->time_list)) { + struct vc5_bo *first = LIST_ENTRY(struct vc5_bo, + cache->time_list.next, + time_list); + struct vc5_bo *last = LIST_ENTRY(struct vc5_bo, + cache->time_list.prev, + time_list); + + fprintf(stderr, " oldest cache time: %ld\n", + (long)first->free_time); + fprintf(stderr, " newest cache time: %ld\n", + (long)last->free_time); + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + fprintf(stderr, " now: %ld\n", + time.tv_sec); + } +} + +static void +vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo) +{ + list_del(&bo->time_list); + list_del(&bo->size_list); + 
cache->bo_count--; + cache->bo_size -= bo->size; +} + +static struct vc5_bo * +vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + uint32_t page_index = size / 4096 - 1; + + if (cache->size_list_size <= page_index) + return NULL; + + struct vc5_bo *bo = NULL; + mtx_lock(&cache->lock); + if (!list_empty(&cache->size_list[page_index])) { + bo = LIST_ENTRY(struct vc5_bo, cache->size_list[page_index].next, + size_list); + + /* Check that the BO has gone idle. If not, then we want to + * allocate something new instead, since we assume that the + * user will proceed to CPU map it and fill it with stuff. + */ + if (!vc5_bo_wait(bo, 0, NULL)) { + mtx_unlock(&cache->lock); + return NULL; + } + + pipe_reference_init(&bo->reference, 1); + vc5_bo_remove_from_cache(cache, bo); + + bo->name = name; + } + mtx_unlock(&cache->lock); + return bo; +} + +struct vc5_bo * +vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name) +{ + struct vc5_bo *bo; + int ret; + + size = align(size, 4096); + + bo = vc5_bo_from_cache(screen, size, name); + if (bo) { + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb from cache:\n", + name, size / 1024); + vc5_bo_dump_stats(screen); + } + return bo; + } + + bo = CALLOC_STRUCT(vc5_bo); + if (!bo) + return NULL; + + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->size = size; + bo->name = name; + bo->private = true; + + retry: + ; + + bool cleared_and_retried = false; + struct drm_vc5_create_bo create = { + .size = size + }; + + ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_CREATE_BO, &create); + bo->handle = create.handle; + bo->offset = create.offset; + + if (ret != 0) { + if (!list_empty(&screen->bo_cache.time_list) && + !cleared_and_retried) { + cleared_and_retried = true; + vc5_bo_cache_free_all(&screen->bo_cache); + goto retry; + } + + free(bo); + return NULL; + } + + screen->bo_count++; + screen->bo_size += bo->size; + if 
(dump_stats) { + fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024); + vc5_bo_dump_stats(screen); + } + + return bo; +} + +void +vc5_bo_last_unreference(struct vc5_bo *bo) +{ + struct vc5_screen *screen = bo->screen; + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + mtx_lock(&screen->bo_cache.lock); + vc5_bo_last_unreference_locked_timed(bo, time.tv_sec); + mtx_unlock(&screen->bo_cache.lock); +} + +static void +vc5_bo_free(struct vc5_bo *bo) +{ + struct vc5_screen *screen = bo->screen; + + if (bo->map) { + if (using_vc5_simulator && bo->name && + strcmp(bo->name, "winsys") == 0) { + free(bo->map); + } else { + munmap(bo->map, bo->size); + VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); + } + } + + struct drm_gem_close c; + memset(&c, 0, sizeof(c)); + c.handle = bo->handle; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c); + if (ret != 0) + fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); + + screen->bo_count--; + screen->bo_size -= bo->size; + + if (dump_stats) { + fprintf(stderr, "Freed %s%s%dkb:\n", + bo->name ? bo->name : "", + bo->name ? " " : "", + bo->size / 1024); + vc5_bo_dump_stats(screen); + } + + free(bo); +} + +static void +free_stale_bos(struct vc5_screen *screen, time_t time) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + bool freed_any = false; + + list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, + time_list) { + if (dump_stats && !freed_any) { + fprintf(stderr, "Freeing stale BOs:\n"); + vc5_bo_dump_stats(screen); + freed_any = true; + } + + /* If it's more than a second old, free it. 
*/ + if (time - bo->free_time > 2) { + vc5_bo_remove_from_cache(cache, bo); + vc5_bo_free(bo); + } else { + break; + } + } + + if (dump_stats && freed_any) { + fprintf(stderr, "Freed stale BOs:\n"); + vc5_bo_dump_stats(screen); + } +} + +static void +vc5_bo_cache_free_all(struct vc5_bo_cache *cache) +{ + mtx_lock(&cache->lock); + list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, + time_list) { + vc5_bo_remove_from_cache(cache, bo); + vc5_bo_free(bo); + } + mtx_unlock(&cache->lock); +} + +void +vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time) +{ + struct vc5_screen *screen = bo->screen; + struct vc5_bo_cache *cache = &screen->bo_cache; + uint32_t page_index = bo->size / 4096 - 1; + + if (!bo->private) { + vc5_bo_free(bo); + return; + } + + if (cache->size_list_size <= page_index) { + struct list_head *new_list = + ralloc_array(screen, struct list_head, page_index + 1); + + /* Move old list contents over (since the array has moved, and + * therefore the pointers to the list heads have to change). 
+ */ + for (int i = 0; i < cache->size_list_size; i++) { + struct list_head *old_head = &cache->size_list[i]; + if (list_empty(old_head)) + list_inithead(&new_list[i]); + else { + new_list[i].next = old_head->next; + new_list[i].prev = old_head->prev; + new_list[i].next->prev = &new_list[i]; + new_list[i].prev->next = &new_list[i]; + } + } + for (int i = cache->size_list_size; i < page_index + 1; i++) + list_inithead(&new_list[i]); + + cache->size_list = new_list; + cache->size_list_size = page_index + 1; + } + + bo->free_time = time; + list_addtail(&bo->size_list, &cache->size_list[page_index]); + list_addtail(&bo->time_list, &cache->time_list); + cache->bo_count++; + cache->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Freed %s %dkb to cache:\n", + bo->name, bo->size / 1024); + vc5_bo_dump_stats(screen); + } + bo->name = NULL; + + free_stale_bos(screen, time); +} + +static struct vc5_bo * +vc5_bo_open_handle(struct vc5_screen *screen, + uint32_t winsys_stride, + uint32_t handle, uint32_t size) +{ + struct vc5_bo *bo; + + assert(size); + + mtx_lock(&screen->bo_handles_mutex); + + bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle); + if (bo) { + pipe_reference(NULL, &bo->reference); + goto done; + } + + bo = CALLOC_STRUCT(vc5_bo); + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->handle = handle; + bo->size = size; + bo->name = "winsys"; + bo->private = false; + +#ifdef USE_VC5_SIMULATOR + vc5_simulator_open_from_handle(screen->fd, winsys_stride, + bo->handle, bo->size); + bo->map = malloc(bo->size); +#endif + + util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo); + +done: + mtx_unlock(&screen->bo_handles_mutex); + return bo; +} + +struct vc5_bo * +vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, + uint32_t winsys_stride) +{ + struct drm_gem_open o = { + .name = name + }; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o); + if (ret) { + fprintf(stderr, "Failed to open bo %d: 
%s\n", + name, strerror(errno)); + return NULL; + } + + return vc5_bo_open_handle(screen, winsys_stride, o.handle, o.size); +} + +struct vc5_bo * +vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride) +{ + uint32_t handle; + int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); + int size; + if (ret) { + fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd); + return NULL; + } + + /* Determine the size of the bo we were handed. */ + size = lseek(fd, 0, SEEK_END); + if (size == -1) { + fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd); + return NULL; + } + + return vc5_bo_open_handle(screen, winsys_stride, handle, size); +} + +int +vc5_bo_get_dmabuf(struct vc5_bo *bo) +{ + int fd; + int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle, + O_CLOEXEC, &fd); + if (ret != 0) { + fprintf(stderr, "Failed to export gem bo %d to dmabuf\n", + bo->handle); + return -1; + } + + mtx_lock(&bo->screen->bo_handles_mutex); + bo->private = false; + util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo); + mtx_unlock(&bo->screen->bo_handles_mutex); + + return fd; +} + +bool +vc5_bo_flink(struct vc5_bo *bo, uint32_t *name) +{ + struct drm_gem_flink flink = { + .handle = bo->handle, + }; + int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink); + if (ret) { + fprintf(stderr, "Failed to flink bo %d: %s\n", + bo->handle, strerror(errno)); + free(bo); + return false; + } + + bo->private = false; + *name = flink.name; + + return true; +} + +static int vc5_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns) +{ + struct drm_vc5_wait_seqno wait = { + .seqno = seqno, + .timeout_ns = timeout_ns, + }; + int ret = vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_SEQNO, &wait); + if (ret == -1) + return -errno; + else + return 0; + +} + +bool +vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns, + const char *reason) +{ + if (screen->finished_seqno >= seqno) + return true; + + if 
(unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) { + if (vc5_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) { + fprintf(stderr, "Blocking on seqno %lld for %s\n", + (long long)seqno, reason); + } + } + + int ret = vc5_wait_seqno_ioctl(screen->fd, seqno, timeout_ns); + if (ret) { + if (ret != -ETIME) { + fprintf(stderr, "wait failed: %d\n", ret); + abort(); + } + + return false; + } + + screen->finished_seqno = seqno; + return true; +} + +static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns) +{ + struct drm_vc5_wait_bo wait = { + .handle = handle, + .timeout_ns = timeout_ns, + }; + int ret = vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_BO, &wait); + if (ret == -1) + return -errno; + else + return 0; + +} + +bool +vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason) +{ + struct vc5_screen *screen = bo->screen; + + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) { + if (vc5_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) { + fprintf(stderr, "Blocking on %s BO for %s\n", + bo->name, reason); + } + } + + int ret = vc5_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns); + if (ret) { + if (ret != -ETIME) { + fprintf(stderr, "wait failed: %d\n", ret); + abort(); + } + + return false; + } + + return true; +} + +void * +vc5_bo_map_unsynchronized(struct vc5_bo *bo) +{ + uint64_t offset; + int ret; + + if (bo->map) + return bo->map; + + struct drm_vc5_mmap_bo map; + memset(&map, 0, sizeof(map)); + map.handle = bo->handle; + ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_VC5_MMAP_BO, &map); + offset = map.offset; + if (ret != 0) { + fprintf(stderr, "map ioctl failure\n"); + abort(); + } + + bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->screen->fd, offset); + if (bo->map == MAP_FAILED) { + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + bo->handle, (long long)offset, bo->size); + abort(); + } + VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false)); + + return 
bo->map; +} + +void * +vc5_bo_map(struct vc5_bo *bo) +{ + void *map = vc5_bo_map_unsynchronized(bo); + + bool ok = vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map"); + if (!ok) { + fprintf(stderr, "BO wait for map failed\n"); + abort(); + } + + return map; +} + +void +vc5_bufmgr_destroy(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_bo_cache *cache = &screen->bo_cache; + + vc5_bo_cache_free_all(cache); + + if (dump_stats) { + fprintf(stderr, "BO stats after screen destroy:\n"); + vc5_bo_dump_stats(screen); + } +} diff --git a/src/gallium/drivers/vc5/vc5_bufmgr.h b/src/gallium/drivers/vc5/vc5_bufmgr.h new file mode 100644 index 00000000000..cca2b22874f --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_bufmgr.h @@ -0,0 +1,140 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VC5_BUFMGR_H +#define VC5_BUFMGR_H + +#include <stdint.h> +#include "util/u_hash_table.h" +#include "util/u_inlines.h" +#include "util/list.h" +#include "vc5_screen.h" + +struct vc5_context; + +struct vc5_bo { + struct pipe_reference reference; + struct vc5_screen *screen; + void *map; + const char *name; + uint32_t handle; + uint32_t size; + + /* Address of the BO in our page tables. */ + uint32_t offset; + + /** Entry in the linked list of buffers freed, by age. */ + struct list_head time_list; + /** Entry in the per-page-count linked list of buffers freed (by age). */ + struct list_head size_list; + /** Approximate second when the bo was freed. */ + time_t free_time; + /** + * Whether only our process has a reference to the BO (meaning that + * it's safe to reuse it in the BO cache). + */ + bool private; +}; + +struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, + const char *name); +void vc5_bo_last_unreference(struct vc5_bo *bo); +void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time); +struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, + uint32_t winsys_stride); +struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, + uint32_t winsys_stride); +bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name); +int vc5_bo_get_dmabuf(struct vc5_bo *bo); + +static inline void +vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo) +{ + if (pipe_reference(&(*old_bo)->reference, &new_bo->reference)) + vc5_bo_last_unreference(*old_bo); + *old_bo = new_bo; +} + +static inline struct vc5_bo * +vc5_bo_reference(struct vc5_bo *bo) +{ + pipe_reference(NULL, &bo->reference); + return bo; +} + +static inline void +vc5_bo_unreference(struct vc5_bo **bo) +{ + struct vc5_screen *screen; + if (!*bo) + return; + + if ((*bo)->private) { + /* Avoid the mutex for private BOs */ + if (pipe_reference(&(*bo)->reference, NULL)) + vc5_bo_last_unreference(*bo); + } else { + screen = 
(*bo)->screen; + mtx_lock(&screen->bo_handles_mutex); + + if (pipe_reference(&(*bo)->reference, NULL)) { + util_hash_table_remove(screen->bo_handles, + (void *)(uintptr_t)(*bo)->handle); + vc5_bo_last_unreference(*bo); + } + + mtx_unlock(&screen->bo_handles_mutex); + } + + *bo = NULL; +} + +static inline void +vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time) +{ + if (!*bo) + return; + + if (pipe_reference(&(*bo)->reference, NULL)) + vc5_bo_last_unreference_locked_timed(*bo, time); + *bo = NULL; +} + +void * +vc5_bo_map(struct vc5_bo *bo); + +void * +vc5_bo_map_unsynchronized(struct vc5_bo *bo); + +bool +vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason); + +bool +vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns, + const char *reason); + +void +vc5_bufmgr_destroy(struct pipe_screen *pscreen); + +#endif /* VC5_BUFMGR_H */ + diff --git a/src/gallium/drivers/vc5/vc5_cl.c b/src/gallium/drivers/vc5/vc5_cl.c new file mode 100644 index 00000000000..37d96c4360c --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_cl.c @@ -0,0 +1,87 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_math.h" +#include "util/ralloc.h" +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +void +vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl) +{ + cl->base = NULL; + cl->next = cl->base; + cl->size = 0; + cl->job = job; +} + +uint32_t +vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment) +{ + uint32_t offset = align(cl_offset(cl), alignment); + + if (offset + space <= cl->size) { + cl->next = cl->base + offset; + return offset; + } + + vc5_bo_unreference(&cl->bo); + cl->bo = vc5_bo_alloc(cl->job->vc5->screen, align(space, 4096), "CL"); + cl->base = vc5_bo_map(cl->bo); + cl->size = cl->bo->size; + cl->next = cl->base; + + return 0; +} + +void +vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space) +{ + if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size) + return; + + struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL"); + assert(space <= new_bo->size); + + /* Chain to the new BO from the old one. */ + if (cl->bo) { + cl_emit(cl, BRANCH, branch) { + branch.address = cl_address(new_bo, 0); + } + vc5_bo_unreference(&cl->bo); + } else { + /* Root the first RCL/BCL BO in the job. 
*/ + vc5_job_add_bo(cl->job, cl->bo); + } + + cl->bo = new_bo; + cl->base = vc5_bo_map(cl->bo); + cl->size = cl->bo->size; + cl->next = cl->base; +} + +void +vc5_destroy_cl(struct vc5_cl *cl) +{ + vc5_bo_unreference(&cl->bo); +} diff --git a/src/gallium/drivers/vc5/vc5_cl.h b/src/gallium/drivers/vc5/vc5_cl.h new file mode 100644 index 00000000000..e935eeff536 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_cl.h @@ -0,0 +1,246 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_CL_H +#define VC5_CL_H + +#include <stdint.h> + +#include "util/u_math.h" +#include "util/macros.h" + +struct vc5_bo; +struct vc5_job; +struct vc5_cl; + +/** + * Undefined structure, used for typechecking that you're passing the pointers + * to these functions correctly. 
 */
struct vc5_cl_out;

/** A reference to a BO used in the CL packing functions */
struct vc5_cl_reloc {
        struct vc5_bo *bo;
        uint32_t offset;
};

/* Forward declaration: the XML-generated pack functions call this through
 * __gen_emit_reloc before its definition at the bottom of this file.
 */
static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *);

/* Hooks consumed by the generated v3d_packet_v33_pack.h packing code. */
#define __gen_user_data struct vc5_cl
#define __gen_address_type struct vc5_cl_reloc
#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
                                     (reloc)->offset)
#define __gen_emit_reloc cl_pack_emit_reloc

/* A growable command list backed by a mapped BO owned by a job. */
struct vc5_cl {
        void *base;              /* CPU mapping of the backing BO */
        struct vc5_job *job;     /* job this CL belongs to */
        struct vc5_cl_out *next; /* current write pointer into the mapping */
        struct vc5_bo *bo;       /* backing buffer object */
        uint32_t size;           /* size of the mapping, in bytes */
};

void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl);
void vc5_destroy_cl(struct vc5_cl *cl);
void vc5_dump_cl(void *cl, uint32_t size, bool is_render);
uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo);

/* Wrapper structs so unaligned stores go through a packed type instead of a
 * plain pointer cast (which would be UB on alignment-sensitive targets).
 */
struct PACKED unaligned_16 { uint16_t x; };
struct PACKED unaligned_32 { uint32_t x; };

/** Returns the number of bytes written to the CL so far. */
static inline uint32_t cl_offset(struct vc5_cl *cl)
{
        return (char *)cl->next - (char *)cl->base;
}

/** Moves a CL write pointer forward by @n bytes. */
static inline void
cl_advance(struct vc5_cl_out **cl, uint32_t n)
{
        (*cl) = (struct vc5_cl_out *)((char *)(*cl) + n);
}

/** Takes a local copy of the CL's write pointer for a burst of emits. */
static inline struct vc5_cl_out *
cl_start(struct vc5_cl *cl)
{
        return cl->next;
}

/** Commits a local write pointer back to the CL, checking for overflow. */
static inline void
cl_end(struct vc5_cl *cl, struct vc5_cl_out *next)
{
        cl->next = next;
        assert(cl_offset(cl) <= cl->size);
}


static inline void
put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val)
{
        struct unaligned_32 *p = (void *)ptr;
        p->x = val;
}

static inline void
put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val)
{
        struct unaligned_16 *p = (void *)ptr;
        p->x = val;
}

/* cl_u8/u16/u32: append a little scalar and advance the write pointer.  The
 * 16/32-bit variants tolerate unaligned write pointers; the cl_aligned_*
 * variants below require (and assume) natural alignment.
 */
static inline void
cl_u8(struct vc5_cl_out **cl, uint8_t n)
{
        *(uint8_t *)(*cl) = n;
        cl_advance(cl, 1);
}

static inline void
cl_u16(struct vc5_cl_out **cl, uint16_t n)
{
        put_unaligned_16(*cl, n);
        cl_advance(cl, 2);
}

static inline void
cl_u32(struct vc5_cl_out **cl, uint32_t n)
{
        put_unaligned_32(*cl, n);
        cl_advance(cl, 4);
}

static inline void
cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n)
{
        *(uint32_t *)(*cl) = n;
        cl_advance(cl, 4);
}

/* Emits the GPU address of @offset within @bo and records the BO in the
 * job's BO set so the kernel pages it in for the exec.
 */
static inline void
cl_aligned_reloc(struct vc5_cl *cl,
                 struct vc5_cl_out **cl_out,
                 struct vc5_bo *bo, uint32_t offset)
{
        cl_aligned_u32(cl_out, bo->offset + offset);
        vc5_job_add_bo(cl->job, bo);
}

static inline void
cl_ptr(struct vc5_cl_out **cl, void *ptr)
{
        *(struct vc5_cl_out **)(*cl) = ptr;
        cl_advance(cl, sizeof(void *));
}

static inline void
cl_f(struct vc5_cl_out **cl, float f)
{
        cl_u32(cl, fui(f));
}

static inline void
cl_aligned_f(struct vc5_cl_out **cl, float f)
{
        cl_aligned_u32(cl, fui(f));
}

/**
 * Reference to a BO with its associated offset, used in the pack process.
 */
static inline struct vc5_cl_reloc
cl_address(struct vc5_bo *bo, uint32_t offset)
{
        struct vc5_cl_reloc reloc = {
                .bo = bo,
                .offset = offset,
        };
        return reloc;
}

uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align);
void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size);

/* Map generic packet names onto the V3D 3.3 generated definitions. */
#define cl_packet_header(packet) V3D33_ ## packet ## _header
#define cl_packet_length(packet) V3D33_ ## packet ## _length
#define cl_packet_pack(packet)   V3D33_ ## packet ## _pack
#define cl_packet_struct(packet) V3D33_ ## packet

/** Reserves @size bytes in the CL and returns a pointer to their start. */
static inline void *
cl_get_emit_space(struct vc5_cl_out **cl, size_t size)
{
        void *addr = *cl;
        cl_advance(cl, size);
        return addr;
}

/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
 * is created, which you get to set fields in of the form:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
 *         .flags.flat_shade_flags = 1 << 2,
 * }
 *
 * or default values only can be emitted with just:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
 *
 * The trick here is that we make a for loop that will execute the body
 * (either the block or the ';' after the macro invocation) exactly once.
 * The statement expression in the loop's increment runs after the body,
 * packs the filled-in struct into the CL, and clears _loop_terminate so
 * the loop ends.
 */
#define cl_emit(cl, packet, name)                                \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                struct vc5_cl_out *cl_out = cl_start(cl);        \
                cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
                VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out,         \
                                                 cl_packet_length(packet))); \
                cl_advance(&cl_out, cl_packet_length(packet));   \
                cl_end(cl, cl_out);                              \
                _loop_terminate = NULL;                          \
        }))                                                      \

/* Copies an already-packed packet (e.g. one baked at state-object creation
 * time) straight into the CL.
 */
#define cl_emit_prepacked(cl, packet) do {                \
        memcpy((cl)->next, packet, sizeof(*packet));      \
        cl_advance(&(cl)->next, sizeof(*packet));         \
} while (0)

/**
 * Helper function called by the XML-generated pack functions for filling in
 * an address field in shader records.
 *
 * Since we have a private address space as of VC5, our BOs can have lifelong
 * offsets, and all the kernel needs to know is which BOs need to be paged in
 * for this exec.
 */
static inline void
cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc)
{
        if (reloc->bo)
                vc5_job_add_bo(cl->job, reloc->bo);
}

#endif /* VC5_CL_H */
/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all
copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <xf86drm.h> +#include <err.h> + +#include "pipe/p_defines.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_blitter.h" +#include "util/u_upload_mgr.h" +#include "indices/u_primconvert.h" +#include "pipe/p_screen.h" + +#include "vc5_screen.h" +#include "vc5_context.h" +#include "vc5_resource.h" + +void +vc5_flush(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + struct hash_entry *entry; + hash_table_foreach(vc5->jobs, entry) { + struct vc5_job *job = entry->data; + vc5_job_submit(vc5, job); + } +} + +static void +vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, + unsigned flags) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5_flush(pctx); + + if (fence) { + struct pipe_screen *screen = pctx->screen; + struct vc5_fence *f = vc5_fence_create(vc5->screen, + vc5->last_emit_seqno); + screen->fence_reference(screen, fence, NULL); + *fence = (struct pipe_fence_handle *)f; + } +} + +static void +vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_resource *rsc = vc5_resource(prsc); + + rsc->initialized_buffers = 0; + + struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, + prsc); + if (!entry) + return; + + struct vc5_job *job = entry->data; + if (job->key.zsbuf && 
job->key.zsbuf->texture == prsc) + job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); +} + +static void +vc5_context_destroy(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5_flush(pctx); + + if (vc5->blitter) + util_blitter_destroy(vc5->blitter); + + if (vc5->primconvert) + util_primconvert_destroy(vc5->primconvert); + + if (vc5->uploader) + u_upload_destroy(vc5->uploader); + + slab_destroy_child(&vc5->transfer_pool); + + pipe_surface_reference(&vc5->framebuffer.cbufs[0], NULL); + pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL); + + vc5_program_fini(pctx); + + ralloc_free(vc5); +} + +struct pipe_context * +vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_context *vc5; + + /* Prevent dumping of the shaders built during context setup. */ + uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB; + V3D_DEBUG &= ~V3D_DEBUG_SHADERDB; + + vc5 = rzalloc(NULL, struct vc5_context); + if (!vc5) + return NULL; + struct pipe_context *pctx = &vc5->base; + + vc5->screen = screen; + + pctx->screen = pscreen; + pctx->priv = priv; + pctx->destroy = vc5_context_destroy; + pctx->flush = vc5_pipe_flush; + pctx->invalidate_resource = vc5_invalidate_resource; + + vc5_draw_init(pctx); + vc5_state_init(pctx); + vc5_program_init(pctx); + vc5_query_init(pctx); + vc5_resource_context_init(pctx); + + vc5_job_init(vc5); + + vc5->fd = screen->fd; + + slab_create_child(&vc5->transfer_pool, &screen->transfer_pool); + + vc5->uploader = u_upload_create_default(&vc5->base); + vc5->base.stream_uploader = vc5->uploader; + vc5->base.const_uploader = vc5->uploader; + + vc5->blitter = util_blitter_create(pctx); + if (!vc5->blitter) + goto fail; + + vc5->primconvert = util_primconvert_create(pctx, + (1 << PIPE_PRIM_QUADS) - 1); + if (!vc5->primconvert) + goto fail; + + V3D_DEBUG |= saved_shaderdb_flag; + + vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1; + + 
return &vc5->base; + +fail: + pctx->destroy(pctx); + return NULL; +} diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h new file mode 100644 index 00000000000..b8f3f784a85 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_context.h @@ -0,0 +1,466 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
 */

#ifndef VC5_CONTEXT_H
#define VC5_CONTEXT_H

#include <stdio.h>

#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/bitset.h"
#include "util/slab.h"
#include "xf86drm.h"
#include "vc5_drm.h"

struct vc5_job;
struct vc5_bo;
void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);

#define __user
/* NOTE(review): vc5_drm.h is included a second time here (first include is
 * above); harmless with include guards, but presumably only one is needed —
 * confirm which one the __user define is meant to precede.
 */
#include "vc5_drm.h"
#include "vc5_bufmgr.h"
#include "vc5_resource.h"
#include "vc5_cl.h"

#ifdef USE_VC5_SIMULATOR
#define using_vc5_simulator true
#else
#define using_vc5_simulator false
#endif

/* Bits for vc5_context::dirty, indicating which state needs re-emission
 * before the next draw.  Bits 5-6 and 16 are currently unassigned.
 */
#define VC5_DIRTY_BLEND (1 << 0)
#define VC5_DIRTY_RASTERIZER (1 << 1)
#define VC5_DIRTY_ZSA (1 << 2)
#define VC5_DIRTY_FRAGTEX (1 << 3)
#define VC5_DIRTY_VERTTEX (1 << 4)

#define VC5_DIRTY_BLEND_COLOR (1 << 7)
#define VC5_DIRTY_STENCIL_REF (1 << 8)
#define VC5_DIRTY_SAMPLE_MASK (1 << 9)
#define VC5_DIRTY_FRAMEBUFFER (1 << 10)
#define VC5_DIRTY_STIPPLE (1 << 11)
#define VC5_DIRTY_VIEWPORT (1 << 12)
#define VC5_DIRTY_CONSTBUF (1 << 13)
#define VC5_DIRTY_VTXSTATE (1 << 14)
#define VC5_DIRTY_VTXBUF (1 << 15)
#define VC5_DIRTY_SCISSOR (1 << 17)
#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18)
#define VC5_DIRTY_PRIM_MODE (1 << 19)
#define VC5_DIRTY_CLIP (1 << 20)
#define VC5_DIRTY_UNCOMPILED_VS (1 << 21)
#define VC5_DIRTY_UNCOMPILED_FS (1 << 22)
#define VC5_DIRTY_COMPILED_CS (1 << 23)
#define VC5_DIRTY_COMPILED_VS (1 << 24)
#define VC5_DIRTY_COMPILED_FS (1 << 25)
#define VC5_DIRTY_FS_INPUTS (1 << 26)
#define VC5_DIRTY_STREAMOUT (1 << 27)

#define VC5_MAX_FS_INPUTS 64

struct vc5_sampler_view {
        struct pipe_sampler_view base;
        uint32_t p0;
        uint32_t p1;
        /* Precomputed swizzles to pass in to the shader key. */
        uint8_t swizzle[4];

        uint8_t texture_shader_state[32];
};

struct vc5_sampler_state {
        struct pipe_sampler_state base;
        uint32_t p0;
        uint32_t p1;

        uint8_t texture_shader_state[32];
};

/* Per-stage bound textures/samplers plus packed texture state uploads. */
struct vc5_texture_stateobj {
        struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
        unsigned num_textures;
        struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
        unsigned num_samplers;
        struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS];
};

struct vc5_shader_uniform_info {
        enum quniform_contents *contents;
        uint32_t *data;
        uint32_t count;
};

/* State-tracker-provided shader, before compilation to a variant. */
struct vc5_uncompiled_shader {
        /** A name for this program, so you can track it in shader-db output. */
        uint32_t program_id;
        /** How many variants of this program were compiled, for shader-db. */
        uint32_t compiled_variant_count;
        struct pipe_shader_state base;
        uint32_t num_tf_outputs;
        struct v3d_varying_slot *tf_outputs;
        uint16_t tf_specs[PIPE_MAX_SO_BUFFERS];
        uint32_t num_tf_specs;
};

/* A compiled shader variant: its code BO plus backend prog_data. */
struct vc5_compiled_shader {
        struct vc5_bo *bo;

        union {
                struct v3d_prog_data *base;
                struct v3d_vs_prog_data *vs;
                struct v3d_fs_prog_data *fs;
        } prog_data;

        /**
         * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the
         * uniforms have to be rewritten (and therefore the shader state
         * reemitted).
         */
        uint32_t uniform_dirty_bits;
};

struct vc5_program_stateobj {
        struct vc5_uncompiled_shader *bind_vs, *bind_fs;
        struct vc5_compiled_shader *cs, *vs, *fs;
};

struct vc5_constbuf_stateobj {
        struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
        uint32_t enabled_mask;
        uint32_t dirty_mask;
};

struct vc5_vertexbuf_stateobj {
        struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
        unsigned count;
        uint32_t enabled_mask;
        uint32_t dirty_mask;
};

struct vc5_vertex_stateobj {
        struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
        unsigned num_elements;
};

struct vc5_streamout_stateobj {
        struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
        unsigned num_targets;
};

/* Hash table key for vc5->jobs */
struct vc5_job_key {
        struct pipe_surface *cbufs[4];
        struct pipe_surface *zsbuf;
};

/**
 * A complete bin/render job.
 *
 * This is all of the state necessary to submit a bin/render to the kernel.
 * We want to be able to have multiple in progress at a time, so that we don't
 * need to flush an existing CL just to switch to rendering to a new render
 * target (which would mean reading back from the old render target when
 * starting to render to it again).
 */
struct vc5_job {
        struct vc5_context *vc5;
        struct vc5_cl bcl;      /* binner command list */
        struct vc5_cl rcl;      /* render command list */
        struct vc5_cl indirect; /* shader records/uniforms referenced by bcl */
        struct vc5_bo *tile_alloc;
        uint32_t shader_rec_count;

        struct drm_vc5_submit_cl submit;

        /**
         * Set of all BOs referenced by the job. This will be used for making
         * the list of BOs that the kernel will need to have paged in to
         * execute our job.
         */
        struct set *bos;
        /* Size of the submit.bo_handles array. */
        uint32_t bo_handles_size;

        /** @{ Surfaces to submit rendering for. */
        struct pipe_surface *cbufs[4];
        struct pipe_surface *zsbuf;
        /** @} */
        /** @{
         * Bounding box of the scissor across all queued drawing.
         *
         * Note that the max values are exclusive.
         */
        uint32_t draw_min_x;
        uint32_t draw_min_y;
        uint32_t draw_max_x;
        uint32_t draw_max_y;
        /** @} */
        /** @{
         * Width/height of the color framebuffer being rendered to,
         * for VC5_TILE_RENDERING_MODE_CONFIG.
         */
        uint32_t draw_width;
        uint32_t draw_height;
        /** @} */
        /** @{ Tile information, depending on MSAA and float color buffer. */
        uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */
        uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */

        uint32_t tile_width; /** @< Width of a tile. */
        uint32_t tile_height; /** @< Height of a tile. */
        /** maximum internal_bpp of all color render targets. */
        uint32_t internal_bpp;

        /** Whether the current rendering is in a 4X MSAA tile buffer. */
        bool msaa;
        /** @} */

        /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the
         * first rendering.
         */
        uint32_t cleared;
        /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to
         * (either clears or draws).
         */
        uint32_t resolve;
        uint32_t clear_color[2];
        uint32_t clear_zs; /**< 24-bit unorm depth/stencil */

        /**
         * Set if some drawing (triangles, blits, or just a glClear()) has
         * been done to the FBO, meaning that we need to
         * DRM_IOCTL_VC5_SUBMIT_CL.
         */
        bool needs_flush;

        bool uses_early_z;

        /**
         * Number of draw calls (not counting full buffer clears) queued in
         * the current job.
         */
        uint32_t draw_calls_queued;

        struct vc5_job_key key;
};

struct vc5_context {
        struct pipe_context base;

        int fd;
        struct vc5_screen *screen;

        /** The 3D rendering job for the currently bound FBO. */
        struct vc5_job *job;

        /* Map from struct vc5_job_key to the job for that FBO.
         */
        struct hash_table *jobs;

        /**
         * Map from vc5_resource to a job writing to that resource.
         *
         * Primarily for flushing jobs rendering to textures that are now
         * being read from.
         */
        struct hash_table *write_jobs;

        struct slab_child_pool transfer_pool;
        struct blitter_context *blitter;

        /** bitfield of VC5_DIRTY_* */
        uint32_t dirty;

        struct primconvert_context *primconvert;

        struct hash_table *fs_cache, *vs_cache;
        uint32_t next_uncompiled_program_id;
        uint64_t next_compiled_program_id;

        struct vc5_compiler_state *compiler_state;

        uint8_t prim_mode;

        /** Maximum index buffer valid for the current shader_rec. */
        uint32_t max_index;
        /** Last index bias baked into the current shader_rec. */
        uint32_t last_index_bias;

        /** Seqno of the last CL flush's job. */
        uint64_t last_emit_seqno;

        struct u_upload_mgr *uploader;

        /** @{ Current pipeline state objects */
        struct pipe_scissor_state scissor;
        struct pipe_blend_state *blend;
        struct vc5_rasterizer_state *rasterizer;
        struct vc5_depth_stencil_alpha_state *zsa;

        struct vc5_texture_stateobj verttex, fragtex;

        struct vc5_program_stateobj prog;

        struct vc5_vertex_stateobj *vtx;

        struct {
                struct pipe_blend_color f;
                uint16_t hf[4]; /* blend color pre-converted to half-float */
        } blend_color;
        struct pipe_stencil_ref stencil_ref;
        unsigned sample_mask;
        struct pipe_framebuffer_state framebuffer;
        struct pipe_poly_stipple stipple;
        struct pipe_clip_state clip;
        struct pipe_viewport_state viewport;
        struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
        struct vc5_vertexbuf_stateobj vertexbuf;
        struct vc5_streamout_stateobj streamout;
        /** @} */
};

struct vc5_rasterizer_state {
        struct pipe_rasterizer_state base;

        /* VC5_CONFIGURATION_BITS */
        uint8_t config_bits[3];

        float point_size;

        /**
         * Half-float (1/8/7 bits) value of polygon offset units for
         * VC5_PACKET_DEPTH_OFFSET
         */
        uint16_t offset_units;
        /**
         * Half-float (1/8/7 bits) value of polygon offset scale for
         * VC5_PACKET_DEPTH_OFFSET
         */
        uint16_t offset_factor;
};

struct vc5_depth_stencil_alpha_state {
        struct pipe_depth_stencil_alpha_state base;

        bool early_z_enable;

        /**
Uniforms for stencil state. + * + * Index 0 is either the front config, or the front-and-back config. + * Index 1 is the back config if doing separate back stencil. + * Index 2 is the writemask config if it's not a common mask value. + */ + uint32_t stencil_uniforms[3]; +}; + +#define perf_debug(...) do { \ + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +static inline struct vc5_context * +vc5_context(struct pipe_context *pcontext) +{ + return (struct vc5_context *)pcontext; +} + +static inline struct vc5_sampler_view * +vc5_sampler_view(struct pipe_sampler_view *psview) +{ + return (struct vc5_sampler_view *)psview; +} + +static inline struct vc5_sampler_state * +vc5_sampler_state(struct pipe_sampler_state *psampler) +{ + return (struct vc5_sampler_state *)psampler; +} + +struct pipe_context *vc5_context_create(struct pipe_screen *pscreen, + void *priv, unsigned flags); +void vc5_draw_init(struct pipe_context *pctx); +void vc5_state_init(struct pipe_context *pctx); +void vc5_program_init(struct pipe_context *pctx); +void vc5_program_fini(struct pipe_context *pctx); +void vc5_query_init(struct pipe_context *pctx); + +void vc5_simulator_init(struct vc5_screen *screen); +void vc5_simulator_init(struct vc5_screen *screen); +void vc5_simulator_destroy(struct vc5_screen *screen); +void vc5_simulator_destroy(struct vc5_screen *screen); +int vc5_simulator_flush(struct vc5_context *vc5, + struct drm_vc5_submit_cl *args, + struct vc5_job *job); +int vc5_simulator_ioctl(int fd, unsigned long request, void *arg); +void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, + int handle, uint32_t size); + +static inline int +vc5_ioctl(int fd, unsigned long request, void *arg) +{ + if (using_vc5_simulator) + return vc5_simulator_ioctl(fd, request, arg); + else + return drmIoctl(fd, request, arg); +} + +void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader); +struct vc5_cl_reloc 
vc5_write_uniforms(struct vc5_context *vc5, + struct vc5_compiled_shader *shader, + struct vc5_constbuf_stateobj *cb, + struct vc5_texture_stateobj *texstate); + +void vc5_flush(struct pipe_context *pctx); +void vc5_job_init(struct vc5_context *vc5); +struct vc5_job *vc5_get_job(struct vc5_context *vc5, + struct pipe_surface **cbufs, + struct pipe_surface *zsbuf); +struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5); +void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); +void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job); +void vc5_flush_jobs_writing_resource(struct vc5_context *vc5, + struct pipe_resource *prsc); +void vc5_flush_jobs_reading_resource(struct vc5_context *vc5, + struct pipe_resource *prsc); +void vc5_emit_state(struct pipe_context *pctx); +void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode); + +bool vc5_rt_format_supported(enum pipe_format f); +bool vc5_tex_format_supported(enum pipe_format f); +uint8_t vc5_get_rt_format(enum pipe_format f); +uint8_t vc5_get_tex_format(enum pipe_format f); +uint8_t vc5_get_tex_return_size(enum pipe_format f); +uint8_t vc5_get_tex_return_channels(enum pipe_format f); +const uint8_t *vc5_get_format_swizzle(enum pipe_format f); +void vc5_get_internal_type_bpp_for_output_format(uint32_t format, + uint32_t *type, + uint32_t *bpp); + +void vc5_init_query_functions(struct vc5_context *vc5); +void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); +void vc5_blitter_save(struct vc5_context *vc5); +void vc5_emit_rcl(struct vc5_job *job); + + +#endif /* VC5_CONTEXT_H */ diff --git a/src/gallium/drivers/vc5/vc5_draw.c b/src/gallium/drivers/vc5/vc5_draw.c new file mode 100644 index 00000000000..d78fa3265fd --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_draw.c @@ -0,0 +1,607 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
 documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/u_blitter.h"
#include "util/u_prim.h"
#include "util/u_format.h"
#include "util/u_pack_color.h"
#include "util/u_prim_restart.h"
#include "util/u_upload_mgr.h"
#include "indices/u_primconvert.h"

#include "vc5_context.h"
#include "vc5_resource.h"
#include "vc5_cl.h"
#include "broadcom/cle/v3d_packet_v33_pack.h"
#include "broadcom/compiler/v3d_compiler.h"

/**
 * Does the initial binning command list setup for drawing to a given FBO.
 */
static void
vc5_start_draw(struct vc5_context *vc5)
{
        struct vc5_job *job = vc5->job;

        /* Binning setup is emitted once per job; later draws just append. */
        if (job->needs_flush)
                return;

        /* Get space to emit our BCL state, using a branch to jump to a new BO
         * if necessary.
         */
        vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);

        job->submit.bcl_start = job->bcl.bo->offset;
        vc5_job_add_bo(job, job->bcl.bo);

        /* 1MB tile allocation buffer; the TSDA is sized at 64 bytes per
         * tile of the current framebuffer.
         */
        job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc");
        struct vc5_bo *tsda = vc5_bo_alloc(vc5->screen,
                                           job->draw_tiles_y *
                                           job->draw_tiles_x *
                                           64,
                                           "TSDA");

        /* "Binning mode lists start with a Tile Binning Mode Configuration
         * item (120)"
         *
         * Part1 signals the end of binning config setup.
         *
         * NOTE(review): PART2 is intentionally emitted before PART1 here,
         * consistent with the comment above — confirm against the V3D spec.
         */
        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) {
                config.tile_allocation_memory_address =
                        cl_address(job->tile_alloc, 0);
                config.tile_allocation_memory_size = job->tile_alloc->size;
        }

        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) {
                config.tile_state_data_array_base_address =
                        cl_address(tsda, 0);

                config.width_in_tiles = job->draw_tiles_x;
                config.height_in_tiles = job->draw_tiles_y;

                /* Must be >= 1 */
                config.number_of_render_targets = 1;

                config.multisample_mode_4x = job->msaa;

                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
        }

        /* The PART1 reloc above added tsda to the job's BO set, so we can
         * drop our local reference now.
         */
        vc5_bo_unreference(&tsda);

        /* There's definitely nothing in the VCD cache we want. */
        cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);

        /* "Binning mode lists must have a Start Tile Binning item (6) after
         * any prefix state data before the binning list proper starts."
         */
        cl_emit(&job->bcl, START_TILE_BINNING, bin);

        cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, fmt) {
                fmt.data_type = LIST_INDEXED;
                fmt.primitive_type = LIST_TRIANGLES;
        }

        job->needs_flush = true;
        job->draw_width = vc5->framebuffer.width;
        job->draw_height = vc5->framebuffer.height;
}

/* Flushes any job still writing to a texture that this draw will sample,
 * so the sampled contents are up to date.
 */
static void
vc5_predraw_check_textures(struct pipe_context *pctx,
                           struct vc5_texture_stateobj *stage_tex)
{
        struct vc5_context *vc5 = vc5_context(pctx);

        for (int i = 0; i < stage_tex->num_textures; i++) {
                struct pipe_sampler_view *view = stage_tex->textures[i];
                if (!view)
                        continue;

                vc5_flush_jobs_writing_resource(vc5, view->texture);
        }
}

/* Writes (0, 0, 0, 1) default attribute values into the indirect CL and
 * returns a reloc to them; the caller owns the returned BO reference.
 */
static struct vc5_cl_reloc
vc5_get_default_values(struct vc5_context *vc5)
{
        struct vc5_job *job = vc5->job;

        /* VC5_DIRTY_VTXSTATE */
        struct vc5_vertex_stateobj *vtx = vc5->vtx;

        /* Set up the default values for attributes. */
        vc5_cl_ensure_space(&job->indirect, 4 * 4 * vtx->num_elements, 4);
        struct vc5_cl_reloc default_values =
                cl_address(job->indirect.bo, cl_offset(&job->indirect));
        vc5_bo_reference(default_values.bo);

        struct vc5_cl_out *defaults = cl_start(&job->indirect);
        for (int i = 0; i < vtx->num_elements; i++) {
                cl_aligned_f(&defaults, 0.0);
                cl_aligned_f(&defaults, 0.0);
                cl_aligned_f(&defaults, 0.0);
                cl_aligned_f(&defaults, 1.0);
        }
        cl_end(&job->indirect, defaults);

        return default_values;
}

/* Emits the GL shader record (uniforms, shader code addresses, and attribute
 * records) into the indirect CL and references it from the BCL.
 *
 * NOTE(review): extra_index_bias is currently unused in this body — confirm
 * against callers whether it should feed into the attribute offsets.
 */
static void
vc5_emit_gl_shader_state(struct vc5_context *vc5,
                         const struct pipe_draw_info *info,
                         uint32_t extra_index_bias)
{
        struct vc5_job *job = vc5->job;
        /* VC5_DIRTY_VTXSTATE */
        struct vc5_vertex_stateobj *vtx = vc5->vtx;
        /* VC5_DIRTY_VTXBUF */
        struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf;

        /* Upload the uniforms to the indirect CL first */
        struct vc5_cl_reloc fs_uniforms =
                vc5_write_uniforms(vc5, vc5->prog.fs,
                                   &vc5->constbuf[PIPE_SHADER_FRAGMENT],
                                   &vc5->fragtex);
        struct vc5_cl_reloc vs_uniforms =
                vc5_write_uniforms(vc5, vc5->prog.vs,
                                   &vc5->constbuf[PIPE_SHADER_VERTEX],
                                   &vc5->verttex);
        struct vc5_cl_reloc cs_uniforms =
                vc5_write_uniforms(vc5, vc5->prog.cs,
                                   &vc5->constbuf[PIPE_SHADER_VERTEX],
                                   &vc5->verttex);
        struct vc5_cl_reloc default_values = vc5_get_default_values(vc5);

        uint32_t shader_rec_offset =
                vc5_cl_ensure_space(&job->indirect,
                                    cl_packet_length(GL_SHADER_STATE_RECORD) +
                                    vtx->num_elements *
                                    cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
                                    32);

        cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
                shader.enable_clipping = true;
                /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
                shader.point_size_in_shaded_vertex_data =
                        (info->mode == PIPE_PRIM_POINTS &&
                         vc5->rasterizer->base.point_size_per_vertex);

                shader.fragment_shader_does_z_writes =
                        vc5->prog.fs->prog_data.fs->writes_z;

                shader.number_of_varyings_in_fragment_shader =
                        vc5->prog.fs->prog_data.base->num_inputs;

                shader.propagate_nans = true;

                shader.coordinate_shader_code_address =
                        cl_address(vc5->prog.cs->bo, 0);
                shader.vertex_shader_code_address =
                        cl_address(vc5->prog.vs->bo, 0);
                shader.fragment_shader_code_address =
                        cl_address(vc5->prog.fs->bo, 0);

                /* XXX: Use combined input/output size flag in the common
                 * case.
                 */
                shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true;
                shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true;
                shader.coordinate_shader_input_vpm_segment_size =
                        vc5->prog.cs->prog_data.vs->vpm_input_size;
                shader.vertex_shader_input_vpm_segment_size =
                        vc5->prog.vs->prog_data.vs->vpm_input_size;

                shader.coordinate_shader_output_vpm_segment_size =
                        vc5->prog.cs->prog_data.vs->vpm_output_size;
                shader.vertex_shader_output_vpm_segment_size =
                        vc5->prog.vs->prog_data.vs->vpm_output_size;

                shader.coordinate_shader_uniforms_address = cs_uniforms;
                shader.vertex_shader_uniforms_address = vs_uniforms;
                shader.fragment_shader_uniforms_address = fs_uniforms;

                shader.vertex_id_read_by_coordinate_shader =
                        vc5->prog.cs->prog_data.vs->uses_vid;
                shader.instance_id_read_by_coordinate_shader =
                        vc5->prog.cs->prog_data.vs->uses_iid;
                shader.vertex_id_read_by_vertex_shader =
                        vc5->prog.vs->prog_data.vs->uses_vid;
                shader.instance_id_read_by_vertex_shader =
                        vc5->prog.vs->prog_data.vs->uses_iid;

                shader.address_of_default_attribute_values = default_values;
        }

        /* One attribute record per vertex element, translating the pipe
         * vertex format into the hardware's type/size/normalized encoding.
         */
        for (int i = 0; i < vtx->num_elements; i++) {
                struct pipe_vertex_element *elem = &vtx->pipe[i];
                struct pipe_vertex_buffer *vb =
                        &vertexbuf->vb[elem->vertex_buffer_index];
                struct vc5_resource *rsc = vc5_resource(vb->buffer.resource);
                const struct util_format_description *desc =
                        util_format_description(elem->src_format);

                uint32_t offset = (vb->buffer_offset +
                                   elem->src_offset +
                                   vb->stride * info->index_bias);

                cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
                        uint32_t r_size = desc->channel[0].size;

                        /* vec_size == 0 means 4 */
                        attr.vec_size = desc->nr_channels & 3;

                        switch (desc->channel[0].type) {
                        case UTIL_FORMAT_TYPE_FLOAT:
                                if (r_size == 32) {
                                        attr.type = ATTRIBUTE_FLOAT;
                                } else {
                                        assert(r_size == 16);
                                        attr.type = ATTRIBUTE_HALF_FLOAT;
                                }
                                break;

                        case UTIL_FORMAT_TYPE_SIGNED:
                        case UTIL_FORMAT_TYPE_UNSIGNED:
                                switch (r_size) {
                                case 32:
                                        attr.type = ATTRIBUTE_INT;
                                        break;
                                case 16:
                                        attr.type = ATTRIBUTE_SHORT;
                                        break;
                                case 10:
                                        attr.type = ATTRIBUTE_INT2_10_10_10;
                                        break;
                                case 8:
                                        attr.type = ATTRIBUTE_BYTE;
                                        break;
                                default:
                                        fprintf(stderr,
                                                "format %s unsupported\n",
                                                desc->name);
                                        attr.type = ATTRIBUTE_BYTE;
                                        abort();
                                }
                                break;

                        default:
                                fprintf(stderr,
                                        "format %s unsupported\n",
                                        desc->name);
                                abort();
                        }

                        attr.signed_int_type =
                                desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED;

                        attr.normalized_int_type = desc->channel[0].normalized;
                        attr.read_as_int_uint = desc->channel[0].pure_integer;
                        attr.address = cl_address(rsc->bo, offset);
                        attr.stride = vb->stride;
                        attr.instance_divisor = elem->instance_divisor;
                        attr.number_of_values_read_by_coordinate_shader =
                                vc5->prog.cs->prog_data.vs->vattr_sizes[i];
                        attr.number_of_values_read_by_vertex_shader =
                                vc5->prog.vs->prog_data.vs->vattr_sizes[i];
                }
        }

        cl_emit(&job->bcl, GL_SHADER_STATE, state) {
                state.address = cl_address(job->indirect.bo, shader_rec_offset);
                state.number_of_attribute_arrays = vtx->num_elements;
        }

        /* The cl_emit relocs above added these BOs to the job, so drop our
         * local references.
         */
        vc5_bo_unreference(&cs_uniforms.bo);
        vc5_bo_unreference(&vs_uniforms.bo);
        vc5_bo_unreference(&fs_uniforms.bo);
        vc5_bo_unreference(&default_values.bo);

        job->shader_rec_count++;
}

static void
vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
        struct vc5_context *vc5 = vc5_context(pctx);

        if (!info->count_from_stream_output && !info->indirect &&
            !info->primitive_restart &&
            !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
                return;

        /* Fall back for weird desktop GL primitive restart values.
*/ + if (info->primitive_restart && + info->index_size) { + uint32_t mask = ~0; + + switch (info->index_size) { + case 2: + mask = 0xffff; + break; + case 1: + mask = 0xff; + break; + } + + if (info->restart_index != mask) { + util_draw_vbo_without_prim_restart(pctx, info); + return; + } + } + + if (info->mode >= PIPE_PRIM_QUADS) { + util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base); + util_primconvert_draw_vbo(vc5->primconvert, info); + perf_debug("Fallback conversion for %d %s vertices\n", + info->count, u_prim_name(info->mode)); + return; + } + + /* Before setting up the draw, do any fixup blits necessary. */ + vc5_predraw_check_textures(pctx, &vc5->verttex); + vc5_predraw_check_textures(pctx, &vc5->fragtex); + + struct vc5_job *job = vc5_get_job_for_fbo(vc5); + + /* Get space to emit our draw call into the BCL, using a branch to + * jump to a new BO if necessary. + */ + vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); + + if (vc5->prim_mode != info->mode) { + vc5->prim_mode = info->mode; + vc5->dirty |= VC5_DIRTY_PRIM_MODE; + } + + vc5_start_draw(vc5); + vc5_update_compiled_shaders(vc5, info->mode); + + vc5_emit_state(pctx); + + if ((vc5->dirty & (VC5_DIRTY_VTXBUF | + VC5_DIRTY_VTXSTATE | + VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_COMPILED_CS | + VC5_DIRTY_COMPILED_VS | + VC5_DIRTY_COMPILED_FS | + vc5->prog.cs->uniform_dirty_bits | + vc5->prog.vs->uniform_dirty_bits | + vc5->prog.fs->uniform_dirty_bits)) || + vc5->last_index_bias != info->index_bias) { + vc5_emit_gl_shader_state(vc5, info, 0); + } + + vc5->dirty = 0; + + /* Note that the primitive type fields match with OpenGL/gallium + * definitions, up to but not including QUADS. 
+ */ + if (info->index_size) { + uint32_t index_size = info->index_size; + uint32_t offset = info->start * index_size; + struct pipe_resource *prsc; + if (info->has_user_indices) { + prsc = NULL; + u_upload_data(vc5->uploader, 0, + info->count * info->index_size, 4, + info->index.user, + &offset, &prsc); + } else { + prsc = info->index.resource; + } + struct vc5_resource *rsc = vc5_resource(prsc); + + if (info->instance_count > 1) { + cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; + prim.maximum_index = (1u << 31) - 1; /* XXX */ + prim.address_of_indices_list = + cl_address(rsc->bo, offset); + prim.mode = info->mode; + prim.enable_primitive_restarts = info->primitive_restart; + + prim.number_of_instances = info->instance_count; + prim.instance_length = info->count; + } + } else { + cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; + prim.length = info->count; + prim.maximum_index = (1u << 31) - 1; /* XXX */ + prim.address_of_indices_list = + cl_address(rsc->bo, offset); + prim.mode = info->mode; + prim.enable_primitive_restarts = info->primitive_restart; + } + } + + job->draw_calls_queued++; + + if (info->has_user_indices) + pipe_resource_reference(&prsc, NULL); + } else { + if (info->instance_count > 1) { + cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) { + prim.mode = info->mode; + prim.index_of_first_vertex = info->start; + prim.number_of_instances = info->instance_count; + prim.instance_length = info->count; + } + } else { + cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) { + prim.mode = info->mode; + prim.length = info->count; + prim.index_of_first_vertex = info->start; + } + } + } + job->draw_calls_queued++; + + if (vc5->zsa && job->zsbuf && + (vc5->zsa->base.depth.enabled || + vc5->zsa->base.stencil[0].enabled)) { + struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); + vc5_job_add_bo(job, rsc->bo); + + if 
(vc5->zsa->base.depth.enabled) { + job->resolve |= PIPE_CLEAR_DEPTH; + rsc->initialized_buffers = PIPE_CLEAR_DEPTH; + + if (vc5->zsa->early_z_enable) + job->uses_early_z = true; + } + + if (vc5->zsa->base.stencil[0].enabled) { + job->resolve |= PIPE_CLEAR_STENCIL; + rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; + } + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + + if (job->resolve & bit || !job->cbufs[i]) + continue; + struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture); + + job->resolve |= bit; + vc5_job_add_bo(job, rsc->bo); + } + + if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH) + vc5_flush(pctx); +} + +static uint32_t +pack_rgba(enum pipe_format format, const float *rgba) +{ + union util_color uc; + util_pack_color(rgba, format, &uc); + if (util_format_get_blocksize(format) == 2) + return uc.us; + else + return uc.ui[0]; +} + +static void +vc5_clear(struct pipe_context *pctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5_get_job_for_fbo(vc5); + + /* We can't flag new buffers for clearing once we've queued draws. We + * could avoid this by using the 3d engine to clear. + */ + if (job->draw_calls_queued) { + perf_debug("Flushing rendering to process new clear.\n"); + vc5_job_submit(vc5, job); + job = vc5_get_job_for_fbo(vc5); + } + + if (buffers & PIPE_CLEAR_COLOR0) { + struct vc5_resource *rsc = + vc5_resource(vc5->framebuffer.cbufs[0]->texture); + uint32_t clear_color; + +#if 0 + if (vc5_rt_format_is_565(vc5->framebuffer.cbufs[0]->format)) { + /* In 565 mode, the hardware will be packing our color + * for us. + */ + clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, + color->f); + } else { + /* Otherwise, we need to do this packing because we + * support multiple swizzlings of RGBA8888. 
+ */ + clear_color = + pack_rgba(vc5->framebuffer.cbufs[0]->format, + color->f); + } +#endif + clear_color = pack_rgba(vc5->framebuffer.cbufs[0]->format, + color->f); + + job->clear_color[0] = job->clear_color[1] = clear_color; + rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0); + } + + if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { + struct vc5_resource *rsc = + vc5_resource(vc5->framebuffer.zsbuf->texture); + unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; + + if (buffers & PIPE_CLEAR_DEPTH) { + job->clear_zs |= + util_pack_z_stencil(PIPE_FORMAT_S8_UINT_Z24_UNORM, + depth, 0); + } + if (buffers & PIPE_CLEAR_STENCIL) { + job->clear_zs |= + util_pack_z_stencil(PIPE_FORMAT_S8_UINT_Z24_UNORM, + 0, stencil); + } + + rsc->initialized_buffers |= zsclear; + } + + job->draw_min_x = 0; + job->draw_min_y = 0; + job->draw_max_x = vc5->framebuffer.width; + job->draw_max_y = vc5->framebuffer.height; + job->cleared |= buffers; + job->resolve |= buffers; + + vc5_start_draw(vc5); +} + +static void +vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h, + bool render_condition_enabled) +{ + fprintf(stderr, "unimpl: clear RT\n"); +} + +static void +vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h, + bool render_condition_enabled) +{ + fprintf(stderr, "unimpl: clear DS\n"); +} + +void +vc5_draw_init(struct pipe_context *pctx) +{ + pctx->draw_vbo = vc5_draw_vbo; + pctx->clear = vc5_clear; + pctx->clear_render_target = vc5_clear_render_target; + pctx->clear_depth_stencil = vc5_clear_depth_stencil; +} diff --git a/src/gallium/drivers/vc5/vc5_drm.h b/src/gallium/drivers/vc5/vc5_drm.h new file mode 100644 index 00000000000..e70cf9d56a6 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_drm.h @@ -0,0 +1,191 @@ +/* + * Copyright © 2014-2017 Broadcom 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef _VC5_DRM_H_ +#define _VC5_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_VC5_SUBMIT_CL 0x00 +#define DRM_VC5_WAIT_SEQNO 0x01 +#define DRM_VC5_WAIT_BO 0x02 +#define DRM_VC5_CREATE_BO 0x03 +#define DRM_VC5_MMAP_BO 0x04 +#define DRM_VC5_GET_PARAM 0x05 +#define DRM_VC5_GET_BO_OFFSET 0x06 + +#define DRM_IOCTL_VC5_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_SUBMIT_CL, struct drm_vc5_submit_cl) +#define DRM_IOCTL_VC5_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_SEQNO, struct drm_vc5_wait_seqno) +#define DRM_IOCTL_VC5_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_BO, struct drm_vc5_wait_bo) +#define DRM_IOCTL_VC5_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_CREATE_BO, struct drm_vc5_create_bo) +#define DRM_IOCTL_VC5_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_MMAP_BO, struct drm_vc5_mmap_bo) +#define DRM_IOCTL_VC5_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_PARAM, struct drm_vc5_get_param) +#define DRM_IOCTL_VC5_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_BO_OFFSET, struct drm_vc5_get_bo_offset) + +/** + * struct drm_vc5_submit_cl - ioctl argument for submitting commands to the 3D + * engine. + * + * This asks the kernel to have the GPU execute an optional binner + * command list, and a render command list. + */ +struct drm_vc5_submit_cl { + /* Pointer to the binner command list. + * + * This is the first set of commands executed, which runs the + * coordinate shader to determine where primitives land on the screen, + * then writes out the state updates and draw calls necessary per tile + * to the tile allocation BO. + */ + __u32 bcl_start; + + /** End address of the BCL (first byte after the BCL) */ + __u32 bcl_end; + + /* Offset of the render command list. + * + * This is the second set of commands executed, which will either + * execute the tiles that have been set up by the BCL, or a fixed set + * of tiles (in the case of RCL-only blits). 
+ */ + __u32 rcl_start; + + /** End address of the RCL (first byte after the RCL) */ + __u32 rcl_end; + + /* Pointer to a u32 array of the BOs that are referenced by the job. + */ + __u64 bo_handles; + + /* Pointer to an array of chunks of extra submit CL information. (the + * chunk struct is not yet defined) + */ + __u64 chunks; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + __u32 chunk_count; + + __u64 flags; +}; + +/** + * struct drm_vc5_wait_seqno - ioctl argument for waiting for + * DRM_VC5_SUBMIT_CL completion using its returned seqno. + * + * timeout_ns is the timeout in nanoseconds, where "0" means "don't + * block, just return the status." + */ +struct drm_vc5_wait_seqno { + __u64 seqno; + __u64 timeout_ns; +}; + +/** + * struct drm_vc5_wait_bo - ioctl argument for waiting for + * completion of the last DRM_VC5_SUBMIT_CL on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_vc5_wait_bo { + __u32 handle; + __u32 pad; + __u64 timeout_ns; +}; + +/** + * struct drm_vc5_create_bo - ioctl argument for creating VC5 BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_vc5_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + /** + * Returned offset for the BO in the V3D address space. This offset + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + */ + __u32 offset; +}; + +/** + * struct drm_vc5_mmap_bo - ioctl argument for mapping VC5 BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. 
+ * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_vc5_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +enum drm_vc5_param { + DRM_VC5_PARAM_V3D_UIFCFG, + DRM_VC5_PARAM_V3D_HUB_IDENT1, + DRM_VC5_PARAM_V3D_HUB_IDENT2, + DRM_VC5_PARAM_V3D_HUB_IDENT3, + DRM_VC5_PARAM_V3D_CORE0_IDENT0, + DRM_VC5_PARAM_V3D_CORE0_IDENT1, + DRM_VC5_PARAM_V3D_CORE0_IDENT2, +}; + +struct drm_vc5_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +/** + * Returns the offset for the BO in the V3D address space for this DRM fd. + * This is the same value returned by drm_vc5_create_bo, if that was called + * from this DRM fd. + */ +struct drm_vc5_get_bo_offset { + __u32 handle; + __u32 offset; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _VC5_DRM_H_ */ diff --git a/src/gallium/drivers/vc5/vc5_emit.c b/src/gallium/drivers/vc5/vc5_emit.c new file mode 100644 index 00000000000..29ccfcdacdb --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_emit.c @@ -0,0 +1,449 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_half.h" +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +static uint8_t +vc5_factor(enum pipe_blendfactor factor) +{ + /* We may get a bad blendfactor when blending is disabled. */ + if (factor == 0) + return V3D_BLEND_FACTOR_ZERO; + + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + case PIPE_BLENDFACTOR_ONE: + return V3D_BLEND_FACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V3D_BLEND_FACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V3D_BLEND_FACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return V3D_BLEND_FACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V3D_BLEND_FACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V3D_BLEND_FACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V3D_BLEND_FACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return V3D_BLEND_FACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return V3D_BLEND_FACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_CONST_COLOR: + return V3D_BLEND_FACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V3D_BLEND_FACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V3D_BLEND_FACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V3D_BLEND_FACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE; + default: + 
unreachable("Bad blend factor"); + } +} + +static inline uint16_t +swizzled_border_color(struct pipe_sampler_state *sampler, + struct vc5_sampler_view *sview, + int chan) +{ + const struct util_format_description *desc = + util_format_description(sview->base.format); + uint8_t swiz = chan; + + /* If we're doing swizzling in the sampler, then only rearrange the + * border color for the mismatch between the VC5 texture format and + * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by + * the sampler's swizzle. + * + * For swizzling in the shader, we don't do any pre-swizzling of the + * border color. + */ + if (vc5_get_tex_return_size(sview->base.format) != 32) + swiz = desc->swizzle[swiz]; + + switch (swiz) { + case PIPE_SWIZZLE_0: + return util_float_to_half(0.0); + case PIPE_SWIZZLE_1: + return util_float_to_half(1.0); + default: + return util_float_to_half(sampler->border_color.f[swiz]); + } +} + +static void +emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex, + int i) +{ + struct vc5_job *job = vc5->job; + struct pipe_sampler_state *psampler = stage_tex->samplers[i]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + struct pipe_sampler_view *psview = stage_tex->textures[i]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + struct pipe_resource *prsc = psview->texture; + struct vc5_resource *rsc = vc5_resource(prsc); + + stage_tex->texture_state[i].offset = + vc5_cl_ensure_space(&job->indirect, + cl_packet_length(TEXTURE_SHADER_STATE), + 32); + vc5_bo_set_reference(&stage_tex->texture_state[i].bo, + job->indirect.bo); + + struct V3D33_TEXTURE_SHADER_STATE unpacked = { + /* XXX */ + .border_color_red = swizzled_border_color(psampler, sview, 0), + .border_color_green = swizzled_border_color(psampler, sview, 1), + .border_color_blue = swizzled_border_color(psampler, sview, 2), + .border_color_alpha = swizzled_border_color(psampler, sview, 3), + + /* XXX: Disable min/maxlod for txf */ + 
.max_level_of_detail = MIN2(MIN2(psampler->max_lod, + VC5_MAX_MIP_LEVELS), + psview->u.tex.last_level), + + .texture_base_pointer = cl_address(rsc->bo, + rsc->slices[0].offset), + }; + + int min_img_filter = psampler->min_img_filter; + int min_mip_filter = psampler->min_mip_filter; + int mag_img_filter = psampler->mag_img_filter; + + if (vc5_get_tex_return_size(psview->format) == 32) { + min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + } + + bool min_nearest = (min_img_filter == PIPE_TEX_FILTER_NEAREST); + switch (min_mip_filter) { + case PIPE_TEX_MIPFILTER_NONE: + unpacked.minification_filter = 0 + min_nearest; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + unpacked.minification_filter = 2 + !min_nearest; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + unpacked.minification_filter = 4 + !min_nearest; + break; + } + unpacked.magnification_filter = (mag_img_filter == + PIPE_TEX_FILTER_NEAREST); + + uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; + cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); + + for (int i = 0; i < ARRAY_SIZE(packed); i++) + packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; + + cl_emit_prepacked(&job->indirect, &packed); +} + +static void +emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex) +{ + for (int i = 0; i < stage_tex->num_textures; i++) + emit_one_texture(vc5, stage_tex, i); +} + +void +vc5_emit_state(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5->job; + + if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT | + VC5_DIRTY_RASTERIZER)) { + float *vpscale = vc5->viewport.scale; + float *vptranslate = vc5->viewport.translate; + float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; + float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; + float vp_miny = -fabsf(vpscale[1]) + vptranslate[1]; + float vp_maxy = 
fabsf(vpscale[1]) + vptranslate[1]; + + /* Clip to the scissor if it's enabled, but still clip to the + * drawable regardless since that controls where the binner + * tries to put things. + * + * Additionally, always clip the rendering to the viewport, + * since the hardware does guardband clipping, meaning + * primitives would rasterize outside of the view volume. + */ + uint32_t minx, miny, maxx, maxy; + if (!vc5->rasterizer->base.scissor) { + minx = MAX2(vp_minx, 0); + miny = MAX2(vp_miny, 0); + maxx = MIN2(vp_maxx, job->draw_width); + maxy = MIN2(vp_maxy, job->draw_height); + } else { + minx = MAX2(vp_minx, vc5->scissor.minx); + miny = MAX2(vp_miny, vc5->scissor.miny); + maxx = MIN2(vp_maxx, vc5->scissor.maxx); + maxy = MIN2(vp_maxy, vc5->scissor.maxy); + } + + cl_emit(&job->bcl, CLIP_WINDOW, clip) { + clip.clip_window_left_pixel_coordinate = minx; + clip.clip_window_bottom_pixel_coordinate = miny; + clip.clip_window_height_in_pixels = maxy - miny; + clip.clip_window_width_in_pixels = maxx - minx; + clip.clip_window_height_in_pixels = maxy - miny; + } + + job->draw_min_x = MIN2(job->draw_min_x, minx); + job->draw_min_y = MIN2(job->draw_min_y, miny); + job->draw_max_x = MAX2(job->draw_max_x, maxx); + job->draw_max_y = MAX2(job->draw_max_y, maxy); + } + + if (vc5->dirty & (VC5_DIRTY_RASTERIZER | + VC5_DIRTY_ZSA | + VC5_DIRTY_BLEND | + VC5_DIRTY_COMPILED_FS)) { + cl_emit(&job->bcl, CONFIGURATION_BITS, config) { + config.enable_forward_facing_primitive = + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_FRONT); + config.enable_reverse_facing_primitive = + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_BACK); + /* This seems backwards, but it's what gets the + * clipflat test to pass. 
+ */ + config.clockwise_primitives = + vc5->rasterizer->base.front_ccw; + + config.enable_depth_offset = + vc5->rasterizer->base.offset_tri; + + config.rasterizer_oversample_mode = + vc5->rasterizer->base.multisample; + + config.blend_enable = vc5->blend->rt[0].blend_enable; + + config.early_z_updates_enable = true; + if (vc5->zsa->base.depth.enabled) { + config.z_updates_enable = + vc5->zsa->base.depth.writemask; + config.early_z_enable = + vc5->zsa->early_z_enable; + config.depth_test_function = + vc5->zsa->base.depth.func; + } else { + config.depth_test_function = PIPE_FUNC_ALWAYS; + } + } + + } + + if (vc5->dirty & VC5_DIRTY_RASTERIZER) { + cl_emit(&job->bcl, DEPTH_OFFSET, depth) { + depth.depth_offset_factor = + vc5->rasterizer->offset_factor; + depth.depth_offset_units = + vc5->rasterizer->offset_units; + } + + cl_emit(&job->bcl, POINT_SIZE, point_size) { + point_size.point_size = vc5->rasterizer->point_size; + } + + cl_emit(&job->bcl, LINE_WIDTH, line_width) { + line_width.line_width = vc5->rasterizer->base.line_width; + } + } + + if (vc5->dirty & VC5_DIRTY_VIEWPORT) { + cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { + clip.viewport_half_width_in_1_256th_of_pixel = + vc5->viewport.scale[0] * 256.0f; + clip.viewport_half_height_in_1_256th_of_pixel = + vc5->viewport.scale[1] * 256.0f; + } + + cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { + clip.viewport_z_offset_zc_to_zs = + vc5->viewport.translate[2]; + clip.viewport_z_scale_zc_to_zs = + vc5->viewport.scale[2]; + } + if (0 /* XXX */) { + cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { + clip.minimum_zw = (vc5->viewport.translate[2] - + vc5->viewport.scale[2]); + clip.maximum_zw = (vc5->viewport.translate[2] + + vc5->viewport.scale[2]); + } + } + + cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { + vp.viewport_centre_x_coordinate = + vc5->viewport.translate[0]; + vp.viewport_centre_y_coordinate = + vc5->viewport.translate[1]; + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND) { + struct 
pipe_blend_state *blend = vc5->blend; + + cl_emit(&job->bcl, BLEND_CONFIG, config) { + struct pipe_rt_blend_state *rtblend = &blend->rt[0]; + + config.colour_blend_mode = rtblend->rgb_func; + config.colour_blend_dst_factor = + vc5_factor(rtblend->rgb_dst_factor); + config.colour_blend_src_factor = + vc5_factor(rtblend->rgb_src_factor); + + config.alpha_blend_mode = rtblend->alpha_func; + config.alpha_blend_dst_factor = + vc5_factor(rtblend->alpha_dst_factor); + config.alpha_blend_src_factor = + vc5_factor(rtblend->alpha_src_factor); + } + + cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) { + mask.render_target_0_per_colour_component_write_masks = + (~blend->rt[0].colormask) & 0xf; + mask.render_target_1_per_colour_component_write_masks = + (~blend->rt[1].colormask) & 0xf; + mask.render_target_2_per_colour_component_write_masks = + (~blend->rt[2].colormask) & 0xf; + mask.render_target_3_per_colour_component_write_masks = + (~blend->rt[3].colormask) & 0xf; + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) { + cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) { + /* XXX: format-dependent swizzling */ + colour.red_f16 = vc5->blend_color.hf[2]; + colour.green_f16 = vc5->blend_color.hf[1]; + colour.blue_f16 = vc5->blend_color.hf[0]; + colour.alpha_f16 = vc5->blend_color.hf[3]; + } + } + + if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) { + struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0]; + struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1]; + + cl_emit(&job->bcl, STENCIL_CONFIG, config) { + config.front_config = true; + config.back_config = !back->enabled; + + config.stencil_write_mask = front->writemask; + config.stencil_test_mask = front->valuemask; + + config.stencil_test_function = front->func; + config.stencil_pass_op = front->zpass_op; + config.depth_test_fail_op = front->zfail_op; + config.stencil_test_fail_op = front->fail_op; + + config.stencil_ref_value = vc5->stencil_ref.ref_value[0]; + } + + if (back->enabled) { + 
cl_emit(&job->bcl, STENCIL_CONFIG, config) { + config.front_config = false; + config.back_config = true; + + config.stencil_write_mask = back->writemask; + config.stencil_test_mask = back->valuemask; + + config.stencil_test_function = back->func; + config.stencil_pass_op = back->zpass_op; + config.depth_test_fail_op = back->zfail_op; + config.stencil_test_fail_op = back->fail_op; + + config.stencil_ref_value = + vc5->stencil_ref.ref_value[1]; + } + } + } + + if (vc5->dirty & VC5_DIRTY_FRAGTEX) + emit_textures(vc5, &vc5->fragtex); + + if (vc5->dirty & VC5_DIRTY_VERTTEX) + emit_textures(vc5, &vc5->fragtex); + + if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) { + /* XXX: Need to handle more than 24 entries. */ + cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { + flags.varying_offset_v0 = 0; + + flags.flat_shade_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->flat_shade_flags[0] & 0xfffff; + + if (vc5->rasterizer->base.flatshade) { + flags.flat_shade_flags_for_varyings_v024 |= + vc5->prog.fs->prog_data.fs->color_inputs[0] & 0xfffff; + } + } + } + + if (vc5->dirty & VC5_DIRTY_STREAMOUT) { + struct vc5_streamout_stateobj *so = &vc5->streamout; + + if (so->num_targets) { + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { + tfe.number_of_32_bit_output_buffer_address_following = + so->num_targets; + tfe.number_of_16_bit_output_data_specs_following = + vc5->prog.bind_vs->num_tf_specs; + }; + + for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) { + cl_emit_prepacked(&job->bcl, + &vc5->prog.bind_vs->tf_specs[i]); + } + + for (int i = 0; i < so->num_targets; i++) { + const struct pipe_stream_output_target *target = + so->targets[i]; + struct vc5_resource *rsc = + vc5_resource(target->buffer); + + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { + output.address = + cl_address(rsc->bo, + target->buffer_offset); + }; + /* XXX: buffer_size? */ + } + } else { + /* XXX? 
*/ + } + } +} diff --git a/src/gallium/drivers/vc5/vc5_fence.c b/src/gallium/drivers/vc5/vc5_fence.c new file mode 100644 index 00000000000..08de9bca5a1 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_fence.c @@ -0,0 +1,93 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_fence.c + * + * Seqno-based fence management. + * + * We have two mechanisms for waiting in our kernel API: You can wait on a BO + * to have all rendering to from any process to be completed, or wait on a + * seqno for that particular seqno to be passed. The fence API we're + * implementing is based on waiting for all rendering in the context to have + * completed (with no reference to what other processes might be doing with + * the same BOs), so we can just use the seqno of the last rendering we'd + * fired off as our fence marker. 
+ */ + +#include "util/u_inlines.h" + +#include "vc5_screen.h" +#include "vc5_bufmgr.h" + +struct vc5_fence { + struct pipe_reference reference; + uint64_t seqno; +}; + +static void +vc5_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **pp, + struct pipe_fence_handle *pf) +{ + struct vc5_fence **p = (struct vc5_fence **)pp; + struct vc5_fence *f = (struct vc5_fence *)pf; + struct vc5_fence *old = *p; + + if (pipe_reference(&(*p)->reference, &f->reference)) { + free(old); + } + *p = f; +} + +static boolean +vc5_fence_finish(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_fence_handle *pf, + uint64_t timeout_ns) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_fence *f = (struct vc5_fence *)pf; + + return vc5_wait_seqno(screen, f->seqno, timeout_ns, "fence wait"); +} + +struct vc5_fence * +vc5_fence_create(struct vc5_screen *screen, uint64_t seqno) +{ + struct vc5_fence *f = calloc(1, sizeof(*f)); + + if (!f) + return NULL; + + pipe_reference_init(&f->reference, 1); + f->seqno = seqno; + + return f; +} + +void +vc5_fence_init(struct vc5_screen *screen) +{ + screen->base.fence_reference = vc5_fence_reference; + screen->base.fence_finish = vc5_fence_finish; +} diff --git a/src/gallium/drivers/vc5/vc5_formats.c b/src/gallium/drivers/vc5/vc5_formats.c new file mode 100644 index 00000000000..fe2600207e9 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_formats.c @@ -0,0 +1,415 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc5_formats.c + * + * Contains the table and accessors for VC5 texture and render target format + * support. + * + * The hardware has limited support for texture formats, and extremely limited + * support for render target formats. As a result, we emulate other formats + * in our shader code, and this stores the table for doing so. + */ + +#include "util/u_format.h" +#include "util/macros.h" + +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +#define OUTPUT_IMAGE_FORMAT_NO 255 + +struct vc5_format { + /** Set if the pipe format is defined in the table. */ + bool present; + + /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ + uint8_t rt_type; + + /** One of V3D33_TEXTURE_DATA_FORMAT_*. */ + uint8_t tex_type; + + /** + * Swizzle to apply to the RGBA shader output for storing to the tile + * buffer, to the RGBA tile buffer to produce shader input (for + * blending), and for turning the rgba8888 texture sampler return + * value into shader rgba values. + */ + uint8_t swizzle[4]; + + /* Whether the return value is 16F/I/UI or 32F/I/UI. */ + uint8_t return_size; + + /* If return_size == 32, how many channels are returned by texturing. + * 16 always returns 2 pairs of 16 bit values. 
+ */ + uint8_t return_channels; +}; + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels) \ + [PIPE_FORMAT_##pipe] = { \ + true, \ + OUTPUT_IMAGE_FORMAT_##rt, \ + TEXTURE_DATA_FORMAT_##tex, \ + swiz, \ + return_size, \ + return_channels, \ + } + +#define SWIZ_X001 SWIZ(X, 0, 0, 1) +#define SWIZ_XY01 SWIZ(X, Y, 0, 1) +#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1) +#define SWIZ_XYZW SWIZ(X, Y, Z, W) +#define SWIZ_YZWX SWIZ(Y, Z, W, X) +#define SWIZ_YZW1 SWIZ(Y, Z, W, 1) +#define SWIZ_ZYXW SWIZ(Z, Y, X, W) +#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1) +#define SWIZ_XXXY SWIZ(X, X, X, Y) +#define SWIZ_XXX1 SWIZ(X, X, X, 1) +#define SWIZ_XXXX SWIZ(X, X, X, X) +#define SWIZ_000X SWIZ(0, 0, 0, X) + +static const struct vc5_format vc5_format_table[] = { + FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, 0), + FORMAT(B8G8R8X8_UNORM, RGBX8, RGBA8, SWIZ_ZYX1, 16, 0), + FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, 0), + FORMAT(B8G8R8X8_SRGB, SRGBX8, RGBA8, SWIZ_ZYX1, 16, 0), + FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_UNORM, RGBX8, RGBA8, SWIZ_XYZ1, 16, 0), + FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZ1, 16, 0), + FORMAT(B10G10R10A2_UNORM, RGB10_A2, RGB10_A2, SWIZ_ZYXW, 16, 0), + + FORMAT(B4G4R4A4_UNORM, ABGR4444, RGBA4, SWIZ_YZWX, 16, 0), + FORMAT(B4G4R4X4_UNORM, ABGR4444, RGBA4, SWIZ_YZW1, 16, 0), + + FORMAT(B5G5R5A1_UNORM, NO, RGB5_A1, SWIZ_YZWX, 16, 0), + FORMAT(B5G5R5X1_UNORM, NO, RGB5_A1, SWIZ_YZW1, 16, 0), + FORMAT(B5G6R5_UNORM, BGR565, RGB565, SWIZ_XYZ1, 16, 0), + + FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, 0), + FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, 0), + FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, 0), + FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, 0), + + FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, 1), + FORMAT(R16_SNORM, NO, R16_SNORM, 
SWIZ_X001, 32, 1), + FORMAT(R16_FLOAT, R16F, R16F, SWIZ_X001, 16, 0), + FORMAT(R32_FLOAT, R32F, R32F, SWIZ_X001, 32, 1), + + FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, 2), + FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, 2), + FORMAT(R16G16_FLOAT, RG16F, RG16F, SWIZ_XY01, 16, 0), + FORMAT(R32G32_FLOAT, RG32F, RG32F, SWIZ_XY01, 32, 2), + + FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, 4), + FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, 4), + FORMAT(R16G16B16A16_FLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, 0), + FORMAT(R32G32B32A32_FLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, 4), + + /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. */ + FORMAT(L16_UNORM, NO, R16, SWIZ_XXX1, 32, 1), + FORMAT(L16_SNORM, NO, R16_SNORM, SWIZ_XXX1, 32, 1), + FORMAT(I16_UNORM, NO, R16, SWIZ_XXXX, 32, 1), + FORMAT(I16_SNORM, NO, R16_SNORM, SWIZ_XXXX, 32, 1), + FORMAT(A16_UNORM, NO, R16, SWIZ_000X, 32, 1), + FORMAT(A16_SNORM, NO, R16_SNORM, SWIZ_000X, 32, 1), + FORMAT(L16A16_UNORM, NO, RG16, SWIZ_XXXY, 32, 2), + FORMAT(L16A16_SNORM, NO, RG16_SNORM, SWIZ_XXXY, 32, 2), + + FORMAT(A8_UNORM, NO, R8, SWIZ_000X, 16, 0), + FORMAT(L8_UNORM, NO, R8, SWIZ_XXX1, 16, 0), + FORMAT(I8_UNORM, NO, R8, SWIZ_XXXX, 16, 0), + FORMAT(L8A8_UNORM, NO, RG8, SWIZ_XXXY, 16, 0), + + FORMAT(R8_SINT, R8I, S8, SWIZ_X001, 16, 0), + FORMAT(R8_UINT, R8UI, S8, SWIZ_X001, 16, 0), + FORMAT(R8G8_SINT, RG8I, S16, SWIZ_XY01, 16, 0), + FORMAT(R8G8_UINT, RG8UI, S16, SWIZ_XY01, 16, 0), + FORMAT(R8G8B8A8_SINT, RGBA8I, R32F, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8A8_UINT, RGBA8UI, R32F, SWIZ_XYZW, 16, 0), + + FORMAT(R16_SINT, R16I, S16, SWIZ_X001, 16, 0), + FORMAT(R16_UINT, R16UI, S16, SWIZ_X001, 16, 0), + FORMAT(R16G16_SINT, RG16I, R32F, SWIZ_XY01, 16, 0), + FORMAT(R16G16_UINT, RG16UI, R32F, SWIZ_XY01, 16, 0), + FORMAT(R16G16B16A16_SINT, RGBA16I, RG32F, SWIZ_XYZW, 16, 0), + FORMAT(R16G16B16A16_UINT, RGBA16UI, RG32F, SWIZ_XYZW, 16, 0), + + FORMAT(R32_SINT, R32I, R32F, SWIZ_X001, 16, 0), + FORMAT(R32_UINT, 
R32UI, R32F, SWIZ_X001, 16, 0), + FORMAT(R32G32_SINT, RG32I, RG32F, SWIZ_XY01, 16, 0), + FORMAT(R32G32_UINT, RG32UI, RG32F, SWIZ_XY01, 16, 0), + FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32F, SWIZ_XYZW, 16, 0), + FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32F, SWIZ_XYZW, 16, 0), + + FORMAT(A8_SINT, R8I, S8, SWIZ_000X, 16, 0), + FORMAT(A8_UINT, R8UI, S8, SWIZ_000X, 16, 0), + FORMAT(A16_SINT, R16I, S16, SWIZ_000X, 16, 0), + FORMAT(A16_UINT, R16UI, S16, SWIZ_000X, 16, 0), + FORMAT(A32_SINT, R32I, R32F, SWIZ_000X, 16, 0), + FORMAT(A32_UINT, R32UI, R32F, SWIZ_000X, 16, 0), + + FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 16, 0), + FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZW, 16, 0), + + FORMAT(S8_UINT_Z24_UNORM, DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_X001, 32, 1), + FORMAT(X8Z24_UNORM, DEPTH_COMPONENT24, DEPTH24_X8, SWIZ_X001, 32, 1), + FORMAT(S8X24_UINT, NO, R32F, SWIZ_X001, 32, 1), + FORMAT(Z32_FLOAT, DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1), + + /* Pretend we support this, but it'll be separate Z32F depth and S8. 
*/ + FORMAT(Z32_FLOAT_S8X24_UINT, DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1), + + FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), + FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), + FORMAT(ETC2_RGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_SRGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_RGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_R11_UNORM, NO, R11_EAC, SWIZ_X001, 16, 0), + FORMAT(ETC2_R11_SNORM, NO, SIGNED_R11_EAC, SWIZ_X001, 16, 0), + FORMAT(ETC2_RG11_UNORM, NO, RG11_EAC, SWIZ_XY01, 16, 0), + FORMAT(ETC2_RG11_SNORM, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, 0), + + FORMAT(DXT1_RGB, NO, BC1, SWIZ_XYZ1, 16, 0), + FORMAT(DXT3_RGBA, NO, BC2, SWIZ_XYZ1, 16, 0), + FORMAT(DXT5_RGBA, NO, BC3, SWIZ_XYZ1, 16, 0), +}; + +static const struct vc5_format * +get_format(enum pipe_format f) +{ + if (f >= ARRAY_SIZE(vc5_format_table) || + !vc5_format_table[f].present) + return NULL; + else + return &vc5_format_table[f]; +} + +bool +vc5_rt_format_supported(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return false; + + return vf->rt_type != OUTPUT_IMAGE_FORMAT_NO; +} + +uint8_t +vc5_get_rt_format(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return 0; + + return vf->rt_type; +} + +bool +vc5_tex_format_supported(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + return vf != NULL; +} + +uint8_t +vc5_get_tex_format(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return 0; + + return vf->tex_type; +} + +uint8_t +vc5_get_tex_return_size(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return 0; + + return vf->return_size; +} + +uint8_t +vc5_get_tex_return_channels(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return 0; + + return vf->return_channels; +} + +const uint8_t * +vc5_get_format_swizzle(enum 
pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + static const uint8_t fallback[] = {0, 1, 2, 3}; + + if (!vf) + return fallback; + + return vf->swizzle; +} + +void +vc5_get_internal_type_bpp_for_output_format(uint32_t format, + uint32_t *type, + uint32_t *bpp) +{ + switch (format) { + case OUTPUT_IMAGE_FORMAT_RGBA8: + case OUTPUT_IMAGE_FORMAT_RGBX8: + case OUTPUT_IMAGE_FORMAT_RGB8: + case OUTPUT_IMAGE_FORMAT_RG8: + case OUTPUT_IMAGE_FORMAT_R8: + case OUTPUT_IMAGE_FORMAT_ABGR4444: + case OUTPUT_IMAGE_FORMAT_BGR565: + case OUTPUT_IMAGE_FORMAT_ABGR1555: + *type = INTERNAL_TYPE_8; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA8I: + case OUTPUT_IMAGE_FORMAT_RG8I: + case OUTPUT_IMAGE_FORMAT_R8I: + *type = INTERNAL_TYPE_8I; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA8UI: + case OUTPUT_IMAGE_FORMAT_RG8UI: + case OUTPUT_IMAGE_FORMAT_R8UI: + *type = INTERNAL_TYPE_8UI; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8: + case OUTPUT_IMAGE_FORMAT_SRGB: + case OUTPUT_IMAGE_FORMAT_RGB10_A2: + case OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: + case OUTPUT_IMAGE_FORMAT_SRGBX8: + case OUTPUT_IMAGE_FORMAT_RGBA16F: + /* Note that sRGB RTs are stored in the tile buffer at 16F, + * and the conversion to sRGB happens at tilebuffer + * load/store. + */ + *type = INTERNAL_TYPE_16F; + *bpp = INTERNAL_BPP_64; + break; + + case OUTPUT_IMAGE_FORMAT_RG16F: + case OUTPUT_IMAGE_FORMAT_R16F: + *type = INTERNAL_TYPE_16F; + /* Use 64bpp to make sure the TLB doesn't throw away the alpha + * channel before alpha test happens. 
+ */ + *bpp = INTERNAL_BPP_64; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA16I: + *type = INTERNAL_TYPE_16I; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_RG16I: + case OUTPUT_IMAGE_FORMAT_R16I: + *type = INTERNAL_TYPE_16I; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA16UI: + *type = INTERNAL_TYPE_16UI; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_RG16UI: + case OUTPUT_IMAGE_FORMAT_R16UI: + *type = INTERNAL_TYPE_16UI; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA32I: + *type = INTERNAL_TYPE_32I; + *bpp = INTERNAL_BPP_128; + break; + case OUTPUT_IMAGE_FORMAT_RG32I: + *type = INTERNAL_TYPE_32I; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_R32I: + *type = INTERNAL_TYPE_32I; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA32UI: + *type = INTERNAL_TYPE_32UI; + *bpp = INTERNAL_BPP_128; + break; + case OUTPUT_IMAGE_FORMAT_RG32UI: + *type = INTERNAL_TYPE_32UI; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_R32UI: + *type = INTERNAL_TYPE_32UI; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA32F: + *type = INTERNAL_TYPE_32F; + *bpp = INTERNAL_BPP_128; + break; + case OUTPUT_IMAGE_FORMAT_RG32F: + *type = INTERNAL_TYPE_32F; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_R32F: + *type = INTERNAL_TYPE_32F; + *bpp = INTERNAL_BPP_32; + break; + + default: + /* Provide some default values, as we'll be called at RB + * creation time, even if an RB with this format isn't + * supported. 
+ */ + *type = INTERNAL_TYPE_8; + *bpp = INTERNAL_BPP_32; + break; + } +} diff --git a/src/gallium/drivers/vc5/vc5_job.c b/src/gallium/drivers/vc5/vc5_job.c new file mode 100644 index 00000000000..57cf96725b9 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_job.c @@ -0,0 +1,429 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_job.c + * + * Functions for submitting VC5 render jobs to the kernel. 
+ */ + +#include <xf86drm.h> +#include "vc5_context.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "broadcom/clif/clif_dump.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static void +remove_from_ht(struct hash_table *ht, void *key) +{ + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + _mesa_hash_table_remove(ht, entry); +} + +static void +vc5_job_free(struct vc5_context *vc5, struct vc5_job *job) +{ + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + vc5_bo_unreference(&bo); + } + + remove_from_ht(vc5->jobs, &job->key); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) { + remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture); + pipe_surface_reference(&job->cbufs[i], NULL); + } + } + if (job->zsbuf) { + remove_from_ht(vc5->write_jobs, job->zsbuf->texture); + pipe_surface_reference(&job->zsbuf, NULL); + } + + if (vc5->job == job) + vc5->job = NULL; + + vc5_destroy_cl(&job->bcl); + vc5_destroy_cl(&job->rcl); + vc5_destroy_cl(&job->indirect); + vc5_bo_unreference(&job->tile_alloc); + + ralloc_free(job); +} + +static struct vc5_job * +vc5_job_create(struct vc5_context *vc5) +{ + struct vc5_job *job = rzalloc(vc5, struct vc5_job); + + job->vc5 = vc5; + + vc5_init_cl(job, &job->bcl); + vc5_init_cl(job, &job->rcl); + vc5_init_cl(job, &job->indirect); + + job->draw_min_x = ~0; + job->draw_min_y = ~0; + job->draw_max_x = 0; + job->draw_max_y = 0; + + job->bos = _mesa_set_create(job, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + return job; +} + +void +vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo) +{ + if (!bo) + return; + + if (_mesa_set_search(job->bos, bo)) + return; + + vc5_bo_reference(bo); + _mesa_set_add(job->bos, bo); + + uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles; + + if (job->submit.bo_handle_count >= job->bo_handles_size) { + job->bo_handles_size = MAX2(4, 
job->bo_handles_size * 2); + bo_handles = reralloc(job, bo_handles, + uint32_t, job->bo_handles_size); + job->submit.bo_handles = (uintptr_t)(void *)bo_handles; + } + bo_handles[job->submit.bo_handle_count++] = bo->handle; +} + +void +vc5_flush_jobs_writing_resource(struct vc5_context *vc5, + struct pipe_resource *prsc) +{ + struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, + prsc); + if (entry) { + struct vc5_job *job = entry->data; + vc5_job_submit(vc5, job); + } +} + +void +vc5_flush_jobs_reading_resource(struct vc5_context *vc5, + struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + + vc5_flush_jobs_writing_resource(vc5, prsc); + + struct hash_entry *entry; + hash_table_foreach(vc5->jobs, entry) { + struct vc5_job *job = entry->data; + + if (_mesa_set_search(job->bos, rsc->bo)) { + vc5_job_submit(vc5, job); + /* Reminder: vc5->jobs is safe to keep iterating even + * after deletion of an entry. + */ + continue; + } + } +} + +static void +vc5_job_set_tile_buffer_size(struct vc5_job *job) +{ + static const uint8_t tile_sizes[] = { + 64, 64, + 64, 32, + 32, 32, + 32, 16, + 16, 16, + }; + int tile_size_index = 0; + if (job->msaa) + tile_size_index += 2; + + if (job->cbufs[3]) + tile_size_index += 2; + else if (job->cbufs[2]) + tile_size_index++; + + int max_bpp = RENDER_TARGET_MAXIMUM_32BPP; + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) { + struct vc5_surface *surf = vc5_surface(job->cbufs[i]); + max_bpp = MAX2(max_bpp, surf->internal_bpp); + } + } + job->internal_bpp = max_bpp; + STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0); + tile_size_index += max_bpp; + + assert(tile_size_index < ARRAY_SIZE(tile_sizes)); + job->tile_width = tile_sizes[tile_size_index * 2 + 0]; + job->tile_height = tile_sizes[tile_size_index * 2 + 1]; +} + +/** + * Returns a vc5_job structure for tracking V3D rendering to a particular FBO. 
+ * + * If we've already started rendering to this FBO, then return old same job, + * otherwise make a new one. If we're beginning rendering to an FBO, make + * sure that any previous reads of the FBO (or writes to its color/Z surfaces) + * have been flushed. + */ +struct vc5_job * +vc5_get_job(struct vc5_context *vc5, + struct pipe_surface **cbufs, struct pipe_surface *zsbuf) +{ + /* Return the existing job for this FBO if we have one */ + struct vc5_job_key local_key = { + .cbufs = { + cbufs[0], + cbufs[1], + cbufs[2], + cbufs[3], + }, + .zsbuf = zsbuf, + }; + struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs, + &local_key); + if (entry) + return entry->data; + + /* Creating a new job. Make sure that any previous jobs reading or + * writing these buffers are flushed. + */ + struct vc5_job *job = vc5_job_create(vc5); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (cbufs[i]) { + vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture); + pipe_surface_reference(&job->cbufs[i], cbufs[i]); + + if (cbufs[i]->texture->nr_samples > 1) + job->msaa = true; + } + } + if (zsbuf) { + vc5_flush_jobs_reading_resource(vc5, zsbuf->texture); + pipe_surface_reference(&job->zsbuf, zsbuf); + if (zsbuf->texture->nr_samples > 1) + job->msaa = true; + } + + vc5_job_set_tile_buffer_size(job); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (cbufs[i]) + _mesa_hash_table_insert(vc5->write_jobs, + cbufs[i]->texture, job); + } + if (zsbuf) + _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job); + + memcpy(&job->key, &local_key, sizeof(local_key)); + _mesa_hash_table_insert(vc5->jobs, &job->key, job); + + return job; +} + +struct vc5_job * +vc5_get_job_for_fbo(struct vc5_context *vc5) +{ + if (vc5->job) + return vc5->job; + + struct pipe_surface **cbufs = vc5->framebuffer.cbufs; + struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf; + struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf); + + /* The dirty flags are tracking what's been updated while 
vc5->job has + * been bound, so set them all to ~0 when switching between jobs. We + * also need to reset all state at the start of rendering. + */ + vc5->dirty = ~0; + + /* If we're binding to uninitialized buffers, no need to load their + * contents before drawing. + */ + for (int i = 0; i < 4; i++) { + if (cbufs[i]) { + struct vc5_resource *rsc = vc5_resource(cbufs[i]->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_COLOR0 << i; + } + } + + if (zsbuf) { + struct vc5_resource *rsc = vc5_resource(zsbuf->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; + } + + job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width, + job->tile_width); + job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height, + job->tile_height); + + vc5->job = job; + + return job; +} + +static bool +vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr) +{ + struct vc5_job *job = data; + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (void *)entry->key; + + if (addr >= bo->offset && + addr < bo->offset + bo->size) { + vc5_bo_map(bo); + *vaddr = bo->map + addr - bo->offset; + return true; + } + } + + return false; +} + +static void +vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job) +{ + if (!(V3D_DEBUG & V3D_DEBUG_CL)) + return; + + struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo, + stderr, vc5_clif_dump_lookup, + job); + + fprintf(stderr, "BCL: 0x%08x..0x%08x\n", + job->submit.bcl_start, job->submit.bcl_end); + + clif_dump_add_cl(clif, job->submit.bcl_start); + + fprintf(stderr, "RCL: 0x%08x..0x%08x\n", + job->submit.rcl_start, job->submit.rcl_end); + clif_dump_add_cl(clif, job->submit.rcl_start); +} + +/** + * Submits the job to the kernel and then reinitializes it. 
+ */ +void +vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job) +{ + if (!job->needs_flush) + goto done; + + /* The RCL setup would choke if the draw bounds cause no drawing, so + * just drop the drawing if that's the case. + */ + if (job->draw_max_x <= job->draw_min_x || + job->draw_max_y <= job->draw_min_y) { + goto done; + } + + vc5_emit_rcl(job); + + if (cl_offset(&job->bcl) > 0) { + vc5_cl_ensure_space_with_branch(&job->bcl, 2); + + /* Increment the semaphore indicating that binning is done and + * unblocking the render thread. Note that this doesn't act + * until the FLUSH completes. + */ + cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr); + + /* The FLUSH caps all of our bin lists with a + * VC5_PACKET_RETURN. + */ + cl_emit(&job->bcl, FLUSH, flush); + } + + job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); + job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); + + vc5_clif_dump(vc5, job); + + if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) { + int ret; + +#ifndef USE_VC5_SIMULATOR + ret = drmIoctl(vc5->fd, DRM_IOCTL_VC5_SUBMIT_CL, &job->submit); +#else + ret = vc5_simulator_flush(vc5, &job->submit, job); +#endif + static bool warned = false; + if (ret && !warned) { + fprintf(stderr, "Draw call returned %s. 
" + "Expect corruption.\n", strerror(errno)); + warned = true; + } + } + + if (vc5->last_emit_seqno - vc5->screen->finished_seqno > 5) { + if (!vc5_wait_seqno(vc5->screen, + vc5->last_emit_seqno - 5, + PIPE_TIMEOUT_INFINITE, + "job throttling")) { + fprintf(stderr, "Job throttling failed\n"); + } + } + +done: + vc5_job_free(vc5, job); +} + +static bool +vc5_job_compare(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct vc5_job_key)) == 0; +} + +static uint32_t +vc5_job_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct vc5_job_key)); +} + +void +vc5_job_init(struct vc5_context *vc5) +{ + vc5->jobs = _mesa_hash_table_create(vc5, + vc5_job_hash, + vc5_job_compare); + vc5->write_jobs = _mesa_hash_table_create(vc5, + _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + diff --git a/src/gallium/drivers/vc5/vc5_program.c b/src/gallium/drivers/vc5/vc5_program.c new file mode 100644 index 00000000000..02625ed192b --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_program.c @@ -0,0 +1,565 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/ralloc.h" +#include "util/hash_table.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "nir/tgsi_to_nir.h" +#include "compiler/v3d_compiler.h" +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static void +vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so, + const struct pipe_stream_output_info *stream_output) +{ + if (!stream_output->num_outputs) + return; + + struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; + int slot_count = 0; + + for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { + uint32_t buffer_offset = 0; + uint32_t vpm_start = slot_count; + + for (int i = 0; i < stream_output->num_outputs; i++) { + const struct pipe_stream_output *output = + &stream_output->output[i]; + + if (output->output_buffer != buffer) + continue; + + /* We assume that the SO outputs appear in increasing + * order in the buffer. + */ + assert(output->dst_offset >= buffer_offset); + + /* Pad any undefined slots in the output */ + for (int j = buffer_offset; j < output->dst_offset; j++) { + slots[slot_count] = + v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); + slot_count++; + } + + /* Set the coordinate shader up to output the + * components of this varying. 
+ */ + for (int j = 0; j < output->num_components; j++) { + slots[slot_count] = + v3d_slot_from_slot_and_component(VARYING_SLOT_VAR0 + + output->register_index, + output->start_component + j); + slot_count++; + } + } + + uint32_t vpm_size = slot_count - vpm_start; + if (!vpm_size) + continue; + + struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { + .first_shaded_vertex_value_to_output = vpm_start, + .number_of_consecutive_vertex_values_to_output_as_32_bit_values = vpm_size, + .output_buffer_to_write_to = buffer, + }; + V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + (void *)&so->tf_specs[so->num_tf_specs++], + &unpacked); + } + + so->num_tf_outputs = slot_count; + so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, + slot_count); + memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); +} + +static int +type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + +static void * +vc5_shader_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader); + if (!so) + return NULL; + + so->program_id = vc5->next_uncompiled_program_id++; + + nir_shader *s; + + if (cso->type == PIPE_SHADER_IR_NIR) { + /* The backend takes ownership of the NIR shader on state + * creation. 
+ */
+                s = cso->ir.nir;
+
+                NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size,
+                           (nir_lower_io_options)0);
+        } else {
+                assert(cso->type == PIPE_SHADER_IR_TGSI);
+
+                if (V3D_DEBUG & V3D_DEBUG_TGSI) {
+                        fprintf(stderr, "prog %d TGSI:\n",
+                                so->program_id);
+                        tgsi_dump(cso->tokens, 0);
+                        fprintf(stderr, "\n");
+                }
+                s = tgsi_to_nir(cso->tokens, &v3d_nir_options);
+        }
+
+        NIR_PASS_V(s, nir_opt_global_to_local);
+        NIR_PASS_V(s, nir_lower_regs_to_ssa);
+        NIR_PASS_V(s, nir_normalize_cubemap_coords);
+
+        NIR_PASS_V(s, nir_lower_load_const_to_scalar);
+
+        v3d_optimize_nir(s);
+
+        NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local);
+
+        /* Garbage collect dead instructions */
+        nir_sweep(s);
+
+        so->base.type = PIPE_SHADER_IR_NIR;
+        so->base.ir.nir = s;
+
+        vc5_set_transform_feedback_outputs(so, &cso->stream_output);
+
+        if (V3D_DEBUG & (V3D_DEBUG_NIR |
+                         v3d_debug_flag_for_shader_stage(s->stage))) {
+                fprintf(stderr, "%s prog %d NIR:\n",
+                        gl_shader_stage_name(s->stage),
+                        so->program_id);
+                nir_print_shader(s, stderr);
+                fprintf(stderr, "\n");
+        }
+
+        return so;
+}
+
+/* Returns the compiled variant for the given key, compiling the shader's
+ * NIR and uploading the QPU instructions to a BO on first use; results are
+ * memoized in the per-stage variant hash tables.
+ */
+static struct vc5_compiled_shader *
+vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key)
+{
+        struct vc5_uncompiled_shader *shader_state = key->shader_state;
+        nir_shader *s = shader_state->base.ir.nir;
+
+        struct hash_table *ht;
+        uint32_t key_size;
+        if (s->stage == MESA_SHADER_FRAGMENT) {
+                ht = vc5->fs_cache;
+                key_size = sizeof(struct v3d_fs_key);
+        } else {
+                ht = vc5->vs_cache;
+                key_size = sizeof(struct v3d_vs_key);
+        }
+
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        if (entry)
+                return entry->data;
+
+        struct vc5_compiled_shader *shader =
+                rzalloc(NULL, struct vc5_compiled_shader);
+
+        int program_id = shader_state->program_id;
+        int variant_id =
+                p_atomic_inc_return(&shader_state->compiled_variant_count);
+        uint64_t *qpu_insts;
+        uint32_t shader_size;
+
+        switch (s->stage) {
+        case MESA_SHADER_VERTEX:
+                shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data);
+
+                qpu_insts = v3d_compile_vs(vc5->screen->compiler,
+                                           (struct v3d_vs_key *)key,
+                                           shader->prog_data.vs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        case MESA_SHADER_FRAGMENT:
+                shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data);
+
+                qpu_insts = v3d_compile_fs(vc5->screen->compiler,
+                                           (struct v3d_fs_key *)key,
+                                           shader->prog_data.fs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        default:
+                unreachable("bad stage");
+        }
+
+        vc5_set_shader_uniform_dirty_flags(shader);
+
+        shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader");
+        vc5_bo_map(shader->bo);
+        memcpy(shader->bo->map, qpu_insts, shader_size);
+
+        free(qpu_insts);
+
+        /* NOTE(review): dup_key is declared as struct vc5_key but is sized
+         * and copied as a v3d_*_key — presumably the layouts alias; verify
+         * against vc5_context.h.
+         */
+        struct vc5_key *dup_key;
+        dup_key = ralloc_size(shader, key_size);
+        memcpy(dup_key, key, key_size);
+        _mesa_hash_table_insert(ht, dup_key, shader);
+
+        return shader;
+}
+
+/* Fills in the parts of the compile key that are shared between the FS and
+ * VS variants: texture return size/channel/swizzle state and UCP enables.
+ */
+static void
+vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key,
+                     struct vc5_texture_stateobj *texstate)
+{
+        for (int i = 0; i < texstate->num_textures; i++) {
+                struct pipe_sampler_view *sampler = texstate->textures[i];
+                struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler);
+                struct pipe_sampler_state *sampler_state =
+                        texstate->samplers[i];
+
+                if (!sampler)
+                        continue;
+
+                key->tex[i].return_size =
+                        vc5_get_tex_return_size(sampler->format);
+
+                /* For 16-bit, we set up the sampler to always return 2
+                 * channels (meaning no recompiles for most statechanges),
+                 * while for 32 we actually scale the returns with channels.
+                 */
+                if (key->tex[i].return_size == 16) {
+                        key->tex[i].return_channels = 2;
+                } else {
+                        key->tex[i].return_channels =
+                                vc5_get_tex_return_channels(sampler->format);
+                }
+
+                if (vc5_get_tex_return_size(sampler->format) == 32) {
+                        memcpy(key->tex[i].swizzle,
+                               vc5_sampler->swizzle,
+                               sizeof(vc5_sampler->swizzle));
+                } else {
+                        /* For 16-bit returns, we let the sampler state handle
+                         * the swizzle.
+                         */
+                        key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
+                        key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
+                        key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
+                        key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
+                }
+
+                /* NOTE(review): sampler is already known non-NULL here (see
+                 * the continue above), so the "else if (sampler)" test is
+                 * redundant; also note compare/wrap state is skipped for
+                 * MSAA textures — confirm that is intended.
+                 */
+                if (sampler->texture->nr_samples > 1) {
+                        key->tex[i].msaa_width = sampler->texture->width0;
+                        key->tex[i].msaa_height = sampler->texture->height0;
+                } else if (sampler){
+                        key->tex[i].compare_mode = sampler_state->compare_mode;
+                        key->tex[i].compare_func = sampler_state->compare_func;
+                        key->tex[i].wrap_s = sampler_state->wrap_s;
+                        key->tex[i].wrap_t = sampler_state->wrap_t;
+                }
+        }
+
+        key->ucp_enables = vc5->rasterizer->base.clip_plane_enable;
+}
+
+/* Builds the FS compile key from current state and swaps in the matching
+ * compiled variant, raising dirty flags that depend on FS outputs/inputs.
+ */
+static void
+vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct vc5_job *job = vc5->job;
+        struct v3d_fs_key local_key;
+        struct v3d_fs_key *key = &local_key;
+
+        if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                            VC5_DIRTY_BLEND |
+                            VC5_DIRTY_FRAMEBUFFER |
+                            VC5_DIRTY_ZSA |
+                            VC5_DIRTY_RASTERIZER |
+                            VC5_DIRTY_SAMPLE_MASK |
+                            VC5_DIRTY_FRAGTEX |
+                            VC5_DIRTY_UNCOMPILED_FS))) {
+                return;
+        }
+
+        memset(key, 0, sizeof(*key));
+        vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex);
+        key->base.shader_state = vc5->prog.bind_fs;
+        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
+        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
+                         prim_mode <= PIPE_PRIM_LINE_STRIP);
+        key->clamp_color = vc5->rasterizer->base.clamp_fragment_color;
+        if (vc5->blend->logicop_enable) {
+                key->logicop_func = vc5->blend->logicop_func;
+        } else {
+                key->logicop_func = PIPE_LOGICOP_COPY;
+        }
+        if (job->msaa) {
+                key->msaa = vc5->rasterizer->base.multisample;
+                key->sample_coverage = (vc5->rasterizer->base.multisample &&
+                                        vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1);
+                key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage;
+                key->sample_alpha_to_one = vc5->blend->alpha_to_one;
+        }
+
+        key->depth_enabled = (vc5->zsa->base.depth.enabled ||
+                              vc5->zsa->base.stencil[0].enabled);
+        if (vc5->zsa->base.alpha.enabled) {
+                key->alpha_test = true;
+                key->alpha_test_func = vc5->zsa->base.alpha.func;
+        }
+
+        if (vc5->framebuffer.cbufs[0]) {
+                struct pipe_surface *cbuf = vc5->framebuffer.cbufs[0];
+                const struct util_format_description *desc =
+                        util_format_description(cbuf->format);
+
+                key->swap_color_rb = desc->swizzle[0] == PIPE_SWIZZLE_Z;
+        }
+
+        if (key->is_points) {
+                key->point_sprite_mask =
+                        vc5->rasterizer->base.sprite_coord_enable;
+                key->point_coord_upper_left =
+                        (vc5->rasterizer->base.sprite_coord_mode ==
+                         PIPE_SPRITE_COORD_UPPER_LEFT);
+        }
+
+        key->light_twoside = vc5->rasterizer->base.light_twoside;
+
+        struct vc5_compiled_shader *old_fs = vc5->prog.fs;
+        vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base);
+        if (vc5->prog.fs == old_fs)
+                return;
+
+        vc5->dirty |= VC5_DIRTY_COMPILED_FS;
+
+        if (old_fs &&
+            (vc5->prog.fs->prog_data.fs->flat_shade_flags !=
+             old_fs->prog_data.fs->flat_shade_flags ||
+             (vc5->rasterizer->base.flatshade &&
+              vc5->prog.fs->prog_data.fs->color_inputs !=
+              old_fs->prog_data.fs->color_inputs))) {
+                vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+        }
+
+        if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots,
+                             old_fs->prog_data.fs->input_slots,
+                             sizeof(vc5->prog.fs->prog_data.fs->input_slots))) {
+                vc5->dirty |= VC5_DIRTY_FS_INPUTS;
+        }
+}
+
+/* Builds the VS compile key and updates both the VS variant and the coord
+ * shader (binning) variant, which is keyed with is_coord and the transform
+ * feedback outputs instead of the FS inputs.
+ */
+static void
+vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct v3d_vs_key local_key;
+        struct v3d_vs_key *key = &local_key;
+
+        if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                            VC5_DIRTY_RASTERIZER |
+                            VC5_DIRTY_VERTTEX |
+                            VC5_DIRTY_VTXSTATE |
+                            VC5_DIRTY_UNCOMPILED_VS |
+                            VC5_DIRTY_FS_INPUTS))) {
+                return;
+        }
+
+        memset(key, 0, sizeof(*key));
+        vc5_setup_shared_key(vc5, &key->base, &vc5->verttex);
+        key->base.shader_state = vc5->prog.bind_vs;
+        key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs;
+        STATIC_ASSERT(sizeof(key->fs_inputs) ==
+                      sizeof(vc5->prog.fs->prog_data.fs->input_slots));
+        memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots,
+               sizeof(key->fs_inputs));
+        key->clamp_color = vc5->rasterizer->base.clamp_vertex_color;
+
+        key->per_vertex_point_size =
+                (prim_mode == PIPE_PRIM_POINTS &&
+                 vc5->rasterizer->base.point_size_per_vertex);
+
+        struct vc5_compiled_shader *vs =
+                vc5_get_compiled_shader(vc5, &key->base);
+        if (vs != vc5->prog.vs) {
+                vc5->prog.vs = vs;
+                vc5->dirty |= VC5_DIRTY_COMPILED_VS;
+        }
+
+        key->is_coord = true;
+        /* Coord shaders only output varyings used by transform feedback. */
+        struct vc5_uncompiled_shader *shader_state = key->base.shader_state;
+        memcpy(key->fs_inputs, shader_state->tf_outputs,
+               sizeof(*key->fs_inputs) * shader_state->num_tf_outputs);
+        if (shader_state->num_tf_outputs < key->num_fs_inputs) {
+                memset(&key->fs_inputs[shader_state->num_tf_outputs],
+                       0,
+                       sizeof(*key->fs_inputs) * (key->num_fs_inputs -
+                                                  shader_state->num_tf_outputs));
+        }
+        key->num_fs_inputs = shader_state->num_tf_outputs;
+
+        struct vc5_compiled_shader *cs =
+                vc5_get_compiled_shader(vc5, &key->base);
+        if (cs != vc5->prog.cs) {
+                vc5->prog.cs = cs;
+                vc5->dirty |= VC5_DIRTY_COMPILED_CS;
+        }
+}
+
+/* FS must be updated first: the VS key depends on the FS's input slots. */
+void
+vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        vc5_update_compiled_fs(vc5, prim_mode);
+        vc5_update_compiled_vs(vc5, prim_mode);
+}
+
+static uint32_t
+fs_cache_hash(const void *key)
+{
+        return _mesa_hash_data(key, sizeof(struct v3d_fs_key));
+}
+
+static uint32_t
+vs_cache_hash(const void *key)
+{
+        return _mesa_hash_data(key, sizeof(struct v3d_vs_key));
+}
+
+static bool
+fs_cache_compare(const void *key1, const void *key2)
+{
+        return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0;
+}
+
+static bool
+vs_cache_compare(const void *key1, const void *key2)
+{
+        return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0;
+}
+
+/* Frees a cached variant if it belongs to the uncompiled shader being
+ * deleted, clearing the context's last-bound pointer when it matches.
+ */
+static void
+delete_from_cache_if_matches(struct hash_table *ht,
+                             struct vc5_compiled_shader **last_compile,
+                             struct hash_entry *entry,
+                             struct vc5_uncompiled_shader *so)
+{
+        const struct v3d_key *key = entry->key;
+
+        if (key->shader_state == so) {
+                struct vc5_compiled_shader *shader = entry->data;
+                _mesa_hash_table_remove(ht, entry);
+                vc5_bo_unreference(&shader->bo);
+
+                if (shader == *last_compile)
+                        *last_compile = NULL;
+
+                ralloc_free(shader);
+        }
+}
+
+static void
+vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_uncompiled_shader *so = hwcso;
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->fs_cache, entry) {
+                delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs,
+                                             entry, so);
+        }
+        hash_table_foreach(vc5->vs_cache, entry) {
+                delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs,
+                                             entry, so);
+        }
+
+        ralloc_free(so->base.ir.nir);
+        free(so);
+}
+
+static void
+vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        vc5->prog.bind_fs = hwcso;
+        vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS;
+}
+
+static void
+vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        vc5->prog.bind_vs = hwcso;
+        vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS;
+}
+
+void
+vc5_program_init(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        pctx->create_vs_state = vc5_shader_state_create;
+        pctx->delete_vs_state = vc5_shader_state_delete;
+
+        pctx->create_fs_state = vc5_shader_state_create;
+        pctx->delete_fs_state = vc5_shader_state_delete;
+
+        pctx->bind_fs_state = vc5_fp_state_bind;
+        pctx->bind_vs_state = vc5_vp_state_bind;
+
+        vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
+                                                fs_cache_compare);
+        vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
+                                                vs_cache_compare);
+}
+
+void
+vc5_program_fini(struct pipe_context *pctx)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc5->fs_cache, entry) {
+                struct vc5_compiled_shader *shader = entry->data;
+                vc5_bo_unreference(&shader->bo);
+                ralloc_free(shader);
+                _mesa_hash_table_remove(vc5->fs_cache, entry);
+        }
+
+        hash_table_foreach(vc5->vs_cache, entry) {
+                struct vc5_compiled_shader *shader = entry->data;
+                vc5_bo_unreference(&shader->bo);
+                ralloc_free(shader);
+                _mesa_hash_table_remove(vc5->vs_cache, entry);
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_query.c b/src/gallium/drivers/vc5/vc5_query.c
new file mode 100644
index 00000000000..c114e76eef0
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_query.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Stub support for occlusion queries.
+ *
+ * Since we expose support for GL 2.0, we have to expose occlusion queries,
+ * but the spec allows you to expose 0 query counter bits, so we just return 0
+ * as the result of all our queries.
+ */
+#include "vc5_context.h"
+
+/* Dummy query object: no counter state is needed because every query
+ * result is reported as 0.
+ */
+struct vc5_query
+{
+        uint8_t pad;
+};
+
+static struct pipe_query *
+vc5_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+{
+        struct vc5_query *query = calloc(1, sizeof(*query));
+
+        /* Note that struct pipe_query isn't actually defined anywhere. */
+        return (struct pipe_query *)query;
+}
+
+static void
+vc5_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        free(query);
+}
+
+static boolean
+vc5_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        return true;
+}
+
+static bool
+vc5_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+        return true;
+}
+
+/* Always reports a result of 0, as permitted by the GL spec when 0 query
+ * counter bits are exposed.
+ */
+static boolean
+vc5_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
+                     boolean wait, union pipe_query_result *vresult)
+{
+        uint64_t *result = &vresult->u64;
+
+        *result = 0;
+
+        return true;
+}
+
+static void
+vc5_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
+void
+vc5_query_init(struct pipe_context *pctx)
+{
+        pctx->create_query = vc5_create_query;
+        pctx->destroy_query = vc5_destroy_query;
+        pctx->begin_query = vc5_begin_query;
+        pctx->end_query = vc5_end_query;
+        pctx->get_query_result = vc5_get_query_result;
+        pctx->set_active_query_state = vc5_set_active_query_state;
+}
+
diff --git a/src/gallium/drivers/vc5/vc5_rcl.c b/src/gallium/drivers/vc5/vc5_rcl.c
new file mode 100644
index 00000000000..287a35aa33e
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_rcl.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "vc5_context.h"
+#include "vc5_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/* Builds the render control list (RCL) for the job: the per-frame render
+ * mode configuration followed by one reload/branch/store sequence per tile
+ * in the job's drawn bounds.
+ */
+void
+vc5_emit_rcl(struct vc5_job *job)
+{
+        uint32_t min_x_tile = job->draw_min_x / job->tile_width;
+        uint32_t min_y_tile = job->draw_min_y / job->tile_height;
+        uint32_t max_x_tile = (job->draw_max_x - 1) / job->tile_width;
+        uint32_t max_y_tile = (job->draw_max_y - 1) / job->tile_height;
+
+        /* The RCL list should be empty. */
+        assert(!job->rcl.bo);
+
+        vc5_cl_ensure_space(&job->rcl,
+                            256 +
+                            (64 *
+                             (max_x_tile - min_x_tile + 1) *
+                             (max_y_tile - min_y_tile + 1)), 1);
+
+        job->submit.rcl_start = job->rcl.bo->offset;
+        vc5_job_add_bo(job, job->rcl.bo);
+
+        int nr_cbufs = 0;
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (job->cbufs[i])
+                        nr_cbufs = i + 1;
+        }
+
+        /* Common config must be the first TILE_RENDERING_MODE_CONFIGURATION
+         * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are
+         * optional updates to the previous HW state.
+         */
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION,
+                config) {
+                config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH;
+                config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL;
+
+                config.early_z_disable = !job->uses_early_z;
+
+                config.image_width_pixels = job->draw_width;
+                config.image_height_pixels = job->draw_height;
+
+                config.number_of_render_targets_minus_1 =
+                        MAX2(nr_cbufs, 1) - 1;
+
+                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
+        }
+
+        for (int i = 0; i < nr_cbufs; i++) {
+                cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
+                        struct pipe_surface *psurf = job->cbufs[i];
+                        /* NOTE(review): this 'continue' exits the cl_emit()
+                         * packet block early — confirm the macro still emits
+                         * a complete (zeroed) packet in that case.
+                         */
+                        if (!psurf)
+                                continue;
+
+                        struct vc5_surface *surf = vc5_surface(psurf);
+                        struct vc5_resource *rsc = vc5_resource(psurf->texture);
+                        rt.address = cl_address(rsc->bo, surf->offset);
+                        rt.internal_type = surf->internal_type;
+                        rt.output_image_format = surf->format;
+                        rt.memory_format = surf->tiling;
+                        rt.internal_bpp = surf->internal_bpp;
+                        rt.render_target_number = i;
+
+                        if (job->resolve & PIPE_CLEAR_COLOR0 << i)
+                                rsc->writes++;
+                }
+        }
+
+        /* TODO: Don't bother emitting if we don't load/clear Z/S. */
+        if (job->zsbuf) {
+                struct pipe_surface *psurf = job->zsbuf;
+                struct vc5_surface *surf = vc5_surface(psurf);
+                struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+                cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) {
+                        zs.address = cl_address(rsc->bo, surf->offset);
+
+                        zs.internal_type = surf->internal_type;
+                        zs.output_image_format = surf->format;
+
+                        struct vc5_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
+                        /* XXX */
+                        zs.padded_height_of_output_image_in_uif_blocks =
+                                (slice->size / slice->stride) / (2 * vc5_utile_height(rsc->cpp));
+
+                        assert(surf->tiling != VC5_TILING_RASTER);
+                        zs.memory_format = surf->tiling;
+                }
+
+                if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL)
+                        rsc->writes++;
+        }
+
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1,
+                clear) {
+                clear.clear_color_low_32_bits = job->clear_color[0];
+        };
+
+        /* Ends rendering mode config. */
+        cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES,
+                clear) {
+                clear.z_s_clear_value = job->clear_zs;
+        };
+
+        /* Always set initial block size before the first branch, which needs
+         * to match the value from binning mode config.
+         */
+        cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+                init.use_auto_chained_tile_lists = true;
+                init.size_of_first_block_in_chained_tile_lists =
+                        TILE_ALLOCATION_BLOCK_SIZE_64B;
+        }
+
+        cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
+
+        /* Start by clearing the tile buffer. */
+        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
+                coords.tile_column_number = 0;
+                coords.tile_row_number = 0;
+        }
+
+        cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
+                store.buffer_to_store = NONE;
+        }
+
+        cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
+
+        const uint32_t pipe_clear_color_buffers = (PIPE_CLEAR_COLOR0 |
+                                                   PIPE_CLEAR_COLOR1 |
+                                                   PIPE_CLEAR_COLOR2 |
+                                                   PIPE_CLEAR_COLOR3);
+        const uint32_t first_color_buffer_bit = (ffs(PIPE_CLEAR_COLOR0) - 1);
+
+        for (int y = min_y_tile; y <= max_y_tile; y++) {
+                for (int x = min_x_tile; x <= max_x_tile; x++) {
+                        uint32_t read_but_not_cleared = job->resolve & ~job->cleared;
+
+                        /* The initial reload will be queued until we get the
+                         * tile coordinates.
+                         */
+                        if (read_but_not_cleared) {
+                                cl_emit(&job->rcl, RELOAD_TILE_COLOUR_BUFFER, load) {
+                                        load.disable_colour_buffer_load =
+                                                (~read_but_not_cleared & pipe_clear_color_buffers) >>
+                                                first_color_buffer_bit;
+                                        load.enable_z_load =
+                                                read_but_not_cleared & PIPE_CLEAR_DEPTH;
+                                        load.enable_stencil_load =
+                                                read_but_not_cleared & PIPE_CLEAR_STENCIL;
+                                }
+                        }
+
+                        /* Tile Coordinates triggers the reload and sets where
+                         * the stores go. There must be one per store packet.
+                         */
+                        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
+                                coords.tile_column_number = x;
+                                coords.tile_row_number = y;
+                        }
+
+                        cl_emit(&job->rcl, BRANCH_TO_AUTO_CHAINED_SUB_LIST, branch) {
+                                uint32_t bin_tile_stride =
+                                        (align(job->draw_width,
+                                               job->tile_width) /
+                                         job->tile_width);
+                                uint32_t bin_index =
+                                        (y * bin_tile_stride + x);
+                                branch.address = cl_address(job->tile_alloc,
+                                                            64 * bin_index);
+                        }
+
+                        cl_emit(&job->rcl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
+                                uint32_t color_write_enables =
+                                        job->resolve >> first_color_buffer_bit;
+
+                                store.disable_color_buffer_write = (~color_write_enables) & 0xf;
+                                store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
+                                store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
+
+                                store.disable_colour_buffers_clear_on_write =
+                                        (job->cleared & pipe_clear_color_buffers) == 0;
+                                store.disable_z_buffer_clear_on_write =
+                                        !(job->cleared & PIPE_CLEAR_DEPTH);
+                                store.disable_stencil_buffer_clear_on_write =
+                                        !(job->cleared & PIPE_CLEAR_STENCIL);
+
+                                store.last_tile_of_frame = (x == max_x_tile &&
+                                                            y == max_y_tile);
+                        };
+                }
+        }
+}
diff --git a/src/gallium/drivers/vc5/vc5_resource.c b/src/gallium/drivers/vc5/vc5_resource.c
new file mode 100644
index 00000000000..8dbdb71e735
--- /dev/null
+++ b/src/gallium/drivers/vc5/vc5_resource.c
@@ -0,0 +1,758 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be
included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_blit.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+#include "util/u_upload_mgr.h"
+
+#include "drm_fourcc.h"
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "vc5_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#ifndef DRM_FORMAT_MOD_INVALID
+#define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
+#endif
+
+/* (Re)allocates the backing BO for a resource whose slice layout has been
+ * set up, dropping any previous BO. Returns false on allocation failure.
+ */
+static bool
+vc5_resource_bo_alloc(struct vc5_resource *rsc)
+{
+        struct pipe_resource *prsc = &rsc->base.b;
+        struct pipe_screen *pscreen = prsc->screen;
+        struct vc5_bo *bo;
+        int layers = (prsc->target == PIPE_TEXTURE_3D ?
+                      prsc->depth0 : prsc->array_size);
+
+        bo = vc5_bo_alloc(vc5_screen(pscreen),
+                          rsc->slices[0].offset +
+                          rsc->slices[0].size +
+                          rsc->cube_map_stride * layers - 1,
+                          "resource");
+        if (bo) {
+                DBG(V3D_DEBUG_SURFACE, "alloc %p @ 0x%08x:\n", rsc, bo->offset);
+                vc5_bo_unreference(&rsc->bo);
+                rsc->bo = bo;
+                return true;
+        } else {
+                return false;
+        }
+}
+
+/* Writes back a shadow/tiled staging buffer (if any) on unmap, blitting the
+ * single-sample temporary back into the MSAA resource when needed.
+ */
+static void
+vc5_resource_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *ptrans)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_transfer *trans = vc5_transfer(ptrans);
+
+        if (trans->map) {
+                struct vc5_resource *rsc;
+                struct vc5_resource_slice *slice;
+                if (trans->ss_resource) {
+                        rsc = vc5_resource(trans->ss_resource);
+                        slice = &rsc->slices[0];
+                } else {
+                        rsc = vc5_resource(ptrans->resource);
+                        slice = &rsc->slices[ptrans->level];
+                }
+
+                if (ptrans->usage & PIPE_TRANSFER_WRITE) {
+                        vc5_store_tiled_image(rsc->bo->map + slice->offset +
+                                              ptrans->box.z * rsc->cube_map_stride,
+                                              slice->stride,
+                                              trans->map, ptrans->stride,
+                                              slice->tiling, rsc->cpp,
+                                              rsc->base.b.height0,
+                                              &ptrans->box);
+                }
+                free(trans->map);
+        }
+
+        if (trans->ss_resource && (ptrans->usage & PIPE_TRANSFER_WRITE)) {
+                struct pipe_blit_info blit;
+                memset(&blit, 0, sizeof(blit));
+
+                blit.src.resource = trans->ss_resource;
+                blit.src.format = trans->ss_resource->format;
+                blit.src.box.width = trans->ss_box.width;
+                blit.src.box.height = trans->ss_box.height;
+                blit.src.box.depth = 1;
+
+                blit.dst.resource = ptrans->resource;
+                blit.dst.format = ptrans->resource->format;
+                blit.dst.level = ptrans->level;
+                blit.dst.box = trans->ss_box;
+
+                blit.mask = util_format_get_mask(ptrans->resource->format);
+                blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+                pctx->blit(pctx, &blit);
+
+                pipe_resource_reference(&trans->ss_resource, NULL);
+        }
+
+        pipe_resource_reference(&ptrans->resource, NULL);
+        slab_free(&vc5->transfer_pool, ptrans);
+}
+
+/* Creates a single-sampled, single-level temporary sized to the mapped box,
+ * used as the resolve target when mapping an MSAA resource.
+ */
+static struct pipe_resource *
+vc5_get_temp_resource(struct pipe_context *pctx,
+                      struct pipe_resource *prsc,
+                      const struct pipe_box *box)
+{
+        struct pipe_resource temp_setup;
+
+        memset(&temp_setup, 0, sizeof(temp_setup));
+        temp_setup.target = prsc->target;
+        temp_setup.format = prsc->format;
+        temp_setup.width0 = box->width;
+        temp_setup.height0 = box->height;
+        temp_setup.depth0 = 1;
+        temp_setup.array_size = 1;
+
+        return pctx->screen->resource_create(pctx->screen, &temp_setup);
+}
+
+static void *
+vc5_resource_transfer_map(struct pipe_context *pctx,
+                          struct pipe_resource *prsc,
+                          unsigned level, unsigned usage,
+                          const struct pipe_box *box,
+                          struct pipe_transfer **pptrans)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        struct vc5_transfer *trans;
+        struct pipe_transfer *ptrans;
+        enum pipe_format format = prsc->format;
+        char *buf;
+
+        /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
+         * being mapped.
+         */
+        if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+            !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) &&
+            prsc->last_level == 0 &&
+            prsc->width0 == box->width &&
+            prsc->height0 == box->height &&
+            prsc->depth0 == box->depth &&
+            prsc->array_size == 1 &&
+            rsc->bo->private) {
+                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+        }
+
+        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+                if (vc5_resource_bo_alloc(rsc)) {
+                        /* If it might be bound as one of our vertex buffers
+                         * or UBOs, make sure we re-emit vertex buffer state
+                         * or uniforms.
+                         */
+                        if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                                vc5->dirty |= VC5_DIRTY_VTXBUF;
+                        if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER)
+                                vc5->dirty |= VC5_DIRTY_CONSTBUF;
+                } else {
+                        /* If we failed to reallocate, flush users so that we
+                         * don't violate any syncing requirements.
+                         */
+                        vc5_flush_jobs_reading_resource(vc5, prsc);
+                }
+        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+                /* If we're writing and the buffer is being used by the CL, we
+                 * have to flush the CL first. If we're only reading, we need
+                 * to flush if the CL has written our buffer.
+                 */
+                if (usage & PIPE_TRANSFER_WRITE)
+                        vc5_flush_jobs_reading_resource(vc5, prsc);
+                else
+                        vc5_flush_jobs_writing_resource(vc5, prsc);
+        }
+
+        if (usage & PIPE_TRANSFER_WRITE) {
+                rsc->writes++;
+                rsc->initialized_buffers = ~0;
+        }
+
+        trans = slab_alloc(&vc5->transfer_pool);
+        if (!trans)
+                return NULL;
+
+        /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */
+
+        /* slab_alloc_st() doesn't zero: */
+        memset(trans, 0, sizeof(*trans));
+        ptrans = &trans->base;
+
+        pipe_resource_reference(&ptrans->resource, prsc);
+        ptrans->level = level;
+        ptrans->usage = usage;
+        ptrans->box = *box;
+
+        /* If the resource is multisampled, we need to resolve to single
+         * sample. This seems like it should be handled at a higher layer.
+         */
+        if (prsc->nr_samples > 1) {
+                trans->ss_resource = vc5_get_temp_resource(pctx, prsc, box);
+                if (!trans->ss_resource)
+                        goto fail;
+                assert(!trans->ss_resource->nr_samples);
+
+                /* The ptrans->box gets modified for tile alignment, so save
+                 * the original box for unmap time.
+                 */
+                trans->ss_box = *box;
+
+                if (usage & PIPE_TRANSFER_READ) {
+                        struct pipe_blit_info blit;
+                        memset(&blit, 0, sizeof(blit));
+
+                        blit.src.resource = ptrans->resource;
+                        blit.src.format = ptrans->resource->format;
+                        blit.src.level = ptrans->level;
+                        blit.src.box = trans->ss_box;
+
+                        blit.dst.resource = trans->ss_resource;
+                        blit.dst.format = trans->ss_resource->format;
+                        blit.dst.box.width = trans->ss_box.width;
+                        blit.dst.box.height = trans->ss_box.height;
+                        blit.dst.box.depth = 1;
+
+                        blit.mask = util_format_get_mask(prsc->format);
+                        blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+                        pctx->blit(pctx, &blit);
+                        vc5_flush_jobs_writing_resource(vc5, blit.dst.resource);
+                }
+
+                /* The rest of the mapping process should use our temporary. */
+                prsc = trans->ss_resource;
+                rsc = vc5_resource(prsc);
+                ptrans->box.x = 0;
+                ptrans->box.y = 0;
+                ptrans->box.z = 0;
+        }
+
+        /* Note that the current kernel implementation is synchronous, so no
+         * need to do syncing stuff here yet.
+         */
+
+        if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+                buf = vc5_bo_map_unsynchronized(rsc->bo);
+        else
+                buf = vc5_bo_map(rsc->bo);
+        if (!buf) {
+                fprintf(stderr, "Failed to map bo\n");
+                goto fail;
+        }
+
+        *pptrans = ptrans;
+
+        struct vc5_resource_slice *slice = &rsc->slices[level];
+        if (rsc->tiled) {
+                /* No direct mappings of tiled, since we need to manually
+                 * tile/untile.
+                 */
+                if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+                        return NULL;
+
+                ptrans->stride = ptrans->box.width * rsc->cpp;
+                ptrans->layer_stride = ptrans->stride * ptrans->box.height;
+
+                /* NOTE(review): malloc result is not checked — an OOM here
+                 * would crash in the tiled load/store below.
+                 */
+                trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
+
+                if (usage & PIPE_TRANSFER_READ) {
+                        vc5_load_tiled_image(trans->map, ptrans->stride,
+                                             buf + slice->offset +
+                                             ptrans->box.z * rsc->cube_map_stride,
+                                             slice->stride,
+                                             slice->tiling, rsc->cpp,
+                                             rsc->base.b.height0,
+                                             &ptrans->box);
+                }
+                return trans->map;
+        } else {
+                ptrans->stride = slice->stride;
+                ptrans->layer_stride = ptrans->stride;
+
+                return buf + slice->offset +
+                        ptrans->box.y / util_format_get_blockheight(format) * ptrans->stride +
+                        ptrans->box.x / util_format_get_blockwidth(format) * rsc->cpp +
+                        ptrans->box.z * rsc->cube_map_stride;
+        }
+
+
+fail:
+        vc5_resource_transfer_unmap(pctx, ptrans);
+        return NULL;
+}
+
+static void
+vc5_resource_destroy(struct pipe_screen *pscreen,
+                     struct pipe_resource *prsc)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        vc5_bo_unreference(&rsc->bo);
+        free(rsc);
+}
+
+static boolean
+vc5_resource_get_handle(struct pipe_screen *pscreen,
+                        struct pipe_resource *prsc,
+                        struct winsys_handle *whandle)
+{
+        struct vc5_resource *rsc = vc5_resource(prsc);
+        struct vc5_bo *bo = rsc->bo;
+
+        whandle->stride = rsc->slices[0].stride;
+
+        /* If we're passing some reference to our BO out to some other part of
+         * the system, then we can't do any optimizations about only us being
+         * the ones seeing it (like BO caching).
+         */
+        bo->private = false;
+
+        switch (whandle->type) {
+        case DRM_API_HANDLE_TYPE_SHARED:
+                return vc5_bo_flink(bo, &whandle->handle);
+        case DRM_API_HANDLE_TYPE_KMS:
+                whandle->handle = bo->handle;
+                return TRUE;
+        case DRM_API_HANDLE_TYPE_FD:
+                whandle->handle = vc5_bo_get_dmabuf(bo);
+                return whandle->handle != -1;
+        }
+
+        return FALSE;
+}
+
+static const struct u_resource_vtbl vc5_resource_vtbl = {
+        .resource_get_handle = vc5_resource_get_handle,
+        .resource_destroy = vc5_resource_destroy,
+        .transfer_map = vc5_resource_transfer_map,
+        .transfer_flush_region = u_default_transfer_flush_region,
+        .transfer_unmap = vc5_resource_transfer_unmap,
+};
+
+/* Lays out each miplevel's tiling mode, stride, size, and offset, then
+ * applies the 4k page alignment and computes the per-layer stride.
+ */
+static void
+vc5_setup_slices(struct vc5_resource *rsc, const char *caller)
+{
+        struct pipe_resource *prsc = &rsc->base.b;
+        uint32_t width = prsc->width0;
+        uint32_t height = prsc->height0;
+        uint32_t pot_width = util_next_power_of_two(width);
+        uint32_t pot_height = util_next_power_of_two(height);
+        uint32_t offset = 0;
+        uint32_t utile_w = vc5_utile_width(rsc->cpp);
+        uint32_t utile_h = vc5_utile_height(rsc->cpp);
+        uint32_t uif_block_w = utile_w * 2;
+        uint32_t uif_block_h = utile_h * 2;
+        bool uif_top = false;
+
+        for (int i = prsc->last_level; i >= 0; i--) {
+                struct vc5_resource_slice *slice = &rsc->slices[i];
+
+                uint32_t level_width, level_height;
+                if (i < 2) {
+                        level_width = u_minify(width, i);
+                        level_height = u_minify(height, i);
+                } else {
+                        level_width = u_minify(pot_width, i);
+                        level_height = u_minify(pot_height, i);
+                }
+
+                if (!rsc->tiled) {
+                        slice->tiling = VC5_TILING_RASTER;
+                        if (prsc->nr_samples > 1) {
+                                /* MSAA (4x) surfaces are stored as raw tile
+                                 * buffer contents.
+                                 */
+                                level_width = align(level_width, 32);
+                                level_height = align(level_height, 32);
+                        }
+                } else {
+                        if ((i != 0 || !uif_top) &&
+                            (level_width <= utile_w ||
+                             level_height <= utile_h)) {
+                                slice->tiling = VC5_TILING_LINEARTILE;
+                                level_width = align(level_width, utile_w);
+                                level_height = align(level_height, utile_h);
+                        } else if ((i != 0 || !uif_top) &&
+                                   level_width <= uif_block_w) {
+                                slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN;
+                                level_width = align(level_width, uif_block_w);
+                                level_height = align(level_height, uif_block_h);
+                        } else if ((i != 0 || !uif_top) &&
+                                   level_width <= 2 * uif_block_w) {
+                                slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN;
+                                level_width = align(level_width, 2 * uif_block_w);
+                                level_height = align(level_height, uif_block_h);
+                        } else {
+                                slice->tiling = VC5_TILING_UIF_NO_XOR;
+
+                                level_width = align(level_width,
+                                                    4 * uif_block_w);
+                                level_height = align(level_height,
+                                                     4 * uif_block_h);
+                        }
+                }
+
+                slice->offset = offset;
+                slice->stride = (level_width * rsc->cpp *
+                                 MAX2(prsc->nr_samples, 1));
+                slice->size = level_height * slice->stride;
+
+                offset += slice->size;
+
+                if (V3D_DEBUG & V3D_DEBUG_SURFACE) {
+                        static const char *const tiling_descriptions[] = {
+                                [VC5_TILING_RASTER] = "R",
+                                [VC5_TILING_LINEARTILE] = "LT",
+                                [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1",
+                                [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2",
+                                [VC5_TILING_UIF_NO_XOR] = "UIF",
+                                [VC5_TILING_UIF_XOR] = "UIF^",
+                        };
+
+                        fprintf(stderr,
+                                "rsc %s %p (format %s), %dx%d: "
+                                "level %d (%s) %dx%d -> %dx%d, stride %d@0x%08x\n",
+                                caller, rsc,
+                                util_format_short_name(prsc->format),
+                                prsc->width0, prsc->height0,
+                                i, tiling_descriptions[slice->tiling],
+                                u_minify(prsc->width0, i),
+                                u_minify(prsc->height0, i),
+                                level_width, level_height,
+                                slice->stride, slice->offset);
+                }
+        }
+
+        /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only
+         * needs to be aligned to utile boundaries. Since tiles are laid out
+         * from small to big in memory, we need to align the later UIF slices
+         * to UIF blocks, if they were preceded by non-UIF-block-aligned LT
+         * slices.
+         *
+         * We additionally align to 4k, which improves UIF XOR performance.
+         */
+        uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) -
+                                      rsc->slices[0].offset);
+        if (page_align_offset) {
+                for (int i = 0; i <= prsc->last_level; i++)
+                        rsc->slices[i].offset += page_align_offset;
+        }
+
+        /* Arrays, cubes, and 3D textures have a stride which is the distance
+         * from one full mipmap tree to the next (64b aligned).
+         */
+        rsc->cube_map_stride = align(rsc->slices[0].offset +
+                                     rsc->slices[0].size, 64);
+}
+
+/* Common allocation/initialization shared by resource_create and
+ * resource_from_handle. Returns NULL on allocation failure.
+ */
+static struct vc5_resource *
+vc5_resource_setup(struct pipe_screen *pscreen,
+                   const struct pipe_resource *tmpl)
+{
+        struct vc5_resource *rsc = CALLOC_STRUCT(vc5_resource);
+        if (!rsc)
+                return NULL;
+        struct pipe_resource *prsc = &rsc->base.b;
+
+        *prsc = *tmpl;
+
+        pipe_reference_init(&prsc->reference, 1);
+        prsc->screen = pscreen;
+
+        rsc->base.vtbl = &vc5_resource_vtbl;
+        if (prsc->nr_samples <= 1)
+                rsc->cpp = util_format_get_blocksize(tmpl->format);
+        else
+                rsc->cpp = sizeof(uint32_t);
+
+        assert(rsc->cpp);
+
+        return rsc;
+}
+
+static bool
+find_modifier(uint64_t needle, const uint64_t *haystack, int count)
+{
+        int i;
+
+        for (i = 0; i < count; i++) {
+                if (haystack[i] == needle)
+                        return true;
+        }
+
+        return false;
+}
+
+static struct pipe_resource *
+vc5_resource_create_with_modifiers(struct pipe_screen *pscreen,
+                                   const struct pipe_resource *tmpl,
+                                   const uint64_t *modifiers,
+                                   int count)
+{
+        bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+        struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+        struct pipe_resource *prsc = &rsc->base.b;
+        /* Use a tiled layout if we can, for better 3D performance. */
+        bool should_tile = true;
+
+        /* VBOs/PBOs are untiled (and 1 height). */
+        if (tmpl->target == PIPE_BUFFER)
+                should_tile = false;
+
+        /* Cursors are always linear, and the user can request linear as well.
+         */
+        if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR))
+                should_tile = false;
+
+        /* Scanout BOs for simulator need to be linear for interaction with
+         * i965.
+         */
+        if (using_vc5_simulator &&
+            tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
+                should_tile = false;
+
+        /* No user-specified modifier; determine our own. */
+        if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) {
+                linear_ok = true;
+                rsc->tiled = should_tile;
+        } else if (should_tile &&
+                   find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+                                 modifiers, count)) {
+                rsc->tiled = true;
+        } else if (linear_ok) {
+                rsc->tiled = false;
+        } else {
+                fprintf(stderr, "Unsupported modifier requested\n");
+                /* NOTE(review): rsc appears to be leaked on this path —
+                 * presumably this should goto fail like the path below.
+                 */
+                return NULL;
+        }
+
+        if (tmpl->target != PIPE_BUFFER)
+                rsc->tex_format = vc5_get_tex_format(prsc->format);
+
+        vc5_setup_slices(rsc, "create");
+        if (!vc5_resource_bo_alloc(rsc))
+                goto fail;
+
+        return prsc;
+fail:
+        vc5_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+struct pipe_resource *
+vc5_resource_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *tmpl)
+{
+        const uint64_t mod = DRM_FORMAT_MOD_INVALID;
+        return vc5_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
+}
+
+static struct pipe_resource *
+vc5_resource_from_handle(struct pipe_screen *pscreen,
+                         const struct pipe_resource *tmpl,
+                         struct winsys_handle *whandle,
+                         unsigned usage)
+{
+        struct vc5_screen *screen = vc5_screen(pscreen);
+        struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+        /* NOTE(review): these addresses are formed from rsc before the NULL
+         * check below — reorder if vc5_resource_setup can return NULL.
+         */
+        struct pipe_resource *prsc = &rsc->base.b;
+        struct vc5_resource_slice *slice = &rsc->slices[0];
+
+        if (!rsc)
+                return NULL;
+
+        switch (whandle->modifier) {
+        case DRM_FORMAT_MOD_LINEAR:
+                rsc->tiled = false;
+                break;
+        /* XXX: UIF */
+        default:
+                fprintf(stderr,
+                        "Attempt to import unsupported modifier 0x%llx\n",
+                        (long long)whandle->modifier);
+                goto fail;
+        }
+
+
if (whandle->offset != 0) { + fprintf(stderr, + "Attempt to import unsupported winsys offset %u\n", + whandle->offset); + goto fail; + } + + switch (whandle->type) { + case DRM_API_HANDLE_TYPE_SHARED: + rsc->bo = vc5_bo_open_name(screen, + whandle->handle, whandle->stride); + break; + case DRM_API_HANDLE_TYPE_FD: + rsc->bo = vc5_bo_open_dmabuf(screen, + whandle->handle, whandle->stride); + break; + default: + fprintf(stderr, + "Attempt to import unsupported handle type %d\n", + whandle->type); + goto fail; + } + + if (!rsc->bo) + goto fail; + + vc5_setup_slices(rsc, "import"); + + rsc->tex_format = vc5_get_tex_format(prsc->format); + + DBG(V3D_DEBUG_SURFACE, + "rsc import %p (format %s), %dx%d: " + "level 0 (R) -> stride %d@0x%08x\n", + rsc, util_format_short_name(prsc->format), + prsc->width0, prsc->height0, + slice->stride, slice->offset); + + if (whandle->stride != slice->stride) { + static bool warned = false; + if (!warned) { + warned = true; + fprintf(stderr, + "Attempting to import %dx%d %s with " + "unsupported stride %d instead of %d\n", + prsc->width0, prsc->height0, + util_format_short_name(prsc->format), + whandle->stride, + slice->stride); + } + goto fail; + } + + return prsc; + +fail: + vc5_resource_destroy(pscreen, prsc); + return NULL; +} + +static struct pipe_surface * +vc5_create_surface(struct pipe_context *pctx, + struct pipe_resource *ptex, + const struct pipe_surface *surf_tmpl) +{ + struct vc5_surface *surface = CALLOC_STRUCT(vc5_surface); + struct vc5_resource *rsc = vc5_resource(ptex); + + if (!surface) + return NULL; + + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + + struct pipe_surface *psurf = &surface->base; + unsigned level = surf_tmpl->u.tex.level; + + pipe_reference_init(&psurf->reference, 1); + pipe_resource_reference(&psurf->texture, ptex); + + psurf->context = pctx; + psurf->format = surf_tmpl->format; + psurf->width = u_minify(ptex->width0, level); + psurf->height = u_minify(ptex->height0, level); + 
psurf->u.tex.level = level; + psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + + surface->offset = (rsc->slices[level].offset + + psurf->u.tex.first_layer * rsc->cube_map_stride); + surface->tiling = rsc->slices[level].tiling; + surface->format = vc5_get_rt_format(psurf->format); + + if (util_format_is_depth_or_stencil(psurf->format)) { + switch (psurf->format) { + case PIPE_FORMAT_Z16_UNORM: + surface->internal_type = INTERNAL_TYPE_DEPTH_16; + break; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + surface->internal_type = INTERNAL_TYPE_DEPTH_32F; + break; + default: + surface->internal_type = INTERNAL_TYPE_DEPTH_24; + } + } else { + uint32_t bpp, type; + vc5_get_internal_type_bpp_for_output_format(surface->format, + &type, &bpp); + surface->internal_type = type; + surface->internal_bpp = bpp; + } + + return &surface->base; +} + +static void +vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf) +{ + pipe_resource_reference(&psurf->texture, NULL); + FREE(psurf); +} + +static void +vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource) +{ + /* All calls to flush_resource are followed by a flush of the context, + * so there's nothing to do. 
+ */ +} + +void +vc5_resource_screen_init(struct pipe_screen *pscreen) +{ + pscreen->resource_create_with_modifiers = + vc5_resource_create_with_modifiers; + pscreen->resource_create = vc5_resource_create; + pscreen->resource_from_handle = vc5_resource_from_handle; + pscreen->resource_get_handle = u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; +} + +void +vc5_resource_context_init(struct pipe_context *pctx) +{ + pctx->transfer_map = u_transfer_map_vtbl; + pctx->transfer_flush_region = u_transfer_flush_region_vtbl; + pctx->transfer_unmap = u_transfer_unmap_vtbl; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; + pctx->create_surface = vc5_create_surface; + pctx->surface_destroy = vc5_surface_destroy; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = vc5_blit; + pctx->flush_resource = vc5_flush_resource; +} diff --git a/src/gallium/drivers/vc5/vc5_resource.h b/src/gallium/drivers/vc5/vc5_resource.h new file mode 100644 index 00000000000..3440fdc947b --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_resource.h @@ -0,0 +1,158 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_RESOURCE_H +#define VC5_RESOURCE_H + +#include "vc5_screen.h" +#include "util/u_transfer.h" + +/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These + * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB + * page. Those pages are then arranged left-to-right, top-to-bottom, to cover + * an image. + * + * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte + * utiles. Utiles may be 8x8 (8bpp), 8x4(16bpp) or 4x4 (32bpp). + */ + +/** + * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory + * Format field of render target and Z/Stencil config. + */ +enum vc5_tiling_mode { + /* Untiled resources. Not valid as texture inputs. */ + VC5_TILING_RASTER, + + /* Single line of u-tiles. */ + VC5_TILING_LINEARTILE, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_1_COLUMN, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_2_COLUMN, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_NO_XOR, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. 
+ */ + VC5_TILING_UIF_XOR, +}; + +struct vc5_transfer { + struct pipe_transfer base; + void *map; + + struct pipe_resource *ss_resource; + struct pipe_box ss_box; +}; + +struct vc5_resource_slice { + uint32_t offset; + uint32_t stride; + uint32_t size; + enum vc5_tiling_mode tiling; +}; + +struct vc5_surface { + struct pipe_surface base; + uint32_t offset; + enum vc5_tiling_mode tiling; + /** + * Output image format for TILE_RENDERING_MODE_CONFIGURATION + */ + uint8_t format; + + /** + * Internal format of the tile buffer for + * TILE_RENDERING_MODE_CONFIGURATION. + */ + uint8_t internal_type; + + /** + * internal bpp value (0=32bpp, 2=128bpp) for color buffers in + * TILE_RENDERING_MODE_CONFIGURATION. + */ + uint8_t internal_bpp; +}; + +struct vc5_resource { + struct u_resource base; + struct vc5_bo *bo; + struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS]; + uint32_t cube_map_stride; + int cpp; + bool tiled; + /** One of V3D_TEXTURE_DATA_FORMAT_* */ + uint8_t tex_format; + + /** + * Number of times the resource has been written to. + * + * This is used to track whether we need to load the surface on first + * rendering. + */ + uint64_t writes; + + /** + * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL + * for which parts of the resource are defined. + * + * Used for avoiding fallback to quad clears for clearing just depth, + * when the stencil contents have never been initialized. Note that + * we're lazy and fields not present in the buffer (DEPTH in a color + * buffer) may get marked. 
+ */ + uint32_t initialized_buffers; +}; + +static inline struct vc5_resource * +vc5_resource(struct pipe_resource *prsc) +{ + return (struct vc5_resource *)prsc; +} + +static inline struct vc5_surface * +vc5_surface(struct pipe_surface *psurf) +{ + return (struct vc5_surface *)psurf; +} + +static inline struct vc5_transfer * +vc5_transfer(struct pipe_transfer *ptrans) +{ + return (struct vc5_transfer *)ptrans; +} + +void vc5_resource_screen_init(struct pipe_screen *pscreen); +void vc5_resource_context_init(struct pipe_context *pctx); +struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl); + +#endif /* VC5_RESOURCE_H */ diff --git a/src/gallium/drivers/vc5/vc5_screen.c b/src/gallium/drivers/vc5/vc5_screen.c new file mode 100644 index 00000000000..d3c9f0962e3 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_screen.c @@ -0,0 +1,620 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "os/os_misc.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_hash_table.h" +#include "util/ralloc.h" + +#include <xf86drm.h> +#include "vc5_drm.h" +#include "vc5_screen.h" +#include "vc5_context.h" +#include "vc5_resource.h" +#include "compiler/v3d_compiler.h" + +static const char * +vc5_screen_get_name(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + if (!screen->name) { + screen->name = ralloc_asprintf(screen, + "VC5 V3D %d.%d", + screen->devinfo.ver / 10, + screen->devinfo.ver % 10); + } + + return screen->name; +} + +static const char * +vc5_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "Broadcom"; +} + +static void +vc5_screen_destroy(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + util_hash_table_destroy(screen->bo_handles); + vc5_bufmgr_destroy(pscreen); + slab_destroy_parent(&screen->transfer_pool); + + if (using_vc5_simulator) + vc5_simulator_destroy(screen); + + v3d_compiler_free(screen->compiler); + + close(screen->fd); + ralloc_free(pscreen); +} + +static int +vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + switch (param) { + /* Supported features (boolean caps). 
*/ + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_SM3: + case PIPE_CAP_INDEP_BLEND_ENABLE: /* XXX */ + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: + case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: + case PIPE_CAP_COMPUTE: + case PIPE_CAP_DRAW_INDIRECT: + return 1; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 4; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 400; + + case PIPE_CAP_MAX_VIEWPORTS: + return 1; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + return 1; + + + /* Stream output. */ + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 64; + + case PIPE_CAP_MIN_TEXEL_OFFSET: + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: + return 7; + + /* Unsupported features. 
*/ + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + case PIPE_CAP_TEXTURE_GATHER_SM5: + case PIPE_CAP_FAKE_SW_MSAA: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: + case PIPE_CAP_MAX_VERTEX_STREAMS: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_CLIP_HALFZ: + case PIPE_CAP_VERTEXID_NOBASE: + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + case 
PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_PCI_GROUP: + case PIPE_CAP_PCI_BUS: + case PIPE_CAP_PCI_DEVICE: + case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: + case PIPE_CAP_CULL_DISTANCE: + case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: + case PIPE_CAP_TGSI_VOTE: + case PIPE_CAP_MAX_WINDOW_RECTANGLES: + case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: + case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: + case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_TGSI_FS_FBFETCH: + case PIPE_CAP_INT64: + case PIPE_CAP_INT64_DIVMOD: + case PIPE_CAP_DOUBLES: + case PIPE_CAP_BINDLESS_TEXTURE: + case PIPE_CAP_POST_DEPTH_COVERAGE: + case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: + case PIPE_CAP_TGSI_BALLOT: + case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: + case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: + case PIPE_CAP_TGSI_CLOCK: + case PIPE_CAP_TGSI_TEX_TXF_LZ: + case PIPE_CAP_NATIVE_FENCE_FD: + case PIPE_CAP_TGSI_MUL_ZERO_WINS: + case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: + case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_MEMOBJ: + case PIPE_CAP_LOAD_CONSTBUF: + case PIPE_CAP_TILE_RASTER_ORDER: + return 0; + + /* Geometry shader output, unsupported. */ + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return 0; + + /* Texturing. 
*/ + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return VC5_MAX_MIP_LEVELS; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 256; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 2048; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + + /* Queries. */ + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_QUERY_TIMESTAMP: + return 0; + + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; + + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return 64; + + case PIPE_CAP_VENDOR_ID: + return 0x14E4; + case PIPE_CAP_DEVICE_ID: + return 0xFFFFFFFF; + case PIPE_CAP_ACCELERATED: + return 1; + case PIPE_CAP_VIDEO_MEMORY: { + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + case PIPE_CAP_UMA: + return 1; + + default: + fprintf(stderr, "unknown param %d\n", param); + return 0; + } +} + +static float +vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 32; + + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 512.0f; + + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 0.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 0.0f; + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0f; + default: + fprintf(stderr, "unknown paramf %d\n", param); + return 0; + } +} + +static int +vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_FRAGMENT) { + return 0; + } + + /* this is probably not totally correct.. 
but it's a start: */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return UINT_MAX; + + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return VC5_MAX_FS_INPUTS / 4; + else + return 16; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return shader == PIPE_SHADER_FRAGMENT ? 4 : 8; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 16 * 1024 * sizeof(float); + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 16; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + return 0; + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return VC5_MAX_TEXTURE_SAMPLERS; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + return 0; + default: + fprintf(stderr, "unknown shader param %d\n", param); + return 0; + } + return 0; +} + 
+static boolean +vc5_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES) + return FALSE; + + if ((target >= PIPE_MAX_TEXTURE_TYPES) || + !util_format_is_supported(format, usage)) { + return FALSE; + } + + if (usage & PIPE_BIND_VERTEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16_USCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8_SSCALED: + retval |= 
PIPE_BIND_VERTEX_BUFFER; + break; + default: + break; + } + } + + if ((usage & PIPE_BIND_RENDER_TARGET) && + vc5_rt_format_supported(format)) { + retval |= PIPE_BIND_RENDER_TARGET; + } + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + vc5_tex_format_supported(format)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + (format == PIPE_FORMAT_S8_UINT_Z24_UNORM || + format == PIPE_FORMAT_X8Z24_UNORM || + format == PIPE_FORMAT_Z16_UNORM || + format == PIPE_FORMAT_Z32_FLOAT || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_INDEX_BUFFER) && + (format == PIPE_FORMAT_I8_UINT || + format == PIPE_FORMAT_I16_UINT || + format == PIPE_FORMAT_I32_UINT)) { + retval |= PIPE_BIND_INDEX_BUFFER; + } + +#if 0 + if (retval != usage) { + fprintf(stderr, + "not supported: format=%s, target=%d, sample_count=%d, " + "usage=0x%x, retval=0x%x\n", util_format_name(format), + target, sample_count, usage, retval); + } +#endif + + return retval == usage; +} + +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + +static bool +vc5_get_device_info(struct vc5_screen *screen) +{ + struct drm_vc5_get_param ident0 = { + .param = DRM_VC5_PARAM_V3D_CORE0_IDENT0, + }; + struct drm_vc5_get_param ident1 = { + .param = DRM_VC5_PARAM_V3D_CORE0_IDENT1, + }; + int ret; + + ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM, &ident0); + if (ret != 0) { + fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n", + strerror(errno)); + return false; + } + ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM, &ident1); + if (ret != 0) { + fprintf(stderr, "Couldn't get V3D core IDENT1: %s\n", + strerror(errno)); + return false; + } + + uint32_t major = (ident0.value >> 24) & 0xff; + uint32_t minor = (ident1.value >> 0) & 0xf; + 
screen->devinfo.ver = major * 10 + minor; + + if (screen->devinfo.ver != 33) { + fprintf(stderr, + "V3D %d.%d not supported by this version of Mesa.\n", + screen->devinfo.ver / 10, + screen->devinfo.ver % 10); + return false; + } + + return true; +} + +static const void * +vc5_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, unsigned shader) +{ + return &v3d_nir_options; +} + +struct pipe_screen * +vc5_screen_create(int fd) +{ + struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen); + struct pipe_screen *pscreen; + + pscreen = &screen->base; + + pscreen->destroy = vc5_screen_destroy; + pscreen->get_param = vc5_screen_get_param; + pscreen->get_paramf = vc5_screen_get_paramf; + pscreen->get_shader_param = vc5_screen_get_shader_param; + pscreen->context_create = vc5_context_create; + pscreen->is_format_supported = vc5_screen_is_format_supported; + + screen->fd = fd; + list_inithead(&screen->bo_cache.time_list); + (void)mtx_init(&screen->bo_handles_mutex, mtx_plain); + screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); + +#if defined(USE_VC5_SIMULATOR) + vc5_simulator_init(screen); +#endif + + if (!vc5_get_device_info(screen)) + goto fail; + + slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16); + + vc5_fence_init(screen); + + v3d_process_debug_variable(); + + vc5_resource_screen_init(pscreen); + + screen->compiler = v3d_compiler_init(&screen->devinfo); + + pscreen->get_name = vc5_screen_get_name; + pscreen->get_vendor = vc5_screen_get_vendor; + pscreen->get_device_vendor = vc5_screen_get_vendor; + pscreen->get_compiler_options = vc5_screen_get_compiler_options; + + return pscreen; + +fail: + close(fd); + ralloc_free(pscreen); + return NULL; +} diff --git a/src/gallium/drivers/vc5/vc5_screen.h b/src/gallium/drivers/vc5/vc5_screen.h new file mode 100644 index 00000000000..d804efa1bb7 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_screen.h @@ -0,0 +1,99 @@ +/* + * Copyright © 
2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_SCREEN_H +#define VC5_SCREEN_H + +#include "pipe/p_screen.h" +#include "os/os_thread.h" +#include "state_tracker/drm_driver.h" +#include "util/list.h" +#include "util/slab.h" +#include "broadcom/common/v3d_debug.h" +#include "broadcom/common/v3d_device_info.h" + +struct vc5_bo; + +#define VC5_MAX_MIP_LEVELS 12 +#define VC5_MAX_TEXTURE_SAMPLERS 32 +#define VC5_MAX_SAMPLES 4 +#define VC5_MAX_DRAW_BUFFERS 4 + +struct vc5_simulator_file; + +struct vc5_screen { + struct pipe_screen base; + int fd; + + struct v3d_device_info devinfo; + + const char *name; + + /** The last seqno we've completed a wait for. + * + * This lets us slightly optimize our waits by skipping wait syscalls + * if we know the job's already done. 
+ */ + uint64_t finished_seqno; + + struct slab_parent_pool transfer_pool; + + struct vc5_bo_cache { + /** List of struct vc5_bo freed, by age. */ + struct list_head time_list; + /** List of struct vc5_bo freed, per size, by age. */ + struct list_head *size_list; + uint32_t size_list_size; + + mtx_t lock; + + uint32_t bo_size; + uint32_t bo_count; + } bo_cache; + + const struct v3d_compiler *compiler; + + struct util_hash_table *bo_handles; + mtx_t bo_handles_mutex; + + uint32_t bo_size; + uint32_t bo_count; + + struct vc5_simulator_file *sim_file; +}; + +static inline struct vc5_screen * +vc5_screen(struct pipe_screen *screen) +{ + return (struct vc5_screen *)screen; +} + +struct pipe_screen *vc5_screen_create(int fd); + +void +vc5_fence_init(struct vc5_screen *screen); + +struct vc5_fence * +vc5_fence_create(struct vc5_screen *screen, uint64_t seqno); + +#endif /* VC5_SCREEN_H */ diff --git a/src/gallium/drivers/vc5/vc5_simulator.c b/src/gallium/drivers/vc5/vc5_simulator.c new file mode 100644 index 00000000000..3f783ea5b13 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_simulator.c @@ -0,0 +1,736 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_simulator.c
+ *
+ * Implements VC5 simulation on top of a non-VC5 GEM fd.
+ *
+ * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on
+ * top of the simpenrose software simulator. Generally, VC5 driver BOs have a
+ * GEM-side copy of their contents and a simulator-side memory area that the
+ * GEM contents get copied into during simulation. Once simulation is done,
+ * the simulator's data is copied back out to the GEM BOs, so that rendering
+ * appears on the screen as if actual hardware rendering had been done.
+ *
+ * One of the limitations of this code is that we shouldn't really need a
+ * GEM-side BO for non-window-system BOs. However, we do need unique BO
+ * handles for each of our GEM bos so that this file can look up its state
+ * from the handle passed in at submit ioctl time (also, a couple of places
+ * outside of this file still call ioctls directly on the fd).
+ *
+ * Another limitation is that BO import doesn't work unless the underlying
+ * window system's BO size matches what VC5 is going to use, which of course
+ * doesn't work out in practice. This means that for now, only DRI3 (VC5
+ * makes the winsys BOs) is supported, not DRI2 (window system makes the winsys
+ * BOs).
+ */ + +#ifdef USE_VC5_SIMULATOR + +#include <sys/mman.h> +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "util/u_memory.h" +#include "util/u_mm.h" + +#define HW_REGISTER_RO(x) (x) +#define HW_REGISTER_RW(x) (x) +#include "libs/core/v3d/registers/3.3.0.0/v3d.h" + +#include "vc5_screen.h" +#include "vc5_context.h" +#define V3D_TECH_VERSION 3 +#define V3D_REVISION 3 +#define V3D_SUB_REV 0 +#define V3D_HIDDEN_REV 0 +#undef unreachable +#include "v3d_hw_auto.h" + +/** Global (across GEM fds) state for the simulator */ +static struct vc5_simulator_state { + mtx_t mutex; + + struct v3d_hw *v3d; + + /* Base virtual address of the heap. */ + void *mem; + /* Base hardware address of the heap. */ + uint32_t mem_base; + /* Size of the heap. */ + size_t mem_size; + + struct mem_block *heap; + struct mem_block *overflow; + + /** Mapping from GEM handle to struct vc5_simulator_bo * */ + struct hash_table *fd_map; + + int refcount; +} sim_state = { + .mutex = _MTX_INITIALIZER_NP, +}; + +/** Per-GEM-fd state for the simulator. */ +struct vc5_simulator_file { + int fd; + + /** Mapping from GEM handle to struct vc5_simulator_bo * */ + struct hash_table *bo_map; + + struct mem_block *gmp; + void *gmp_vaddr; +}; + +/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */ +struct vc5_simulator_bo { + struct vc5_simulator_file *file; + + /** Area for this BO within sim_state->mem */ + struct mem_block *block; + uint32_t size; + void *vaddr; + + void *winsys_map; + uint32_t winsys_stride; + + int handle; +}; + +static void * +int_to_key(int key) +{ + return (void *)(uintptr_t)key; +} + +static struct vc5_simulator_file * +vc5_get_simulator_file_for_fd(int fd) +{ + struct hash_entry *entry = _mesa_hash_table_search(sim_state.fd_map, + int_to_key(fd + 1)); + return entry ? entry->data : NULL; +} + +/* A marker placed just after each BO, then checked after rendering to make + * sure it's still there. 
+ */ +#define BO_SENTINEL 0xfedcba98 + +/* 128kb */ +#define GMP_ALIGN2 17 + +/** + * Sets the range of GPU virtual address space to have the given GMP + * permissions (bit 0 = read, bit 1 = write, write-only forbidden). + */ +static void +set_gmp_flags(struct vc5_simulator_file *file, + uint32_t offset, uint32_t size, uint32_t flag) +{ + assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0); + int gmp_offset = offset >> GMP_ALIGN2; + int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2; + uint32_t *gmp = file->gmp_vaddr; + + assert(flag <= 0x3); + + for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) { + int32_t bitshift = (i % 16) * 2; + gmp[i / 16] &= ~(0x3 << bitshift); + gmp[i / 16] |= flag << bitshift; + } +} + +/** + * Allocates space in simulator memory and returns a tracking struct for it + * that also contains the drm_gem_cma_object struct. + */ +static struct vc5_simulator_bo * +vc5_create_simulator_bo(int fd, int handle, unsigned size) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = rzalloc(file, + struct vc5_simulator_bo); + size = align(size, 4096); + + sim_bo->file = file; + sim_bo->handle = handle; + + mtx_lock(&sim_state.mutex); + sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0); + mtx_unlock(&sim_state.mutex); + assert(sim_bo->block); + + set_gmp_flags(file, sim_bo->block->ofs, size, 0x3); + + sim_bo->size = size; + sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; + memset(sim_bo->vaddr, 0xd0, size); + + *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL; + + /* A handle of 0 is used for vc5_gem.c internal allocations that + * don't need to go in the lookup table. 
+ */ + if (handle != 0) { + mtx_lock(&sim_state.mutex); + _mesa_hash_table_insert(file->bo_map, int_to_key(handle), + sim_bo); + mtx_unlock(&sim_state.mutex); + } + + return sim_bo; +} + +static void +vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo) +{ + struct vc5_simulator_file *sim_file = sim_bo->file; + + if (sim_bo->winsys_map) + munmap(sim_bo->winsys_map, sim_bo->size); + + set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0); + + mtx_lock(&sim_state.mutex); + u_mmFreeMem(sim_bo->block); + if (sim_bo->handle) { + struct hash_entry *entry = + _mesa_hash_table_search(sim_file->bo_map, + int_to_key(sim_bo->handle)); + _mesa_hash_table_remove(sim_file->bo_map, entry); + } + mtx_unlock(&sim_state.mutex); + ralloc_free(sim_bo); +} + +static struct vc5_simulator_bo * +vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle) +{ + mtx_lock(&sim_state.mutex); + struct hash_entry *entry = + _mesa_hash_table_search(file->bo_map, int_to_key(gem_handle)); + mtx_unlock(&sim_state.mutex); + + return entry ? 
entry->data : NULL; +} + +static int +vc5_simulator_pin_bos(int fd, struct vc5_job *job) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + struct vc5_simulator_bo *sim_bo = + vc5_get_simulator_bo(file, bo->handle); + + vc5_bo_map(bo); + memcpy(sim_bo->vaddr, bo->map, bo->size); + } + + return 0; +} + +static int +vc5_simulator_unpin_bos(int fd, struct vc5_job *job) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + struct vc5_simulator_bo *sim_bo = + vc5_get_simulator_bo(file, bo->handle); + + assert(*(uint32_t *)(sim_bo->vaddr + + sim_bo->size) == BO_SENTINEL); + + vc5_bo_map(bo); + memcpy(bo->map, sim_bo->vaddr, bo->size); + } + + return 0; +} + +#if 0 +static void +vc5_dump_to_file(struct vc5_exec_info *exec) +{ + static int dumpno = 0; + struct drm_vc5_get_hang_state *state; + struct drm_vc5_get_hang_state_bo *bo_state; + unsigned int dump_version = 0; + + if (!(vc5_debug & VC5_DEBUG_DUMP)) + return; + + state = calloc(1, sizeof(*state)); + + int unref_count = 0; + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + unref_count++; + } + + /* Add one more for the overflow area that isn't wrapped in a BO. 
*/ + state->bo_count = exec->bo_count + unref_count + 1; + bo_state = calloc(state->bo_count, sizeof(*bo_state)); + + char *filename = NULL; + asprintf(&filename, "vc5-dri-%d.dump", dumpno++); + FILE *f = fopen(filename, "w+"); + if (!f) { + fprintf(stderr, "Couldn't open %s: %s", filename, + strerror(errno)); + return; + } + + fwrite(&dump_version, sizeof(dump_version), 1, f); + + state->ct0ca = exec->ct0ca; + state->ct0ea = exec->ct0ea; + state->ct1ca = exec->ct1ca; + state->ct1ea = exec->ct1ea; + state->start_bin = exec->ct0ca; + state->start_render = exec->ct1ca; + fwrite(state, sizeof(*state), 1, f); + + int i; + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + bo_state[i].handle = i; /* Not used by the parser. */ + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + } + + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + bo_state[i].handle = 0; + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + i++; + } + + /* Add the static overflow memory area. 
*/ + bo_state[i].handle = exec->bo_count; + bo_state[i].paddr = sim_state.overflow->ofs; + bo_state[i].size = sim_state.overflow->size; + i++; + + fwrite(bo_state, sizeof(*bo_state), state->bo_count, f); + + for (int i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + void *overflow = calloc(1, sim_state.overflow->size); + fwrite(overflow, 1, sim_state.overflow->size, f); + free(overflow); + + free(state); + free(bo_state); + fclose(f); +} +#endif + +#define V3D_WRITE(reg, val) v3d_hw_write_reg(sim_state.v3d, reg, val) +#define V3D_READ(reg) v3d_hw_read_reg(sim_state.v3d, reg) + +static void +vc5_flush_l3(void) +{ + if (!v3d_hw_has_gca(sim_state.v3d)) + return; + + uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); + + V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET); + V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET); +} + +/* Invalidates the L2 cache. This is a read-only cache. */ +static void +vc5_flush_l2(void) +{ + V3D_WRITE(V3D_CTL_0_L2CACTL, + V3D_CTL_0_L2CACTL_L2CCLR_SET | + V3D_CTL_0_L2CACTL_L2CENA_SET); +} + +/* Invalidates texture L2 cachelines */ +static void +vc5_flush_l2t(void) +{ + V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0); + V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); + V3D_WRITE(V3D_CTL_0_L2TCACTL, + V3D_CTL_0_L2TCACTL_L2TFLS_SET | + (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); +} + +/* Invalidates the slice caches. These are read-only caches. 
*/ +static void +vc5_flush_slices(void) +{ + V3D_WRITE(V3D_CTL_0_SLCACTL, ~0); +} + +static void +vc5_flush_caches(void) +{ + vc5_flush_l3(); + vc5_flush_l2(); + vc5_flush_l2t(); + vc5_flush_slices(); +} + +int +vc5_simulator_flush(struct vc5_context *vc5, + struct drm_vc5_submit_cl *submit, struct vc5_job *job) +{ + struct vc5_screen *screen = vc5->screen; + int fd = screen->fd; + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]); + struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL; + struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL; + uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; + uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0; + uint32_t row_len = MIN2(sim_stride, winsys_stride); + int ret; + + if (ctex && csim_bo->winsys_map) { +#if 0 + fprintf(stderr, "%dx%d %d %d %d\n", + ctex->base.b.width0, ctex->base.b.height0, + winsys_stride, + sim_stride, + ctex->bo->size); +#endif + + for (int y = 0; y < ctex->base.b.height0; y++) { + memcpy(ctex->bo->map + y * sim_stride, + csim_bo->winsys_map + y * winsys_stride, + row_len); + } + } + + ret = vc5_simulator_pin_bos(fd, job); + if (ret) + return ret; + + //vc5_dump_to_file(&exec); + + /* Completely reset the GMP. */ + v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CFG, + V3D_GMP_0_CFG_PROTENABLE_SET); + v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_TABLE_ADDR, file->gmp->ofs); + v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CLEAR_LOAD, ~0); + while (v3d_hw_read_reg(sim_state.v3d, V3D_GMP_0_STATUS) & + V3D_GMP_0_STATUS_CFG_BUSY_SET) { + ; + } + + vc5_flush_caches(); + + v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QBA, submit->bcl_start); + v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QEA, submit->bcl_end); + + /* Wait for bin to complete before firing render, as it seems the + * simulator doesn't implement the semaphores. 
+ */ + while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0CA) != + v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0EA)) { + v3d_hw_tick(sim_state.v3d); + } + + v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QBA, submit->rcl_start); + v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QEA, submit->rcl_end); + + while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1CA) != + v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1EA) || + v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1CA) != + v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1EA)) { + v3d_hw_tick(sim_state.v3d); + } + + ret = vc5_simulator_unpin_bos(fd, job); + if (ret) + return ret; + + if (ctex && csim_bo->winsys_map) { + for (int y = 0; y < ctex->base.b.height0; y++) { + memcpy(csim_bo->winsys_map + y * winsys_stride, + ctex->bo->map + y * sim_stride, + row_len); + } + } + + return 0; +} + +/** + * Map the underlying GEM object from the real hardware GEM handle. + */ +static void * +vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo) +{ + int ret; + void *map; + + struct drm_mode_map_dumb map_dumb = { + .handle = sim_bo->handle, + }; + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); + if (ret != 0) { + fprintf(stderr, "map ioctl failure\n"); + abort(); + } + + map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, map_dumb.offset); + if (map == MAP_FAILED) { + fprintf(stderr, + "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + sim_bo->handle, (long long)map_dumb.offset, + (int)sim_bo->size); + abort(); + } + + return map; +} + +/** + * Do fixups after a BO has been opened from a handle. + * + * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE + * time, but we're still using drmPrimeFDToHandle() so we have this helper to + * be called afterward instead. 
+ */ +void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, + int handle, uint32_t size) +{ + struct vc5_simulator_bo *sim_bo = + vc5_create_simulator_bo(fd, handle, size); + + sim_bo->winsys_stride = winsys_stride; + sim_bo->winsys_map = vc5_simulator_map_winsys_bo(fd, sim_bo); +} + +/** + * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation. + * + * Making a VC5 BO is just a matter of making a corresponding BO on the host. + */ +static int +vc5_simulator_create_bo_ioctl(int fd, struct drm_vc5_create_bo *args) +{ + int ret; + struct drm_mode_create_dumb create = { + .width = 128, + .bpp = 8, + .height = (args->size + 127) / 128, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); + assert(create.size >= args->size); + + args->handle = create.handle; + + struct vc5_simulator_bo *sim_bo = + vc5_create_simulator_bo(fd, create.handle, args->size); + + args->offset = sim_bo->block->ofs; + + return ret; +} + +/** + * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation. + * + * We just pass this straight through to dumb mmap. + */ +static int +vc5_simulator_mmap_bo_ioctl(int fd, struct drm_vc5_mmap_bo *args) +{ + int ret; + struct drm_mode_map_dumb map = { + .handle = args->handle, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + args->offset = map.offset; + + return ret; +} + +static int +vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args) +{ + /* Free the simulator's internal tracking. */ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file, + args->handle); + + vc5_free_simulator_bo(sim_bo); + + /* Pass the call on down. 
*/
+        return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args);
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_GET_PARAM) implementation.
+ *
+ * Services identity/configuration queries by reading the corresponding
+ * register from the simulator's register file.
+ */
+static int
+vc5_simulator_get_param_ioctl(int fd, struct drm_vc5_get_param *args)
+{
+        static const uint32_t reg_map[] = {
+                [DRM_VC5_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
+        };
+
+        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
+                args->value = v3d_hw_read_reg(sim_state.v3d,
+                                              reg_map[args->param]);
+                return 0;
+        }
+
+        /* Report the unknown *param*, not args->value -- value is never
+         * written on this path.
+         */
+        fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n",
+                (long long)args->param);
+        abort();
+}
+
+int
+vc5_simulator_ioctl(int fd, unsigned long request, void *args)
+{
+        switch (request) {
+        case DRM_IOCTL_VC5_CREATE_BO:
+                return vc5_simulator_create_bo_ioctl(fd, args);
+        case DRM_IOCTL_VC5_MMAP_BO:
+                return vc5_simulator_mmap_bo_ioctl(fd, args);
+
+        case DRM_IOCTL_VC5_WAIT_BO:
+        case DRM_IOCTL_VC5_WAIT_SEQNO:
+                /* We do all of the vc5 rendering synchronously, so we just
+                 * return immediately on the wait ioctls. This ignores any
+                 * native rendering to the host BO, so it does mean we race on
+                 * front buffer rendering.
+ */ + return 0; + + case DRM_IOCTL_VC5_GET_PARAM: + return vc5_simulator_get_param_ioctl(fd, args); + + case DRM_IOCTL_GEM_CLOSE: + return vc5_simulator_gem_close_ioctl(fd, args); + + case DRM_IOCTL_GEM_OPEN: + case DRM_IOCTL_GEM_FLINK: + return drmIoctl(fd, request, args); + default: + fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request); + abort(); + } +} + +static void +vc5_simulator_init_global(void) +{ + mtx_lock(&sim_state.mutex); + if (sim_state.refcount++) { + mtx_unlock(&sim_state.mutex); + return; + } + + sim_state.v3d = v3d_hw_auto_new(NULL); + v3d_hw_alloc_mem(sim_state.v3d, 256 * 1024 * 1024); + sim_state.mem_base = + v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size, + &sim_state.mem); + + sim_state.heap = u_mmInit(0, sim_state.mem_size); + + /* Make a block of 0xd0 at address 0 to make sure we don't screw up + * and land there. + */ + struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0); + memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096); + + mtx_unlock(&sim_state.mutex); + + sim_state.fd_map = + _mesa_hash_table_create(NULL, + _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + +void +vc5_simulator_init(struct vc5_screen *screen) +{ + vc5_simulator_init_global(); + + screen->sim_file = rzalloc(screen, struct vc5_simulator_file); + struct vc5_simulator_file *sim_file = screen->sim_file; + + screen->sim_file->bo_map = + _mesa_hash_table_create(screen->sim_file, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + mtx_lock(&sim_state.mutex); + _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1), + screen->sim_file); + mtx_unlock(&sim_state.mutex); + + sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0); + sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs - + sim_state.mem_base); +} + +void +vc5_simulator_destroy(struct vc5_screen *screen) +{ + mtx_lock(&sim_state.mutex); + if (!--sim_state.refcount) { + _mesa_hash_table_destroy(sim_state.fd_map, NULL); + 
u_mmDestroy(sim_state.heap); + /* No memsetting the struct, because it contains the mutex. */ + sim_state.mem = NULL; + } + mtx_unlock(&sim_state.mutex); +} + +#endif /* USE_VC5_SIMULATOR */ diff --git a/src/gallium/drivers/vc5/vc5_state.c b/src/gallium/drivers/vc5/vc5_state.c new file mode 100644 index 00000000000..b289d20cb62 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_state.c @@ -0,0 +1,663 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_half.h" +#include "util/u_helpers.h" + +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static void * +vc5_generic_cso_state_create(const void *src, uint32_t size) +{ + void *dst = calloc(1, size); + if (!dst) + return NULL; + memcpy(dst, src, size); + return dst; +} + +static void +vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso) +{ + free(hwcso); +} + +static void +vc5_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *blend_color) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->blend_color.f = *blend_color; + for (int i = 0; i < 4; i++) { + vc5->blend_color.hf[i] = + util_float_to_half(blend_color->color[i]); + } + vc5->dirty |= VC5_DIRTY_BLEND_COLOR; +} + +static void +vc5_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref *stencil_ref) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->stencil_ref = *stencil_ref; + vc5->dirty |= VC5_DIRTY_STENCIL_REF; +} + +static void +vc5_set_clip_state(struct pipe_context *pctx, + const struct pipe_clip_state *clip) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->clip = *clip; + vc5->dirty |= VC5_DIRTY_CLIP; +} + +static void +vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1); + vc5->dirty |= VC5_DIRTY_SAMPLE_MASK; +} + +static uint16_t +float_to_187_half(float f) +{ + return fui(f) >> 16; +} + +static void * +vc5_create_rasterizer_state(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct vc5_rasterizer_state *so; + + so = CALLOC_STRUCT(vc5_rasterizer_state); + if (!so) + return NULL; + + so->base = *cso; + + /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835, + * 
BCM21553). + */ + so->point_size = MAX2(cso->point_size, .125f); + + if (cso->offset_tri) { + so->offset_units = float_to_187_half(cso->offset_units); + so->offset_factor = float_to_187_half(cso->offset_scale); + } + + return so; +} + +/* Blend state is baked into shaders. */ +static void * +vc5_create_blend_state(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + return vc5_generic_cso_state_create(cso, sizeof(*cso)); +} + +static void * +vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct vc5_depth_stencil_alpha_state *so; + + so = CALLOC_STRUCT(vc5_depth_stencil_alpha_state); + if (!so) + return NULL; + + so->base = *cso; + + if (cso->depth.enabled) { + /* We only handle early Z in the < direction because otherwise + * we'd have to runtime guess which direction to set in the + * render config. + */ + so->early_z_enable = + ((cso->depth.func == PIPE_FUNC_LESS || + cso->depth.func == PIPE_FUNC_LEQUAL) && + (!cso->stencil[0].enabled || + (cso->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP && + (!cso->stencil[1].enabled || + cso->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP)))); + } + + return so; +} + +static void +vc5_set_polygon_stipple(struct pipe_context *pctx, + const struct pipe_poly_stipple *stipple) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->stipple = *stipple; + vc5->dirty |= VC5_DIRTY_STIPPLE; +} + +static void +vc5_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5->scissor = *scissor; + vc5->dirty |= VC5_DIRTY_SCISSOR; +} + +static void +vc5_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *viewport) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->viewport = *viewport; + vc5->dirty |= VC5_DIRTY_VIEWPORT; +} + +static 
void +vc5_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_vertexbuf_stateobj *so = &vc5->vertexbuf; + + util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, + start_slot, count); + so->count = util_last_bit(so->enabled_mask); + + vc5->dirty |= VC5_DIRTY_VTXBUF; +} + +static void +vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->blend = hwcso; + vc5->dirty |= VC5_DIRTY_BLEND; +} + +static void +vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_rasterizer_state *rast = hwcso; + + if (vc5->rasterizer && rast && + vc5->rasterizer->base.flatshade != rast->base.flatshade) { + vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; + } + + vc5->rasterizer = hwcso; + vc5->dirty |= VC5_DIRTY_RASTERIZER; +} + +static void +vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->zsa = hwcso; + vc5->dirty |= VC5_DIRTY_ZSA; +} + +static void * +vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj); + + if (!so) + return NULL; + + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + so->num_elements = num_elements; + + return so; +} + +static void +vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->vtx = hwcso; + vc5->dirty |= VC5_DIRTY_VTXSTATE; +} + +static void +vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index, + const struct pipe_constant_buffer *cb) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_constbuf_stateobj *so = &vc5->constbuf[shader]; + + 
util_copy_constant_buffer(&so->cb[index], cb); + + /* Note that the state tracker can unbind constant buffers by + * passing NULL here. + */ + if (unlikely(!cb)) { + so->enabled_mask &= ~(1 << index); + so->dirty_mask &= ~(1 << index); + return; + } + + so->enabled_mask |= 1 << index; + so->dirty_mask |= 1 << index; + vc5->dirty |= VC5_DIRTY_CONSTBUF; +} + +static void +vc5_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *framebuffer) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct pipe_framebuffer_state *cso = &vc5->framebuffer; + unsigned i; + + vc5->job = NULL; + + for (i = 0; i < framebuffer->nr_cbufs; i++) + pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]); + for (; i < vc5->framebuffer.nr_cbufs; i++) + pipe_surface_reference(&cso->cbufs[i], NULL); + + cso->nr_cbufs = framebuffer->nr_cbufs; + + pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf); + + cso->width = framebuffer->width; + cso->height = framebuffer->height; + + vc5->dirty |= VC5_DIRTY_FRAMEBUFFER; +} + +static struct vc5_texture_stateobj * +vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader) +{ + switch (shader) { + case PIPE_SHADER_FRAGMENT: + vc5->dirty |= VC5_DIRTY_FRAGTEX; + return &vc5->fragtex; + break; + case PIPE_SHADER_VERTEX: + vc5->dirty |= VC5_DIRTY_VERTTEX; + return &vc5->verttex; + break; + default: + fprintf(stderr, "Unknown shader target %d\n", shader); + abort(); + } +} + +static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest) +{ + switch (pipe_wrap) { + case PIPE_TEX_WRAP_REPEAT: + return 0; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return 1; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return 2; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return 3; + case PIPE_TEX_WRAP_CLAMP: + return (using_nearest ? 
1 : 3); + default: + unreachable("Unknown wrap mode"); + } +} + + +static void * +vc5_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state); + + if (!so) + return NULL; + + memcpy(so, cso, sizeof(*cso)); + + bool either_nearest = + (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || + cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); + + struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = { + .s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest), + .t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest), + .r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest), + }; + V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL, + (uint8_t *)&so->p0, + &p0_unpacked); + + struct V3D33_TEXTURE_SHADER_STATE state_unpacked = { + cl_packet_header(TEXTURE_SHADER_STATE), + + .min_level_of_detail = MAX2(cso->min_lod, 0.0), + .depth_compare_function = cso->compare_func, + .fixed_bias = cso->lod_bias, + }; + STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) == + cl_packet_length(TEXTURE_SHADER_STATE)); + cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state, + &state_unpacked); + + return so; +} + +static void +vc5_sampler_states_bind(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start, + unsigned nr, void **hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); + + assert(start == 0); + unsigned i; + unsigned new_nr = 0; + + for (i = 0; i < nr; i++) { + if (hwcso[i]) + new_nr = i + 1; + stage_tex->samplers[i] = hwcso[i]; + } + + for (; i < stage_tex->num_samplers; i++) { + stage_tex->samplers[i] = NULL; + } + + stage_tex->num_samplers = new_nr; +} + +static uint32_t +translate_swizzle(unsigned char pipe_swizzle) +{ + switch (pipe_swizzle) { + case PIPE_SWIZZLE_0: + return 0; + case PIPE_SWIZZLE_1: + return 1; + case PIPE_SWIZZLE_X: + case 
PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_W: + return 2 + pipe_swizzle; + default: + unreachable("unknown swizzle"); + } +} + +static struct pipe_sampler_view * +vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view); + struct vc5_resource *rsc = vc5_resource(prsc); + + if (!so) + return NULL; + + so->base = *cso; + + pipe_reference(NULL, &prsc->reference); + + struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { + }; + + unpacked.return_word_0_of_texture_data = true; + if (vc5_get_tex_return_size(cso->format) == 16) { + unpacked.return_word_1_of_texture_data = true; + } else { + int chans = vc5_get_tex_return_channels(cso->format); + + if (chans > 1) + unpacked.return_word_1_of_texture_data = true; + if (chans > 2) + unpacked.return_word_2_of_texture_data = true; + if (chans > 3) + unpacked.return_word_3_of_texture_data = true; + } + + V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL, + (uint8_t *)&so->p1, + &unpacked); + + /* Compute the sampler view's swizzle up front. 
This will be plugged + * into either the sampler (for 16-bit returns) or the shader's + * texture key (for 32) + */ + uint8_t view_swizzle[4] = { + cso->swizzle_r, + cso->swizzle_g, + cso->swizzle_b, + cso->swizzle_a + }; + const uint8_t *fmt_swizzle = vc5_get_format_swizzle(so->base.format); + util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle); + + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + struct V3D33_TEXTURE_SHADER_STATE state_unpacked = { + cl_packet_header(TEXTURE_SHADER_STATE), + + .image_width = prsc->width0, + .image_height = prsc->height0, + .image_depth = prsc->depth0, + + .texture_type = rsc->tex_format, + .srgb = util_format_is_srgb(cso->format), + + .base_level = cso->u.tex.first_level, + .array_stride_64_byte_aligned = rsc->cube_map_stride / 64, + }; + + /* Note: Contrary to the docs, the swizzle still applies even + * if the return size is 32. It's just that you probably want + * to swizzle in the shader, because you need the Y/Z/W + * channels to be defined. + */ + if (vc5_get_tex_return_size(cso->format) != 32) { + state_unpacked.swizzle_r = translate_swizzle(so->swizzle[0]); + state_unpacked.swizzle_g = translate_swizzle(so->swizzle[1]); + state_unpacked.swizzle_b = translate_swizzle(so->swizzle[2]); + state_unpacked.swizzle_a = translate_swizzle(so->swizzle[3]); + } else { + state_unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + state_unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + state_unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + state_unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + } + + /* XXX: While we need to use this flag to enable tiled + * resource sharing (even a small shared buffer should be UIF, + * not UBLINEAR or raster), this is also at the moment + * patching up the fact that our resource layout's decisions + * about XOR don't quite match the HW's. 
+ */ + switch (rsc->slices[0].tiling) { + case VC5_TILING_UIF_NO_XOR: + case VC5_TILING_UIF_XOR: + state_unpacked.level_0_is_strictly_uif = true; + state_unpacked.level_0_xor_enable = false; + break; + default: + break; + } + + STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) == + cl_packet_length(TEXTURE_SHADER_STATE)); + cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state, + &state_unpacked); + + return &so->base; +} + +static void +vc5_sampler_view_destroy(struct pipe_context *pctx, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + free(view); +} + +static void +vc5_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); + unsigned i; + unsigned new_nr = 0; + + assert(start == 0); + + for (i = 0; i < nr; i++) { + if (views[i]) + new_nr = i + 1; + pipe_sampler_view_reference(&stage_tex->textures[i], views[i]); + } + + for (; i < stage_tex->num_textures; i++) { + pipe_sampler_view_reference(&stage_tex->textures[i], NULL); + } + + stage_tex->num_textures = new_nr; +} + +static struct pipe_stream_output_target * +vc5_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = CALLOC_STRUCT(pipe_stream_output_target); + if (!target) + return NULL; + + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); + + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + + return target; +} + +static void +vc5_stream_output_target_destroy(struct pipe_context *pctx, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + free(target); +} + +static 
void +vc5_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct vc5_context *ctx = vc5_context(pctx); + struct vc5_streamout_stateobj *so = &ctx->streamout; + unsigned i; + + assert(num_targets <= ARRAY_SIZE(so->targets)); + + for (i = 0; i < num_targets; i++) + pipe_so_target_reference(&so->targets[i], targets[i]); + + for (; i < so->num_targets; i++) + pipe_so_target_reference(&so->targets[i], NULL); + + so->num_targets = num_targets; + + ctx->dirty |= VC5_DIRTY_STREAMOUT; +} + +void +vc5_state_init(struct pipe_context *pctx) +{ + pctx->set_blend_color = vc5_set_blend_color; + pctx->set_stencil_ref = vc5_set_stencil_ref; + pctx->set_clip_state = vc5_set_clip_state; + pctx->set_sample_mask = vc5_set_sample_mask; + pctx->set_constant_buffer = vc5_set_constant_buffer; + pctx->set_framebuffer_state = vc5_set_framebuffer_state; + pctx->set_polygon_stipple = vc5_set_polygon_stipple; + pctx->set_scissor_states = vc5_set_scissor_states; + pctx->set_viewport_states = vc5_set_viewport_states; + + pctx->set_vertex_buffers = vc5_set_vertex_buffers; + + pctx->create_blend_state = vc5_create_blend_state; + pctx->bind_blend_state = vc5_blend_state_bind; + pctx->delete_blend_state = vc5_generic_cso_state_delete; + + pctx->create_rasterizer_state = vc5_create_rasterizer_state; + pctx->bind_rasterizer_state = vc5_rasterizer_state_bind; + pctx->delete_rasterizer_state = vc5_generic_cso_state_delete; + + pctx->create_depth_stencil_alpha_state = vc5_create_depth_stencil_alpha_state; + pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind; + pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete; + + pctx->create_vertex_elements_state = vc5_vertex_state_create; + pctx->delete_vertex_elements_state = vc5_generic_cso_state_delete; + pctx->bind_vertex_elements_state = vc5_vertex_state_bind; + + pctx->create_sampler_state = vc5_create_sampler_state; + 
pctx->delete_sampler_state = vc5_generic_cso_state_delete; + pctx->bind_sampler_states = vc5_sampler_states_bind; + + pctx->create_sampler_view = vc5_create_sampler_view; + pctx->sampler_view_destroy = vc5_sampler_view_destroy; + pctx->set_sampler_views = vc5_set_sampler_views; + + pctx->create_stream_output_target = vc5_create_stream_output_target; + pctx->stream_output_target_destroy = vc5_stream_output_target_destroy; + pctx->set_stream_output_targets = vc5_set_stream_output_targets; +} diff --git a/src/gallium/drivers/vc5/vc5_tiling.c b/src/gallium/drivers/vc5/vc5_tiling.c new file mode 100644 index 00000000000..279774e5553 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_tiling.c @@ -0,0 +1,402 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +/** @file vc5_tiling.c + * + * Handles information about the VC5 tiling formats, and loading and storing + * from them. + */ + +#include <stdint.h> +#include "vc5_screen.h" +#include "vc5_context.h" +#include "vc5_tiling.h" + +struct mb_layout { + /** Height, in pixels, of a macroblock (2x2 utiles, a UIF block). */ + uint32_t height; + /** Width, in pixels, of a macroblock (2x2 utiles, a UIF block). */ + uint32_t width; + uint32_t tile_row_stride; +}; + +enum { + MB_LAYOUT_8BPP, + MB_LAYOUT_16BPP, + MB_LAYOUT_32BPP, + MB_LAYOUT_64BPP, + MB_LAYOUT_128BPP, +}; + +static const struct mb_layout mb_layouts[] = { + [MB_LAYOUT_8BPP] = { .height = 16, .width = 16, .tile_row_stride = 8 }, + [MB_LAYOUT_16BPP] = { .height = 8, .width = 16, .tile_row_stride = 8 }, + [MB_LAYOUT_32BPP] = { .height = 8, .width = 8, .tile_row_stride = 4 }, + [MB_LAYOUT_64BPP] = { .height = 4, .width = 8, .tile_row_stride = 4 }, + [MB_LAYOUT_128BPP] = { .height = 4, .width = 4, .tile_row_stride = 2 }, +}; + +static const struct mb_layout * +get_mb_layout(int cpp) +{ + const struct mb_layout *layout = &mb_layouts[ffs(cpp) - 1]; + + /* Sanity check the table. XXX: We should de-duplicate. */ + assert(layout->width == vc5_utile_width(cpp) * 2); + assert(layout->tile_row_stride == vc5_utile_width(cpp)); + + return layout; +} + +/** Return the width in pixels of a 64-byte microtile. */ +uint32_t +vc5_utile_width(int cpp) +{ + switch (cpp) { + case 1: + case 2: + return 8; + case 4: + case 8: + return 4; + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** Return the height in pixels of a 64-byte microtile. */ +uint32_t +vc5_utile_height(int cpp) +{ + switch (cpp) { + case 1: + return 8; + case 2: + case 4: + return 4; + case 8: + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** + * Returns the byte address for a given pixel within a utile. + * + * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 + * arrangement. 
+ */ +static inline uint32_t +vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + + assert(x < utile_w && y < utile_h); + + return x * cpp + y * utile_w * cpp; +} + +/** + * Returns the byte offset for a given pixel in a LINEARTILE layout. + * + * LINEARTILE is a single line of utiles in either the X or Y direction. + */ +static inline uint32_t +vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t utile_index_x = x / utile_w; + uint32_t utile_index_y = y / utile_h; + + assert(utile_index_x == 0 || utile_index_y == 0); + + return (64 * (utile_index_x + utile_index_y) + + vc5_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +/** + * Returns the byte offset for a given pixel in a UBLINEAR layout. + * + * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 + * utiles), and the UIF blocks are in 1 or 2 columns in raster order. + */ +static inline uint32_t +vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, + int ublinear_number) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t ub_w = utile_w * 2; + uint32_t ub_h = utile_h * 2; + uint32_t ub_x = x / ub_w; + uint32_t ub_y = y / ub_h; + + return (256 * (ub_y * ublinear_number + + ub_x) + + ((x & utile_w) ? 64 : 0) + + ((y & utile_h) ? 
128 : 0) + + + vc5_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +static inline uint32_t +vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_ublinear_pixel_offset(cpp, x, y, 2); +} + +static inline uint32_t +vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_ublinear_pixel_offset(cpp, x, y, 1); +} + +/** + * Returns the byte offset for a given pixel in a UIF layout. + * + * UIF is the general VC5 tiling layout shared across 3D, media, and scanout. + * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in + * 4x4 groups, and those 4x4 groups are then stored in raster order. + */ +static inline uint32_t +vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +{ + const struct mb_layout *layout = get_mb_layout(cpp); + uint32_t mb_width = layout->width; + uint32_t mb_height = layout->height; + uint32_t log2_mb_width = ffs(mb_width) - 1; + uint32_t log2_mb_height = ffs(mb_height) - 1; + + /* Macroblock X, y */ + uint32_t mb_x = x >> log2_mb_width; + uint32_t mb_y = y >> log2_mb_height; + /* X, y within the macroblock */ + uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); + uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); + + uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; + uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; + + uint32_t mb_base_addr = mb_id * 256; + + bool top = mb_pixel_y < mb_height / 2; + bool left = mb_pixel_x < mb_width / 2; + + /* Docs have this in pixels, we do bytes here. 
*/ + uint32_t mb_tile_offset = (!top * 128 + !left * 64); + + uint32_t mb_tile_y = mb_pixel_y & ~(mb_height / 2); + uint32_t mb_tile_x = mb_pixel_x & ~(mb_width / 2); + uint32_t mb_tile_pixel_id = (mb_tile_y * + layout->tile_row_stride + + mb_tile_x); + + uint32_t mb_tile_addr = mb_tile_pixel_id * cpp; + + uint32_t mb_pixel_address = (mb_base_addr + + mb_tile_offset + + mb_tile_addr); + + return mb_pixel_address; +} + +static inline void +vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + for (uint32_t y = 0; y < box->height; y++) { + void *cpu_row = cpu + y * cpu_stride; + + for (int x = 0; x < box->width; x++) { + uint32_t pixel_offset = get_pixel_offset(cpp, image_h, + box->x + x, + box->y + y); + + if (false) { + fprintf(stderr, "%3d,%3d -> %d\n", + box->x + x, box->y + y, + pixel_offset); + } + + if (is_load) { + memcpy(cpu_row + x * cpp, + gpu + pixel_offset, + cpp); + } else { + memcpy(gpu + pixel_offset, + cpu_row + x * cpp, + cpp); + } + } + } +} + +static inline void +vc5_move_pixels_general(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + switch (cpp) { + case 1: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 1, image_h, box, + get_pixel_offset, + is_load); + break; + case 2: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 2, image_h, box, + get_pixel_offset, + is_load); + break; + case 4: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 4, image_h, box, + get_pixel_offset, + is_load); + break; + case 8: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 8, image_h, 
box, + get_pixel_offset, + is_load); + break; + case 16: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 16, image_h, box, + get_pixel_offset, + is_load); + break; + } +} + +static inline void +vc5_move_tiled_image(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + enum vc5_tiling_mode tiling_format, + int cpp, + uint32_t image_h, + const struct pipe_box *box, + bool is_load) +{ + switch (tiling_format) { + case VC5_TILING_UIF_NO_XOR: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_uif_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_2_COLUMN: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_ublinear_2_column_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_1_COLUMN: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_ublinear_1_column_pixel_offset, + is_load); + break; + case VC5_TILING_LINEARTILE: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_lt_pixel_offset, + is_load); + break; + default: + unreachable("Unsupported tiling format"); + break; + } +} + +/** + * Loads pixel data from the start (microtile-aligned) box in \p src to the + * start of \p dst according to the given tiling format. + */ +void +vc5_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + vc5_move_tiled_image(src, src_stride, + dst, dst_stride, + tiling_format, + cpp, + image_h, + box, + true); +} + +/** + * Stores pixel data from the start of \p src into a (microtile-aligned) box in + * \p dst according to the given tiling format. 
+ */ +void +vc5_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + vc5_move_tiled_image(dst, dst_stride, + src, src_stride, + tiling_format, + cpp, + image_h, + box, + false); +} diff --git a/src/gallium/drivers/vc5/vc5_tiling.h b/src/gallium/drivers/vc5/vc5_tiling.h new file mode 100644 index 00000000000..d3cf48c4527 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_tiling.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VC5_TILING_H +#define VC5_TILING_H + +uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST; +uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST; +bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST; +void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp); +void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp); +void vc5_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); +void vc5_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); + +#endif /* VC5_TILING_H */ diff --git a/src/gallium/drivers/vc5/vc5_uniforms.c b/src/gallium/drivers/vc5/vc5_uniforms.c new file mode 100644 index 00000000000..dc444fe92a4 --- /dev/null +++ b/src/gallium/drivers/vc5/vc5_uniforms.c @@ -0,0 +1,417 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_pack_color.h" +#include "util/format_srgb.h" + +#include "vc5_context.h" +#include "compiler/v3d_compiler.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +#if 0 + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +static void +write_texture_border_color(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit) +{ + struct pipe_sampler_state *sampler = texstate->samplers[unit]; + struct pipe_sampler_view *texture = texstate->textures[unit]; + struct vc5_resource *rsc = vc5_resource(texture->texture); + union util_color uc; + + const struct util_format_description *tex_format_desc = + util_format_description(texture->format); + + float border_color[4]; + for (int i = 0; i < 4; i++) + border_color[i] = sampler->border_color.f[i]; + if (util_format_is_srgb(texture->format)) { + for (int i = 0; i < 3; i++) + border_color[i] = + util_format_linear_to_srgb_float(border_color[i]); + } + + /* Turn the border color into the layout of channels that it would + * have when stored as texture contents. + */ + float storage_color[4]; + util_format_unswizzle_4f(storage_color, + border_color, + tex_format_desc->swizzle); + + /* Now, pack so that when the vc5_format-sampled texture contents are + * replaced with our border color, the vc5_get_format_swizzle() + * swizzling will get the right channels. 
+ */ + if (util_format_is_depth_or_stencil(texture->format)) { + uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, + sampler->border_color.f[0]) << 8; + } else { + switch (rsc->vc5_format) { + default: + case VC5_TEXTURE_TYPE_RGBA8888: + util_pack_color(storage_color, + PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_RGBA4444: + util_pack_color(storage_color, + PIPE_FORMAT_A8B8G8R8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_RGB565: + util_pack_color(storage_color, + PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_ALPHA: + uc.ui[0] = float_to_ubyte(storage_color[0]) << 24; + break; + case VC5_TEXTURE_TYPE_LUMALPHA: + uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) | + (float_to_ubyte(storage_color[0]) << 0)); + break; + } + } + + cl_aligned_u32(uniforms, uc.ui[0]); +} +#endif + +static uint32_t +get_texrect_scale(struct vc5_texture_stateobj *texstate, + enum quniform_contents contents, + uint32_t data) +{ + struct pipe_sampler_view *texture = texstate->textures[data]; + uint32_t dim; + + if (contents == QUNIFORM_TEXRECT_SCALE_X) + dim = texture->texture->width0; + else + dim = texture->texture->height0; + + return fui(1.0f / dim); +} + +static uint32_t +get_texture_size(struct vc5_texture_stateobj *texstate, + enum quniform_contents contents, + uint32_t data) +{ + struct pipe_sampler_view *texture = texstate->textures[data]; + + switch (contents) { + case QUNIFORM_TEXTURE_WIDTH: + return u_minify(texture->texture->width0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_HEIGHT: + return u_minify(texture->texture->height0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_DEPTH: + return u_minify(texture->texture->depth0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_ARRAY_SIZE: + return texture->texture->array_size; + case QUNIFORM_TEXTURE_LEVELS: + return (texture->u.tex.last_level - + texture->u.tex.first_level) + 1; + default: + unreachable("Bad texture size field"); + } +} + +static struct vc5_bo * 
+vc5_upload_ubo(struct vc5_context *vc5, + struct vc5_compiled_shader *shader, + const uint32_t *gallium_uniforms) +{ + if (!shader->prog_data.base->ubo_size) + return NULL; + + struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen, + shader->prog_data.base->ubo_size, + "ubo"); + void *data = vc5_bo_map(ubo); + for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) { + memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset, + ((const void *)gallium_uniforms + + shader->prog_data.base->ubo_ranges[i].src_offset), + shader->prog_data.base->ubo_ranges[i].size); + } + + return ubo; +} + +/** + * Writes the P0 (CFG_MODE=1) texture parameter. + * + * Some bits of this field are dependent on the type of sample being done by + * the shader, while other bits are dependent on the sampler state. We OR the + * two together here. + */ +static void +write_texture_p0(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit, + uint32_t shader_data) +{ + struct pipe_sampler_state *psampler = texstate->samplers[unit]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + + cl_aligned_u32(uniforms, shader_data | sampler->p0); +} + +static void +write_texture_p1(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit) +{ + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + + struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { + .texture_state_record_base_address = texstate->texture_state[unit], + }; + + uint32_t packed; + V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect, + (uint8_t *)&packed, + &unpacked); + + cl_aligned_u32(uniforms, packed | sview->p1); +} + +struct vc5_cl_reloc +vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader, + struct vc5_constbuf_stateobj *cb, + struct vc5_texture_stateobj *texstate) +{ + struct 
v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms;
        struct vc5_job *job = vc5->job;
        /* cb[0] is the gallium user constant buffer; QUNIFORM_UNIFORM entries
         * index directly into it.
         */
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
        struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms);

        /* We always need to return some space for uniforms, because the HW
         * will be prefetching, even if we don't read any in the program.
         */
        vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4);

        /* Remember where the stream starts; the extra reference is handed to
         * the caller along with the returned reloc.
         */
        struct vc5_cl_reloc uniform_stream =
                cl_address(job->indirect.bo, cl_offset(&job->indirect));
        vc5_bo_reference(uniform_stream.bo);

        struct vc5_cl_out *uniforms =
                cl_start(&job->indirect);

        /* Emit exactly one 32-bit word per uniform, in the order the compiler
         * recorded them -- the hardware consumes the stream positionally.
         */
        for (int i = 0; i < uinfo->count; i++) {

                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        cl_aligned_u32(&uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        cl_aligned_u32(&uniforms,
                                       gallium_uniforms[uinfo->data[i]]);
                        break;
                case QUNIFORM_VIEWPORT_X_SCALE:
                        /* NOTE(review): X/Y scales are multiplied by 256.0 --
                         * presumably the hardware takes them in 1/256th-pixel
                         * fixed point; confirm against the V3D spec.
                         */
                        cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_aligned_f(&uniforms, vc5->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_aligned_f(&uniforms, vc5->viewport.scale[2]);
                        break;

                case QUNIFORM_USER_CLIP_PLANE:
                        /* data[i] encodes plane * 4 + component. */
                        cl_aligned_f(&uniforms,
                                     vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(job, &uniforms, texstate,
                                         uinfo->data[i]);
                        break;

#if 0
                case QUNIFORM_TEXTURE_FIRST_LEVEL:
                        write_texture_first_level(job, &uniforms, texstate,
                                                  uinfo->data[i]);
                        break;
#endif

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        cl_aligned_u32(&uniforms,
                                       get_texrect_scale(texstate,
                                                         uinfo->contents[i],
                                                         uinfo->data[i]));
                        break;

                case QUNIFORM_TEXTURE_WIDTH:
                case QUNIFORM_TEXTURE_HEIGHT:
                case QUNIFORM_TEXTURE_DEPTH:
                case QUNIFORM_TEXTURE_ARRAY_SIZE:
                case QUNIFORM_TEXTURE_LEVELS:
                        cl_aligned_u32(&uniforms,
                                       get_texture_size(texstate,
                                                        uinfo->contents[i],
                                                        uinfo->data[i]));
                        break;

                case QUNIFORM_STENCIL:
                        /* NOTE(review): data[i] appears to select front (0) /
                         * back (1) stencil state, with the matching ref value
                         * merged into bits 8+; values > 1 get no ref.  Verify
                         * against the zsa state setup.
                         */
                        cl_aligned_u32(&uniforms,
                                       vc5->zsa->stencil_uniforms[uinfo->data[i]] |
                                       (uinfo->data[i] <= 1 ?
                                        (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                        0));
                        break;

                case QUNIFORM_ALPHA_REF:
                        cl_aligned_f(&uniforms,
                                     vc5->zsa->base.alpha.ref_value);
                        break;

                case QUNIFORM_SAMPLE_MASK:
                        cl_aligned_u32(&uniforms, vc5->sample_mask);
                        break;

                case QUNIFORM_UBO_ADDR:
                        if (uinfo->data[i] == 0) {
                                /* Index 0 is the freshly-uploaded copy of the
                                 * user constant buffer.
                                 */
                                cl_aligned_reloc(&job->indirect, &uniforms,
                                                 ubo, 0);
                        } else {
                                int ubo_index = uinfo->data[i];
                                struct vc5_resource *rsc =
                                        vc5_resource(cb->cb[ubo_index].buffer);

                                cl_aligned_reloc(&job->indirect, &uniforms,
                                                 rsc->bo,
                                                 cb->cb[ubo_index].buffer_offset);
                        }
                        break;

                case QUNIFORM_TEXTURE_FIRST_LEVEL:
                case QUNIFORM_TEXTURE_MSAA_ADDR:
                case QUNIFORM_TEXTURE_BORDER_COLOR:
                        /* XXX */
                        break;

                default:
                        /* Everything else must be a per-unit P0 texture
                         * config uniform; the unit is encoded in the contents
                         * enum relative to P0_0.
                         */
                        assert(quniform_contents_is_texture_p0(uinfo->contents[i]));

                        write_texture_p0(job, &uniforms, texstate,
                                         uinfo->contents[i] -
                                         QUNIFORM_TEXTURE_CONFIG_P0_0,
                                         uinfo->data[i]);
                        break;

                }
#if 0
                uint32_t written_val = *((uint32_t *)uniforms - 1);
                fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n",
                        shader, i, __gen_address_offset(&uniform_stream) + i * 4,
                        written_val, uif(written_val));
#endif
        }

        cl_end(&job->indirect, uniforms);

        /* Drop our local ref; the BO stays alive via the reloc recorded in
         * the indirect CL.
         */
        vc5_bo_unreference(&ubo);

        return uniform_stream;
}

/**
 * Computes which dirty-state flags should trigger re-emission of this
 * shader's uniform stream, based on which uniform kinds the compiled
 * program actually references, and stores them in
 * shader->uniform_dirty_bits.
 */
void
vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader)
{
        uint32_t dirty = 0;

        for (int i = 0; i < shader->prog_data.base->uniforms.count; i++) {
                switch (shader->prog_data.base->uniforms.contents[i]) {
                case QUNIFORM_CONSTANT:
                        /* Baked into the stream; never needs re-emission. */
                        break;
                case QUNIFORM_UNIFORM:
                case QUNIFORM_UBO_ADDR:
                        dirty |= VC5_DIRTY_CONSTBUF;
                        break;

                case QUNIFORM_VIEWPORT_X_SCALE:
                case QUNIFORM_VIEWPORT_Y_SCALE:
                case QUNIFORM_VIEWPORT_Z_OFFSET:
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        dirty |= VC5_DIRTY_VIEWPORT;
                        break;

                case QUNIFORM_USER_CLIP_PLANE:
                        dirty |= VC5_DIRTY_CLIP;
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                case QUNIFORM_TEXTURE_BORDER_COLOR:
                case QUNIFORM_TEXTURE_FIRST_LEVEL:
                case QUNIFORM_TEXTURE_MSAA_ADDR:
                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                case QUNIFORM_TEXTURE_WIDTH:
                case QUNIFORM_TEXTURE_HEIGHT:
                case QUNIFORM_TEXTURE_DEPTH:
                case QUNIFORM_TEXTURE_ARRAY_SIZE:
                case QUNIFORM_TEXTURE_LEVELS:
                        /* We could flag this on just the stage we're
                         * compiling for, but it's not passed in.
                         */
                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
                        break;

                case QUNIFORM_STENCIL:
                case QUNIFORM_ALPHA_REF:
                        dirty |= VC5_DIRTY_ZSA;
                        break;

                case QUNIFORM_SAMPLE_MASK:
                        dirty |= VC5_DIRTY_SAMPLE_MASK;
                        break;

                default:
                        /* P0 texture config uniforms (see vc5_write_uniforms). */
                        assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
                        break;
                }
        }

        shader->uniform_dirty_bits = dirty;
}