diff options
author | Eric Anholt <[email protected]> | 2018-05-01 12:24:48 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2018-05-16 21:19:07 +0100 |
commit | 8c47ebbd232704ab048eab2572e2b2a44f38957a (patch) | |
tree | 8946780fc424b3aa39e0b32ac875047605770a49 /src/gallium/drivers/v3d | |
parent | c4c488a2aeb24c0f468664c0cacd0d01111a4e46 (diff) |
v3d: Rename the driver files from "vc5" to "v3d".
Diffstat (limited to 'src/gallium/drivers/v3d')
36 files changed, 11283 insertions, 0 deletions
diff --git a/src/gallium/drivers/v3d/.editorconfig b/src/gallium/drivers/v3d/.editorconfig new file mode 100644 index 00000000000..5a9f3c041a4 --- /dev/null +++ b/src/gallium/drivers/v3d/.editorconfig @@ -0,0 +1,3 @@ +[*.{c,h,cpp}] +indent_style = space +indent_size = 8 diff --git a/src/gallium/drivers/v3d/Automake.inc b/src/gallium/drivers/v3d/Automake.inc new file mode 100644 index 00000000000..7cf8ae7cd8b --- /dev/null +++ b/src/gallium/drivers/v3d/Automake.inc @@ -0,0 +1,14 @@ +if HAVE_GALLIUM_V3D + +TARGET_DRIVERS += v3d +TARGET_CPPFLAGS += -DGALLIUM_V3D +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/winsys/v3d/drm/libv3ddrm.la \ + $(top_builddir)/src/gallium/drivers/v3d/libv3d.la \ + $(top_builddir)/src/broadcom/libbroadcom.la + +if !HAVE_GALLIUM_VC4 +TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la +endif + +endif diff --git a/src/gallium/drivers/v3d/Makefile.am b/src/gallium/drivers/v3d/Makefile.am new file mode 100644 index 00000000000..2b4c364c24e --- /dev/null +++ b/src/gallium/drivers/v3d/Makefile.am @@ -0,0 +1,56 @@ +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/broadcom \ + $(LIBDRM_CFLAGS) \ + $(V3D_SIMULATOR_CFLAGS) \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $() + +noinst_LTLIBRARIES = \ + libv3d.la \ + libv3d_v33.la \ + libv3d_v41.la \ + $() + +libv3d_v33_la_SOURCES = $(V3D_PER_VERSION_SOURCES) +libv3d_v33_la_CFLAGS = $(AM_CFLAGS) -DV3D_VERSION=33 + +libv3d_v41_la_SOURCES = $(V3D_PER_VERSION_SOURCES) +libv3d_v41_la_CFLAGS = $(AM_CFLAGS) -DV3D_VERSION=41 + +libv3d_la_SOURCES = $(C_SOURCES) + +libv3d_la_LDFLAGS = \ + $(V3D_SIMULATOR_LIBS) \ + $(NULL) +libv3d_la_LIBADD = \ + libv3d_v33.la \ + libv3d_v41.la \ + $() + +EXTRA_DIST = meson.build diff --git a/src/gallium/drivers/v3d/Makefile.sources b/src/gallium/drivers/v3d/Makefile.sources new file mode 100644 index 00000000000..c81ccb42013 --- /dev/null +++ b/src/gallium/drivers/v3d/Makefile.sources @@ -0,0 +1,36 @@ +C_SOURCES := \ + v3d_blit.c \ + v3d_bufmgr.c \ + v3d_bufmgr.h \ + v3d_cl.c \ + v3d_cl.h \ + v3d_context.c \ + v3d_context.h \ + v3d_fence.c \ + v3d_formats.c \ + v3d_format_table.h \ + v3d_job.c \ + v3d_program.c \ + v3d_query.c \ + v3d_resource.c \ + v3d_resource.h \ + v3d_screen.c \ + v3d_screen.h \ + v3d_simulator.c \ + v3d_simulator_wrapper.cpp \ + v3d_simulator_wrapper.h \ + v3d_tiling.c \ + v3d_tiling.h \ + v3d_uniforms.c \ + $() + +V3D_PER_VERSION_SOURCES = \ + v3dx_context.h \ + v3dx_draw.c \ + v3dx_emit.c \ + v3dx_format_table.c \ + v3dx_job.c \ + v3dx_rcl.c \ + v3dx_simulator.c \ + v3dx_state.c \ + $() diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build new file mode 100644 index 00000000000..38021515eda --- /dev/null +++ b/src/gallium/drivers/v3d/meson.build @@ -0,0 +1,96 @@ +# Copyright © 2017 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +files_libv3d = files( + 'v3d_blit.c', + 'v3d_bufmgr.c', + 'v3d_bufmgr.h', + 'v3d_cl.c', + 'v3d_cl.h', + 'v3d_context.c', + 'v3d_context.h', + 'v3d_fence.c', + 'v3d_formats.c', + 'v3d_job.c', + 'v3d_program.c', + 'v3d_query.c', + 'v3d_resource.c', + 'v3d_resource.h', + 'v3d_screen.c', + 'v3d_screen.h', + 'v3d_simulator.c', + 'v3d_simulator_wrapper.cpp', + 'v3d_tiling.c', + 'v3d_tiling.h', + 'v3d_uniforms.c', +) + +files_per_version = files( + 'v3dx_draw.c', + 'v3dx_emit.c', + 'v3dx_format_table.c', + 'v3dx_job.c', + 'v3dx_rcl.c', + 'v3dx_simulator.c', + 'v3dx_state.c', +) + +v3dv3_c_args = [] +dep_v3dv3 = dependency('v3dv3') +if dep_v3dv3.found() + v3dv3_c_args = '-DUSE_V3D_SIMULATOR' +endif + +v3d_versions = ['33', '41'] + +per_version_libs = [] +foreach ver : v3d_versions + per_version_libs += static_library( + 'v3d-v' + ver, + [files_per_version, v3d_xml_pack, nir_opcodes_h, nir_builder_opcodes_h], + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, + inc_gallium_drivers, inc_drm_uapi, + ], + c_args : [c_vis_args, v3dv3_c_args, '-DV3D_VERSION=' + ver], + cpp_args : [cpp_vis_args], + dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind], +) + +endforeach + +libv3d = static_library( + 'v3d', + [files_libv3d, v3d_xml_pack], + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, + inc_gallium_drivers, inc_drm_uapi, + ], + c_args : [c_vis_args, v3dv3_c_args], + cpp_args : [cpp_vis_args, v3dv3_c_args], + dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind, idep_nir_headers], + link_with: per_version_libs, +) + +driver_v3d = declare_dependency( + compile_args : '-DGALLIUM_V3D', + link_with : [libv3d, libv3dwinsys, libbroadcom_cle, libbroadcom_v3d], + dependencies : idep_nir, +) diff --git a/src/gallium/drivers/v3d/v3d_blit.c b/src/gallium/drivers/v3d/v3d_blit.c new file mode 100644 index 00000000000..7c67d4561ba --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_blit.c @@ -0,0 +1,302 @@ +/* + * Copyright © 2015-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_surface.h" +#include "util/u_blitter.h" +#include "v3d_context.h" + +#if 0 +static struct pipe_surface * +vc5_get_blit_surface(struct pipe_context *pctx, + struct pipe_resource *prsc, unsigned level) +{ + struct pipe_surface tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.format = prsc->format; + tmpl.u.tex.level = level; + tmpl.u.tex.first_layer = 0; + tmpl.u.tex.last_layer = 0; + + return pctx->create_surface(pctx, prsc, &tmpl); +} + +static bool +is_tile_unaligned(unsigned size, unsigned tile_size) +{ + return size & (tile_size - 1); +} + +static bool +vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(pctx); + bool msaa = (info->src.resource->nr_samples > 1 || + info->dst.resource->nr_samples > 1); + int tile_width = msaa ? 32 : 64; + int tile_height = msaa ? 32 : 64; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) + return false; + + if (info->scissor_enable) + return false; + + if ((info->mask & PIPE_MASK_RGBA) == 0) + return false; + + if (info->dst.box.x != info->src.box.x || + info->dst.box.y != info->src.box.y || + info->dst.box.width != info->src.box.width || + info->dst.box.height != info->src.box.height) { + return false; + } + + int dst_surface_width = u_minify(info->dst.resource->width0, + info->dst.level); + int dst_surface_height = u_minify(info->dst.resource->height0, + info->dst.level); + if (is_tile_unaligned(info->dst.box.x, tile_width) || + is_tile_unaligned(info->dst.box.y, tile_height) || + (is_tile_unaligned(info->dst.box.width, tile_width) && + info->dst.box.x + info->dst.box.width != dst_surface_width) || + (is_tile_unaligned(info->dst.box.height, tile_height) && + info->dst.box.y + info->dst.box.height != dst_surface_height)) { + return false; + } + + /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the + * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our + * destination surface) to determine the stride. This may be wrong + * when reading from texture miplevels > 0, which are stored in + * POT-sized areas. For MSAA, the tile addresses are computed + * explicitly by the RCL, but still use the destination width to + * determine the stride (which could be fixed by explicitly supplying + * it in the ABI). + */ + struct vc5_resource *rsc = vc5_resource(info->src.resource); + + uint32_t stride; + + if (info->src.resource->nr_samples > 1) + stride = align(dst_surface_width, 32) * 4 * rsc->cpp; + /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T) + stride = align(dst_surface_width * rsc->cpp, 128); */ + else + stride = align(dst_surface_width * rsc->cpp, 16); + + if (stride != rsc->slices[info->src.level].stride) + return false; + + if (info->dst.resource->format != info->src.resource->format) + return false; + + if (false) { + fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", + info->src.box.x, + info->src.box.y, + info->dst.box.x, + info->dst.box.y, + info->dst.box.width, + info->dst.box.height); + } + + struct pipe_surface *dst_surf = + vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level); + struct pipe_surface *src_surf = + vc5_get_blit_surface(pctx, info->src.resource, info->src.level); + + vc5_flush_jobs_reading_resource(vc5, info->src.resource); + + struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL); + pipe_surface_reference(&job->color_read, src_surf); + + /* If we're resolving from MSAA to single sample, we still need to run + * the engine in MSAA mode for the load. + */ + if (!job->msaa && info->src.resource->nr_samples > 1) { + job->msaa = true; + job->tile_width = 32; + job->tile_height = 32; + } + + job->draw_min_x = info->dst.box.x; + job->draw_min_y = info->dst.box.y; + job->draw_max_x = info->dst.box.x + info->dst.box.width; + job->draw_max_y = info->dst.box.y + info->dst.box.height; + job->draw_width = dst_surf->width; + job->draw_height = dst_surf->height; + + job->tile_width = tile_width; + job->tile_height = tile_height; + job->msaa = msaa; + job->needs_flush = true; + job->resolve |= PIPE_CLEAR_COLOR; + + vc5_job_submit(vc5, job); + + pipe_surface_reference(&dst_surf, NULL); + pipe_surface_reference(&src_surf, NULL); + + return true; +} +#endif + +void +vc5_blitter_save(struct vc5_context *vc5) +{ + util_blitter_save_fragment_constant_buffer_slot(vc5->blitter, + vc5->constbuf[PIPE_SHADER_FRAGMENT].cb); + util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb); + util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx); + util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs); + util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, + vc5->streamout.targets); + util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer); + util_blitter_save_viewport(vc5->blitter, &vc5->viewport); + util_blitter_save_scissor(vc5->blitter, &vc5->scissor); + util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs); + util_blitter_save_blend(vc5->blitter, vc5->blend); + util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa); + util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref); + util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask); + util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer); + util_blitter_save_fragment_sampler_states(vc5->blitter, + vc5->fragtex.num_samplers, + (void **)vc5->fragtex.samplers); + util_blitter_save_fragment_sampler_views(vc5->blitter, + vc5->fragtex.num_textures, vc5->fragtex.textures); + util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, + vc5->streamout.targets); +} + +static bool +vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(ctx); + + if (!util_blitter_is_blit_supported(vc5->blitter, info)) { + fprintf(stderr, "blit unsupported %s -> %s\n", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + return false; + } + + vc5_blitter_save(vc5); + util_blitter_blit(vc5->blitter, info); + + return true; +} + +/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888 + * or R8 texture. + */ +static void +vc5_stencil_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(ctx); + struct vc5_resource *src = vc5_resource(info->src.resource); + struct vc5_resource *dst = vc5_resource(info->dst.resource); + enum pipe_format src_format, dst_format; + + if (src->separate_stencil) { + src = src->separate_stencil; + src_format = PIPE_FORMAT_R8_UNORM; + } else { + src_format = PIPE_FORMAT_RGBA8888_UNORM; + } + + if (dst->separate_stencil) { + dst = dst->separate_stencil; + dst_format = PIPE_FORMAT_R8_UNORM; + } else { + dst_format = PIPE_FORMAT_RGBA8888_UNORM; + } + + /* Initialize the surface. */ + struct pipe_surface dst_tmpl = { + .u.tex = { + .level = info->dst.level, + .first_layer = info->dst.box.z, + .last_layer = info->dst.box.z, + }, + .format = dst_format, + }; + struct pipe_surface *dst_surf = + ctx->create_surface(ctx, &dst->base, &dst_tmpl); + + /* Initialize the sampler view. */ + struct pipe_sampler_view src_tmpl = { + .target = src->base.target, + .format = src_format, + .u.tex = { + .first_level = info->src.level, + .last_level = info->src.level, + .first_layer = 0, + .last_layer = (PIPE_TEXTURE_3D ? + u_minify(src->base.depth0, + info->src.level) - 1 : + src->base.array_size - 1), + }, + .swizzle_r = PIPE_SWIZZLE_X, + .swizzle_g = PIPE_SWIZZLE_Y, + .swizzle_b = PIPE_SWIZZLE_Z, + .swizzle_a = PIPE_SWIZZLE_W, + }; + struct pipe_sampler_view *src_view = + ctx->create_sampler_view(ctx, &src->base, &src_tmpl); + + vc5_blitter_save(vc5); + util_blitter_blit_generic(vc5->blitter, dst_surf, &info->dst.box, + src_view, &info->src.box, + src->base.width0, src->base.height0, + PIPE_MASK_R, + PIPE_TEX_FILTER_NEAREST, + info->scissor_enable ? &info->scissor : NULL, + info->alpha_blend); + + pipe_surface_reference(&dst_surf, NULL); + pipe_sampler_view_reference(&src_view, NULL); +} + +/* Optimal hardware path for blitting pixels. + * Scaling, format conversion, up- and downsampling (resolve) are allowed. + */ +void +vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) +{ + struct pipe_blit_info info = *blit_info; + + if (info.mask & PIPE_MASK_S) { + vc5_stencil_blit(pctx, blit_info); + info.mask &= ~PIPE_MASK_S; + } + +#if 0 + if (vc5_tile_blit(pctx, blit_info)) + return; +#endif + + vc5_render_blit(pctx, &info); +} diff --git a/src/gallium/drivers/v3d/v3d_bufmgr.c b/src/gallium/drivers/v3d/v3d_bufmgr.c new file mode 100644 index 00000000000..ef2a5fa07be --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_bufmgr.c @@ -0,0 +1,552 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <err.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "util/u_hash_table.h" +#include "util/u_memory.h" +#include "util/ralloc.h" + +#include "v3d_context.h" +#include "v3d_screen.h" + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +static bool dump_stats = false; + +static void +vc5_bo_cache_free_all(struct vc5_bo_cache *cache); + +static void +vc5_bo_dump_stats(struct vc5_screen *screen) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + + fprintf(stderr, " BOs allocated: %d\n", screen->bo_count); + fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024); + fprintf(stderr, " BOs cached: %d\n", cache->bo_count); + fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024); + + if (!list_empty(&cache->time_list)) { + struct vc5_bo *first = LIST_ENTRY(struct vc5_bo, + cache->time_list.next, + time_list); + struct vc5_bo *last = LIST_ENTRY(struct vc5_bo, + cache->time_list.prev, + time_list); + + fprintf(stderr, " oldest cache time: %ld\n", + (long)first->free_time); + fprintf(stderr, " newest cache time: %ld\n", + (long)last->free_time); + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + fprintf(stderr, " now: %ld\n", + time.tv_sec); + } +} + +static void +vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo) +{ + list_del(&bo->time_list); + list_del(&bo->size_list); + cache->bo_count--; + cache->bo_size -= bo->size; +} + +static struct vc5_bo * +vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + uint32_t page_index = size / 4096 - 1; + + if (cache->size_list_size <= page_index) + return NULL; + + struct vc5_bo *bo = NULL; + mtx_lock(&cache->lock); + if (!list_empty(&cache->size_list[page_index])) { + bo = LIST_ENTRY(struct vc5_bo, cache->size_list[page_index].next, + size_list); + + /* Check that the BO has gone idle. If not, then we want to + * allocate something new instead, since we assume that the + * user will proceed to CPU map it and fill it with stuff. + */ + if (!vc5_bo_wait(bo, 0, NULL)) { + mtx_unlock(&cache->lock); + return NULL; + } + + pipe_reference_init(&bo->reference, 1); + vc5_bo_remove_from_cache(cache, bo); + + bo->name = name; + } + mtx_unlock(&cache->lock); + return bo; +} + +struct vc5_bo * +vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name) +{ + struct vc5_bo *bo; + int ret; + + size = align(size, 4096); + + bo = vc5_bo_from_cache(screen, size, name); + if (bo) { + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb from cache:\n", + name, size / 1024); + vc5_bo_dump_stats(screen); + } + return bo; + } + + bo = CALLOC_STRUCT(vc5_bo); + if (!bo) + return NULL; + + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->size = size; + bo->name = name; + bo->private = true; + + retry: + ; + + bool cleared_and_retried = false; + struct drm_v3d_create_bo create = { + .size = size + }; + + ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_CREATE_BO, &create); + bo->handle = create.handle; + bo->offset = create.offset; + + if (ret != 0) { + if (!list_empty(&screen->bo_cache.time_list) && + !cleared_and_retried) { + cleared_and_retried = true; + vc5_bo_cache_free_all(&screen->bo_cache); + goto retry; + } + + free(bo); + return NULL; + } + + screen->bo_count++; + screen->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024); + vc5_bo_dump_stats(screen); + } + + return bo; +} + +void +vc5_bo_last_unreference(struct vc5_bo *bo) +{ + struct vc5_screen *screen = bo->screen; + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + mtx_lock(&screen->bo_cache.lock); + vc5_bo_last_unreference_locked_timed(bo, time.tv_sec); + mtx_unlock(&screen->bo_cache.lock); +} + +static void +vc5_bo_free(struct vc5_bo *bo) +{ + struct vc5_screen *screen = bo->screen; + + if (bo->map) { + if (using_vc5_simulator && bo->name && + strcmp(bo->name, "winsys") == 0) { + free(bo->map); + } else { + munmap(bo->map, bo->size); + VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); + } + } + + struct drm_gem_close c; + memset(&c, 0, sizeof(c)); + c.handle = bo->handle; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c); + if (ret != 0) + fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); + + screen->bo_count--; + screen->bo_size -= bo->size; + + if (dump_stats) { + fprintf(stderr, "Freed %s%s%dkb:\n", + bo->name ? bo->name : "", + bo->name ? " " : "", + bo->size / 1024); + vc5_bo_dump_stats(screen); + } + + free(bo); +} + +static void +free_stale_bos(struct vc5_screen *screen, time_t time) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + bool freed_any = false; + + list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, + time_list) { + if (dump_stats && !freed_any) { + fprintf(stderr, "Freeing stale BOs:\n"); + vc5_bo_dump_stats(screen); + freed_any = true; + } + + /* If it's more than a second old, free it. */ + if (time - bo->free_time > 2) { + vc5_bo_remove_from_cache(cache, bo); + vc5_bo_free(bo); + } else { + break; + } + } + + if (dump_stats && freed_any) { + fprintf(stderr, "Freed stale BOs:\n"); + vc5_bo_dump_stats(screen); + } +} + +static void +vc5_bo_cache_free_all(struct vc5_bo_cache *cache) +{ + mtx_lock(&cache->lock); + list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, + time_list) { + vc5_bo_remove_from_cache(cache, bo); + vc5_bo_free(bo); + } + mtx_unlock(&cache->lock); +} + +void +vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time) +{ + struct vc5_screen *screen = bo->screen; + struct vc5_bo_cache *cache = &screen->bo_cache; + uint32_t page_index = bo->size / 4096 - 1; + + if (!bo->private) { + vc5_bo_free(bo); + return; + } + + if (cache->size_list_size <= page_index) { + struct list_head *new_list = + ralloc_array(screen, struct list_head, page_index + 1); + + /* Move old list contents over (since the array has moved, and + * therefore the pointers to the list heads have to change). + */ + for (int i = 0; i < cache->size_list_size; i++) { + struct list_head *old_head = &cache->size_list[i]; + if (list_empty(old_head)) + list_inithead(&new_list[i]); + else { + new_list[i].next = old_head->next; + new_list[i].prev = old_head->prev; + new_list[i].next->prev = &new_list[i]; + new_list[i].prev->next = &new_list[i]; + } + } + for (int i = cache->size_list_size; i < page_index + 1; i++) + list_inithead(&new_list[i]); + + cache->size_list = new_list; + cache->size_list_size = page_index + 1; + } + + bo->free_time = time; + list_addtail(&bo->size_list, &cache->size_list[page_index]); + list_addtail(&bo->time_list, &cache->time_list); + cache->bo_count++; + cache->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Freed %s %dkb to cache:\n", + bo->name, bo->size / 1024); + vc5_bo_dump_stats(screen); + } + bo->name = NULL; + + free_stale_bos(screen, time); +} + +static struct vc5_bo * +vc5_bo_open_handle(struct vc5_screen *screen, + uint32_t winsys_stride, + uint32_t handle, uint32_t size) +{ + struct vc5_bo *bo; + + assert(size); + + mtx_lock(&screen->bo_handles_mutex); + + bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle); + if (bo) { + pipe_reference(NULL, &bo->reference); + goto done; + } + + bo = CALLOC_STRUCT(vc5_bo); + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->handle = handle; + bo->size = size; + bo->name = "winsys"; + bo->private = false; + +#ifdef USE_V3D_SIMULATOR + vc5_simulator_open_from_handle(screen->fd, winsys_stride, + bo->handle, bo->size); + bo->map = malloc(bo->size); +#endif + + struct drm_v3d_get_bo_offset get = { + .handle = handle, + }; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_BO_OFFSET, &get); + if (ret) { + fprintf(stderr, "Failed to get BO offset: %s\n", + strerror(errno)); + free(bo->map); + free(bo); + return NULL; + } + bo->offset = get.offset; + assert(bo->offset != 0); + + util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo); + +done: + mtx_unlock(&screen->bo_handles_mutex); + return bo; +} + +struct vc5_bo * +vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, + uint32_t winsys_stride) +{ + struct drm_gem_open o = { + .name = name + }; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o); + if (ret) { + fprintf(stderr, "Failed to open bo %d: %s\n", + name, strerror(errno)); + return NULL; + } + + return vc5_bo_open_handle(screen, winsys_stride, o.handle, o.size); +} + +struct vc5_bo * +vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride) +{ + uint32_t handle; + int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); + int size; + if (ret) { + fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd); + return NULL; + } + + /* Determine the size of the bo we were handed. */ + size = lseek(fd, 0, SEEK_END); + if (size == -1) { + fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd); + return NULL; + } + + return vc5_bo_open_handle(screen, winsys_stride, handle, size); +} + +int +vc5_bo_get_dmabuf(struct vc5_bo *bo) +{ + int fd; + int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle, + O_CLOEXEC, &fd); + if (ret != 0) { + fprintf(stderr, "Failed to export gem bo %d to dmabuf\n", + bo->handle); + return -1; + } + + mtx_lock(&bo->screen->bo_handles_mutex); + bo->private = false; + util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo); + mtx_unlock(&bo->screen->bo_handles_mutex); + + return fd; +} + +bool +vc5_bo_flink(struct vc5_bo *bo, uint32_t *name) +{ + struct drm_gem_flink flink = { + .handle = bo->handle, + }; + int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink); + if (ret) { + fprintf(stderr, "Failed to flink bo %d: %s\n", + bo->handle, strerror(errno)); + free(bo); + return false; + } + + bo->private = false; + *name = flink.name; + + return true; +} + +static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns) +{ + struct drm_v3d_wait_bo wait = { + .handle = handle, + .timeout_ns = timeout_ns, + }; + int ret = vc5_ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait); + if (ret == -1) + return -errno; + else + return 0; + +} + +bool +vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason) +{ + struct vc5_screen *screen = bo->screen; + + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) { + if (vc5_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) { + fprintf(stderr, "Blocking on %s BO for %s\n", + bo->name, reason); + } + } + + int ret = vc5_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns); + if (ret) { + if (ret != -ETIME) { + fprintf(stderr, "wait failed: %d\n", ret); + abort(); + } + + return false; + } + + return true; +} + +void * +vc5_bo_map_unsynchronized(struct vc5_bo *bo) +{ + uint64_t offset; + int ret; + + if (bo->map) + return bo->map; + + struct drm_v3d_mmap_bo map; + memset(&map, 0, sizeof(map)); + map.handle = bo->handle; + ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_V3D_MMAP_BO, &map); + offset = map.offset; + if (ret != 0) { + fprintf(stderr, "map ioctl failure\n"); + abort(); + } + + bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->screen->fd, offset); + if (bo->map == MAP_FAILED) { + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + bo->handle, (long long)offset, bo->size); + abort(); + } + VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false)); + + return bo->map; +} + +void * +vc5_bo_map(struct vc5_bo *bo) +{ + void *map = vc5_bo_map_unsynchronized(bo); + + bool ok = vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map"); + if (!ok) { + fprintf(stderr, "BO wait for map failed\n"); + abort(); + } + + return map; +} + +void +vc5_bufmgr_destroy(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_bo_cache *cache = &screen->bo_cache; + + vc5_bo_cache_free_all(cache); + + if (dump_stats) { + fprintf(stderr, "BO stats after screen destroy:\n"); + vc5_bo_dump_stats(screen); + } +} diff --git a/src/gallium/drivers/v3d/v3d_bufmgr.h b/src/gallium/drivers/v3d/v3d_bufmgr.h new file mode 100644 index 00000000000..4519a206026 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_bufmgr.h @@ -0,0 +1,140 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_BUFMGR_H +#define VC5_BUFMGR_H + +#include <stdint.h> +#include "util/u_hash_table.h" +#include "util/u_inlines.h" +#include "util/list.h" +#include "v3d_screen.h" + +struct vc5_context; + +struct vc5_bo { + struct pipe_reference reference; + struct vc5_screen *screen; + void *map; + const char *name; + uint32_t handle; + uint32_t size; + + /* Address of the BO in our page tables. */ + uint32_t offset; + + /** Entry in the linked list of buffers freed, by age. */ + struct list_head time_list; + /** Entry in the per-page-count linked list of buffers freed (by age). */ + struct list_head size_list; + /** Approximate second when the bo was freed. */ + time_t free_time; + /** + * Whether only our process has a reference to the BO (meaning that + * it's safe to reuse it in the BO cache). + */ + bool private; +}; + +struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, + const char *name); +void vc5_bo_last_unreference(struct vc5_bo *bo); +void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time); +struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, + uint32_t winsys_stride); +struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, + uint32_t winsys_stride); +bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name); +int vc5_bo_get_dmabuf(struct vc5_bo *bo); + +static inline void +vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo) +{ + if (pipe_reference(&(*old_bo)->reference, &new_bo->reference)) + vc5_bo_last_unreference(*old_bo); + *old_bo = new_bo; +} + +static inline struct vc5_bo * +vc5_bo_reference(struct vc5_bo *bo) +{ + pipe_reference(NULL, &bo->reference); + return bo; +} + +static inline void +vc5_bo_unreference(struct vc5_bo **bo) +{ + struct vc5_screen *screen; + if (!*bo) + return; + + if ((*bo)->private) { + /* Avoid the mutex for private BOs */ + if (pipe_reference(&(*bo)->reference, NULL)) + vc5_bo_last_unreference(*bo); + } else { + screen = (*bo)->screen; + mtx_lock(&screen->bo_handles_mutex); + + if (pipe_reference(&(*bo)->reference, NULL)) { + util_hash_table_remove(screen->bo_handles, + (void *)(uintptr_t)(*bo)->handle); + vc5_bo_last_unreference(*bo); + } + + mtx_unlock(&screen->bo_handles_mutex); + } + + *bo = NULL; +} + +static inline void +vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time) +{ + if (!*bo) + return; + + if (pipe_reference(&(*bo)->reference, NULL)) + vc5_bo_last_unreference_locked_timed(*bo, time); + *bo = NULL; +} + +void * +vc5_bo_map(struct vc5_bo *bo); + +void * +vc5_bo_map_unsynchronized(struct vc5_bo *bo); + +bool +vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason); + +bool +vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns, + const char *reason); + +void +vc5_bufmgr_destroy(struct pipe_screen *pscreen); + +#endif /* VC5_BUFMGR_H */ + diff --git a/src/gallium/drivers/v3d/v3d_cl.c b/src/gallium/drivers/v3d/v3d_cl.c new file mode 100644 index 00000000000..2ffb7ea9a2c --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_cl.c @@ -0,0 +1,90 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_math.h" +#include "util/ralloc.h" +#include "v3d_context.h" +/* The branching packets are the same across V3D versions. */ +#define V3D_VERSION 33 +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +void +vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl) +{ + cl->base = NULL; + cl->next = cl->base; + cl->size = 0; + cl->job = job; +} + +uint32_t +vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment) +{ + uint32_t offset = align(cl_offset(cl), alignment); + + if (offset + space <= cl->size) { + cl->next = cl->base + offset; + return offset; + } + + vc5_bo_unreference(&cl->bo); + cl->bo = vc5_bo_alloc(cl->job->vc5->screen, align(space, 4096), "CL"); + cl->base = vc5_bo_map(cl->bo); + cl->size = cl->bo->size; + cl->next = cl->base; + + return 0; +} + +void +vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space) +{ + if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size) + return; + + struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL"); + assert(space <= new_bo->size); + + /* Chain to the new BO from the old one. */ + if (cl->bo) { + cl_emit(cl, BRANCH, branch) { + branch.address = cl_address(new_bo, 0); + } + vc5_bo_unreference(&cl->bo); + } else { + /* Root the first RCL/BCL BO in the job. */ + vc5_job_add_bo(cl->job, cl->bo); + } + + cl->bo = new_bo; + cl->base = vc5_bo_map(cl->bo); + cl->size = cl->bo->size; + cl->next = cl->base; +} + +void +vc5_destroy_cl(struct vc5_cl *cl) +{ + vc5_bo_unreference(&cl->bo); +} diff --git a/src/gallium/drivers/v3d/v3d_cl.h b/src/gallium/drivers/v3d/v3d_cl.h new file mode 100644 index 00000000000..7025b5a672b --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_cl.h @@ -0,0 +1,279 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_CL_H +#define VC5_CL_H + +#include <stdint.h> + +#include "util/u_math.h" +#include "util/macros.h" + +struct vc5_bo; +struct vc5_job; +struct vc5_cl; + +/** + * Undefined structure, used for typechecking that you're passing the pointers + * to these functions correctly. + */ +struct vc5_cl_out; + +/** A reference to a BO used in the CL packing functions */ +struct vc5_cl_reloc { + struct vc5_bo *bo; + uint32_t offset; +}; + +static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *); + +#define __gen_user_data struct vc5_cl +#define __gen_address_type struct vc5_cl_reloc +#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \ + (reloc)->offset) +#define __gen_emit_reloc cl_pack_emit_reloc + +struct vc5_cl { + void *base; + struct vc5_job *job; + struct vc5_cl_out *next; + struct vc5_bo *bo; + uint32_t size; +}; + +void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl); +void vc5_destroy_cl(struct vc5_cl *cl); +void vc5_dump_cl(void *cl, uint32_t size, bool is_render); +uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo); + +struct PACKED unaligned_16 { uint16_t x; }; +struct PACKED unaligned_32 { uint32_t x; }; + +static inline uint32_t cl_offset(struct vc5_cl *cl) +{ + return (char *)cl->next - (char *)cl->base; +} + +static inline struct vc5_cl_reloc cl_get_address(struct vc5_cl *cl) +{ + return (struct vc5_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) }; +} + +static inline void +cl_advance(struct vc5_cl_out **cl, uint32_t n) +{ + (*cl) = (struct vc5_cl_out *)((char *)(*cl) + n); +} + +static inline struct vc5_cl_out * +cl_start(struct vc5_cl *cl) +{ + return cl->next; +} + +static inline void +cl_end(struct vc5_cl *cl, struct vc5_cl_out *next) +{ + cl->next = next; + assert(cl_offset(cl) <= cl->size); +} + + +static inline void +put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val) +{ + struct unaligned_32 *p = (void *)ptr; + p->x = val; +} + +static inline void +put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val) +{ + struct unaligned_16 *p = (void *)ptr; + p->x = val; +} + +static inline void +cl_u8(struct vc5_cl_out **cl, uint8_t n) +{ + *(uint8_t *)(*cl) = n; + cl_advance(cl, 1); +} + +static inline void +cl_u16(struct vc5_cl_out **cl, uint16_t n) +{ + put_unaligned_16(*cl, n); + cl_advance(cl, 2); +} + +static inline void +cl_u32(struct vc5_cl_out **cl, uint32_t n) +{ + put_unaligned_32(*cl, n); + cl_advance(cl, 4); +} + +static inline void +cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n) +{ + *(uint32_t *)(*cl) = n; + cl_advance(cl, 4); +} + +static inline void +cl_aligned_reloc(struct vc5_cl *cl, + struct vc5_cl_out **cl_out, + struct vc5_bo *bo, uint32_t offset) +{ + cl_aligned_u32(cl_out, bo->offset + offset); + vc5_job_add_bo(cl->job, bo); +} + +static inline void +cl_ptr(struct vc5_cl_out **cl, void *ptr) +{ + *(struct vc5_cl_out **)(*cl) = ptr; + cl_advance(cl, sizeof(void *)); +} + +static inline void +cl_f(struct vc5_cl_out **cl, float f) +{ + cl_u32(cl, fui(f)); +} + +static inline void +cl_aligned_f(struct vc5_cl_out **cl, float f) +{ + cl_aligned_u32(cl, fui(f)); +} + +/** + * Reference to a BO with its associated offset, used in the pack process. + */ +static inline struct vc5_cl_reloc +cl_address(struct vc5_bo *bo, uint32_t offset) +{ + struct vc5_cl_reloc reloc = { + .bo = bo, + .offset = offset, + }; + return reloc; +} + +uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align); +void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size); + +#define cl_packet_header(packet) V3DX(packet ## _header) +#define cl_packet_length(packet) V3DX(packet ## _length) +#define cl_packet_pack(packet) V3DX(packet ## _pack) +#define cl_packet_struct(packet) V3DX(packet) + +static inline void * +cl_get_emit_space(struct vc5_cl_out **cl, size_t size) +{ + void *addr = *cl; + cl_advance(cl, size); + return addr; +} + +/* Macro for setting up an emit of a CL struct. A temporary unpacked struct + * is created, which you get to set fields in of the form: + * + * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) { + * .flags.flat_shade_flags = 1 << 2, + * } + * + * or default values only can be emitted with just: + * + * cl_emit(bcl, FLAT_SHADE_FLAGS, flags); + * + * The trick here is that we make a for loop that will execute the body + * (either the block or the ';' after the macro invocation) exactly once. + */ +#define cl_emit(cl, packet, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + struct vc5_cl_out *cl_out = cl_start(cl); \ + cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \ + cl_advance(&cl_out, cl_packet_length(packet)); \ + cl_end(cl, cl_out); \ + _loop_terminate = NULL; \ + })) \ + +#define cl_emit_with_prepacked(cl, packet, prepacked, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + struct vc5_cl_out *cl_out = cl_start(cl); \ + uint8_t packed[cl_packet_length(packet)]; \ + cl_packet_pack(packet)(cl, packed, &name); \ + for (int _i = 0; _i < cl_packet_length(packet); _i++) \ + ((uint8_t *)cl_out)[_i] = packed[_i] | (prepacked)[_i]; \ + cl_advance(&cl_out, cl_packet_length(packet)); \ + cl_end(cl, cl_out); \ + _loop_terminate = NULL; \ + })) \ + +#define cl_emit_prepacked(cl, packet) do { \ + memcpy((cl)->next, packet, sizeof(*packet)); \ + cl_advance(&(cl)->next, sizeof(*packet)); \ +} while (0) + +#define v3dx_pack(packed, packet, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + cl_packet_pack(packet)(NULL, (uint8_t *)packed, &name); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED((uint8_t *)packed, \ + cl_packet_length(packet))); \ + _loop_terminate = NULL; \ + })) \ + +/** + * Helper function called by the XML-generated pack functions for filling in + * an address field in shader records. + * + * Since we have a private address space as of VC5, our BOs can have lifelong + * offsets, and all the kernel needs to know is which BOs need to be paged in + * for this exec. + */ +static inline void +cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc) +{ + if (reloc->bo) + vc5_job_add_bo(cl->job, reloc->bo); +} + +#endif /* VC5_CL_H */ diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c new file mode 100644 index 00000000000..cb37eba3841 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_context.c @@ -0,0 +1,183 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <xf86drm.h> +#include <err.h> + +#include "pipe/p_defines.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_blitter.h" +#include "util/u_upload_mgr.h" +#include "indices/u_primconvert.h" +#include "pipe/p_screen.h" + +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_resource.h" + +void +vc5_flush(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + struct hash_entry *entry; + hash_table_foreach(vc5->jobs, entry) { + struct vc5_job *job = entry->data; + vc5_job_submit(vc5, job); + } +} + +static void +vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, + unsigned flags) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5_flush(pctx); + + if (fence) { + struct pipe_screen *screen = pctx->screen; + struct vc5_fence *f = vc5_fence_create(vc5); + screen->fence_reference(screen, fence, NULL); + *fence = (struct pipe_fence_handle *)f; + } +} + +static void +vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_resource *rsc = vc5_resource(prsc); + + rsc->initialized_buffers = 0; + + struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, + prsc); + if (!entry) + return; + + struct vc5_job *job = entry->data; + if (job->key.zsbuf && job->key.zsbuf->texture == prsc) + job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); +} + +static void +vc5_context_destroy(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5_flush(pctx); + + if (vc5->blitter) + util_blitter_destroy(vc5->blitter); + + if (vc5->primconvert) + util_primconvert_destroy(vc5->primconvert); + + if (vc5->uploader) + u_upload_destroy(vc5->uploader); + + slab_destroy_child(&vc5->transfer_pool); + + pipe_surface_reference(&vc5->framebuffer.cbufs[0], NULL); + pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL); + + vc5_program_fini(pctx); + + ralloc_free(vc5); +} + +struct pipe_context * +vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_context *vc5; + + /* Prevent dumping of the shaders built during context setup. */ + uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB; + V3D_DEBUG &= ~V3D_DEBUG_SHADERDB; + + vc5 = rzalloc(NULL, struct vc5_context); + if (!vc5) + return NULL; + struct pipe_context *pctx = &vc5->base; + + vc5->screen = screen; + + int ret = drmSyncobjCreate(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &vc5->out_sync); + if (ret) { + ralloc_free(vc5); + return NULL; + } + + pctx->screen = pscreen; + pctx->priv = priv; + pctx->destroy = vc5_context_destroy; + pctx->flush = vc5_pipe_flush; + pctx->invalidate_resource = vc5_invalidate_resource; + + if (screen->devinfo.ver >= 41) { + v3d41_draw_init(pctx); + v3d41_state_init(pctx); + } else { + v3d33_draw_init(pctx); + v3d33_state_init(pctx); + } + vc5_program_init(pctx); + vc5_query_init(pctx); + vc5_resource_context_init(pctx); + + vc5_job_init(vc5); + + vc5->fd = screen->fd; + + slab_create_child(&vc5->transfer_pool, &screen->transfer_pool); + + vc5->uploader = u_upload_create_default(&vc5->base); + vc5->base.stream_uploader = vc5->uploader; + vc5->base.const_uploader = vc5->uploader; + + vc5->blitter = util_blitter_create(pctx); + if (!vc5->blitter) + goto fail; + + vc5->primconvert = util_primconvert_create(pctx, + (1 << PIPE_PRIM_QUADS) - 1); + if (!vc5->primconvert) + goto fail; + + V3D_DEBUG |= saved_shaderdb_flag; + + vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1; + vc5->active_queries = true; + + return &vc5->base; + +fail: + pctx->destroy(pctx); + return NULL; +} diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h new file mode 100644 index 00000000000..7c17eccd47e --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_context.h @@ -0,0 +1,565 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_CONTEXT_H +#define VC5_CONTEXT_H + +#ifdef V3D_VERSION +#include "broadcom/common/v3d_macros.h" +#endif + +#include <stdio.h> + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/bitset.h" +#include "util/slab.h" +#include "xf86drm.h" +#include "v3d_drm.h" +#include "v3d_screen.h" + +struct vc5_job; +struct vc5_bo; +void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); + +#include "v3d_bufmgr.h" +#include "v3d_resource.h" +#include "v3d_cl.h" + +#ifdef USE_V3D_SIMULATOR +#define using_vc5_simulator true +#else +#define using_vc5_simulator false +#endif + +#define VC5_DIRTY_BLEND (1 << 0) +#define VC5_DIRTY_RASTERIZER (1 << 1) +#define VC5_DIRTY_ZSA (1 << 2) +#define VC5_DIRTY_FRAGTEX (1 << 3) +#define VC5_DIRTY_VERTTEX (1 << 4) + +#define VC5_DIRTY_BLEND_COLOR (1 << 7) +#define VC5_DIRTY_STENCIL_REF (1 << 8) +#define VC5_DIRTY_SAMPLE_MASK (1 << 9) +#define VC5_DIRTY_FRAMEBUFFER (1 << 10) +#define VC5_DIRTY_STIPPLE (1 << 11) +#define VC5_DIRTY_VIEWPORT (1 << 12) +#define VC5_DIRTY_CONSTBUF (1 << 13) +#define VC5_DIRTY_VTXSTATE (1 << 14) +#define VC5_DIRTY_VTXBUF (1 << 15) +#define VC5_DIRTY_SCISSOR (1 << 17) +#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18) +#define VC5_DIRTY_PRIM_MODE (1 << 19) +#define VC5_DIRTY_CLIP (1 << 20) +#define VC5_DIRTY_UNCOMPILED_VS (1 << 21) +#define VC5_DIRTY_UNCOMPILED_FS (1 << 22) +#define VC5_DIRTY_COMPILED_CS (1 << 23) +#define VC5_DIRTY_COMPILED_VS (1 << 24) +#define VC5_DIRTY_COMPILED_FS (1 << 25) +#define VC5_DIRTY_FS_INPUTS (1 << 26) +#define VC5_DIRTY_STREAMOUT (1 << 27) +#define VC5_DIRTY_OQ (1 << 28) +#define VC5_DIRTY_CENTROID_FLAGS (1 << 29) + +#define VC5_MAX_FS_INPUTS 64 + +struct vc5_sampler_view { + struct pipe_sampler_view base; + uint32_t p0; + uint32_t p1; + /* Precomputed swizzles to pass in to the shader key. */ + uint8_t swizzle[4]; + + uint8_t texture_shader_state[32]; + /* V3D 4.x: Texture state struct. */ + struct vc5_bo *bo; +}; + +struct vc5_sampler_state { + struct pipe_sampler_state base; + uint32_t p0; + uint32_t p1; + + /* V3D 3.x: Packed texture state. */ + uint8_t texture_shader_state[32]; + /* V3D 4.x: Sampler state struct. */ + struct vc5_bo *bo; +}; + +struct vc5_texture_stateobj { + struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; + struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS]; +}; + +struct vc5_shader_uniform_info { + enum quniform_contents *contents; + uint32_t *data; + uint32_t count; +}; + +struct vc5_uncompiled_shader { + /** A name for this program, so you can track it in shader-db output. */ + uint32_t program_id; + /** How many variants of this program were compiled, for shader-db. */ + uint32_t compiled_variant_count; + struct pipe_shader_state base; + uint32_t num_tf_outputs; + struct v3d_varying_slot *tf_outputs; + uint16_t tf_specs[16]; + uint16_t tf_specs_psiz[16]; + uint32_t num_tf_specs; + + /** + * Flag for if the NIR in this shader originally came from TGSI. If + * so, we need to do some fixups at compile time, due to missing + * information in TGSI that exists in NIR. + */ + bool was_tgsi; +}; + +struct vc5_compiled_shader { + struct vc5_bo *bo; + + union { + struct v3d_prog_data *base; + struct v3d_vs_prog_data *vs; + struct v3d_fs_prog_data *fs; + } prog_data; + + /** + * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the + * uniforms have to be rewritten (and therefore the shader state + * reemitted). + */ + uint32_t uniform_dirty_bits; +}; + +struct vc5_program_stateobj { + struct vc5_uncompiled_shader *bind_vs, *bind_fs; + struct vc5_compiled_shader *cs, *vs, *fs; + + struct vc5_bo *spill_bo; + int spill_size_per_thread; +}; + +struct vc5_constbuf_stateobj { + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct vc5_vertexbuf_stateobj { + struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + unsigned count; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct vc5_vertex_stateobj { + struct pipe_vertex_element pipe[VC5_MAX_ATTRIBUTES]; + unsigned num_elements; + + uint8_t attrs[12 * VC5_MAX_ATTRIBUTES]; + struct vc5_bo *default_attribute_values; +}; + +struct vc5_streamout_stateobj { + struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; + unsigned num_targets; +}; + +/* Hash table key for vc5->jobs */ +struct vc5_job_key { + struct pipe_surface *cbufs[4]; + struct pipe_surface *zsbuf; +}; + +enum vc5_ez_state { + VC5_EZ_UNDECIDED = 0, + VC5_EZ_GT_GE, + VC5_EZ_LT_LE, + VC5_EZ_DISABLED, +}; + +/** + * A complete bin/render job. + * + * This is all of the state necessary to submit a bin/render to the kernel. + * We want to be able to have multiple in progress at a time, so that we don't + * need to flush an existing CL just to switch to rendering to a new render + * target (which would mean reading back from the old render target when + * starting to render to it again). + */ +struct vc5_job { + struct vc5_context *vc5; + struct vc5_cl bcl; + struct vc5_cl rcl; + struct vc5_cl indirect; + struct vc5_bo *tile_alloc; + struct vc5_bo *tile_state; + uint32_t shader_rec_count; + + struct drm_v3d_submit_cl submit; + + /** + * Set of all BOs referenced by the job. This will be used for making + * the list of BOs that the kernel will need to have paged in to + * execute our job. + */ + struct set *bos; + + /** Sum of the sizes of the BOs referenced by the job. */ + uint32_t referenced_size; + + struct set *write_prscs; + + /* Size of the submit.bo_handles array. */ + uint32_t bo_handles_size; + + /** @{ Surfaces to submit rendering for. */ + struct pipe_surface *cbufs[4]; + struct pipe_surface *zsbuf; + /** @} */ + /** @{ + * Bounding box of the scissor across all queued drawing. + * + * Note that the max values are exclusive. + */ + uint32_t draw_min_x; + uint32_t draw_min_y; + uint32_t draw_max_x; + uint32_t draw_max_y; + /** @} */ + /** @{ + * Width/height of the color framebuffer being rendered to, + * for VC5_TILE_RENDERING_MODE_CONFIG. + */ + uint32_t draw_width; + uint32_t draw_height; + /** @} */ + /** @{ Tile information, depending on MSAA and float color buffer. */ + uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */ + uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */ + + uint32_t tile_width; /** @< Width of a tile. */ + uint32_t tile_height; /** @< Height of a tile. */ + /** maximum internal_bpp of all color render targets. */ + uint32_t internal_bpp; + + /** Whether the current rendering is in a 4X MSAA tile buffer. */ + bool msaa; + /** @} */ + + /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the + * first rendering. + */ + uint32_t cleared; + /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to + * (either clears or draws). + */ + uint32_t resolve; + uint32_t clear_color[4][4]; + float clear_z; + uint8_t clear_s; + + /** + * Set if some drawing (triangles, blits, or just a glClear()) has + * been done to the FBO, meaning that we need to + * DRM_IOCTL_VC5_SUBMIT_CL. + */ + bool needs_flush; + + /** + * Set if there is a nonzero address for OCCLUSION_QUERY_COUNTER. If + * so, we need to disable it and flush before ending the CL, to keep + * the next tile from starting with it enabled. + */ + bool oq_enabled; + + /** + * Set when a packet enabling TF on all further primitives has been + * emitted. + */ + bool tf_enabled; + + /** + * Current EZ state for drawing. Updated at the start of draw after + * we've decided on the shader being rendered. + */ + enum vc5_ez_state ez_state; + /** + * The first EZ state that was used for drawing with a decided EZ + * direction (so either UNDECIDED, GT, or LT). + */ + enum vc5_ez_state first_ez_state; + + /** + * Number of draw calls (not counting full buffer clears) queued in + * the current job. + */ + uint32_t draw_calls_queued; + + struct vc5_job_key key; +}; + +struct vc5_context { + struct pipe_context base; + + int fd; + struct vc5_screen *screen; + + /** The 3D rendering job for the currently bound FBO. */ + struct vc5_job *job; + + /* Map from struct vc5_job_key to the job for that FBO. + */ + struct hash_table *jobs; + + /** + * Map from vc5_resource to a job writing to that resource. + * + * Primarily for flushing jobs rendering to textures that are now + * being read from. + */ + struct hash_table *write_jobs; + + struct slab_child_pool transfer_pool; + struct blitter_context *blitter; + + /** bitfield of VC5_DIRTY_* */ + uint32_t dirty; + + struct primconvert_context *primconvert; + + struct hash_table *fs_cache, *vs_cache; + uint32_t next_uncompiled_program_id; + uint64_t next_compiled_program_id; + + struct vc5_compiler_state *compiler_state; + + uint8_t prim_mode; + + /** Maximum index buffer valid for the current shader_rec. */ + uint32_t max_index; + + /** Sync object that our RCL will update as its out_sync. */ + uint32_t out_sync; + + struct u_upload_mgr *uploader; + + /** @{ Current pipeline state objects */ + struct pipe_scissor_state scissor; + struct pipe_blend_state *blend; + struct vc5_rasterizer_state *rasterizer; + struct vc5_depth_stencil_alpha_state *zsa; + + struct vc5_texture_stateobj verttex, fragtex; + + struct vc5_program_stateobj prog; + + struct vc5_vertex_stateobj *vtx; + + struct { + struct pipe_blend_color f; + uint16_t hf[4]; + } blend_color; + struct pipe_stencil_ref stencil_ref; + unsigned sample_mask; + struct pipe_framebuffer_state framebuffer; + + /* Per render target, whether we should swap the R and B fields in the + * shader's color output and in blending. If render targets disagree + * on the R/B swap and use the constant color, then we would need to + * fall back to in-shader blending. + */ + uint8_t swap_color_rb; + + /* Per render target, whether we should treat the dst alpha values as + * one in blending. + * + * For RGBX formats, the tile buffer's alpha channel will be + * undefined. + */ + uint8_t blend_dst_alpha_one; + + bool active_queries; + + uint32_t tf_prims_generated; + uint32_t prims_generated; + + struct pipe_poly_stipple stipple; + struct pipe_clip_state clip; + struct pipe_viewport_state viewport; + struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; + struct vc5_vertexbuf_stateobj vertexbuf; + struct vc5_streamout_stateobj streamout; + struct vc5_bo *current_oq; + /** @} */ +}; + +struct vc5_rasterizer_state { + struct pipe_rasterizer_state base; + + /* VC5_CONFIGURATION_BITS */ + uint8_t config_bits[3]; + + float point_size; + + /** + * Half-float (1/8/7 bits) value of polygon offset units for + * VC5_PACKET_DEPTH_OFFSET + */ + uint16_t offset_units; + /** + * Half-float (1/8/7 bits) value of polygon offset scale for + * VC5_PACKET_DEPTH_OFFSET + */ + uint16_t offset_factor; +}; + +struct vc5_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state base; + + enum vc5_ez_state ez_state; + + /** Uniforms for stencil state. + * + * Index 0 is either the front config, or the front-and-back config. + * Index 1 is the back config if doing separate back stencil. + * Index 2 is the writemask config if it's not a common mask value. + */ + uint32_t stencil_uniforms[3]; + + uint8_t stencil_front[6]; + uint8_t stencil_back[6]; +}; + +#define perf_debug(...) do { \ + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +static inline struct vc5_context * +vc5_context(struct pipe_context *pcontext) +{ + return (struct vc5_context *)pcontext; +} + +static inline struct vc5_sampler_view * +vc5_sampler_view(struct pipe_sampler_view *psview) +{ + return (struct vc5_sampler_view *)psview; +} + +static inline struct vc5_sampler_state * +vc5_sampler_state(struct pipe_sampler_state *psampler) +{ + return (struct vc5_sampler_state *)psampler; +} + +struct pipe_context *vc5_context_create(struct pipe_screen *pscreen, + void *priv, unsigned flags); +void vc5_program_init(struct pipe_context *pctx); +void vc5_program_fini(struct pipe_context *pctx); +void vc5_query_init(struct pipe_context *pctx); + +void vc5_simulator_init(struct vc5_screen *screen); +void vc5_simulator_destroy(struct vc5_screen *screen); +int vc5_simulator_flush(struct vc5_context *vc5, + struct drm_v3d_submit_cl *args, + struct vc5_job *job); +int vc5_simulator_ioctl(int fd, unsigned long request, void *arg); +void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, + int handle, uint32_t size); + +static inline int +vc5_ioctl(int fd, unsigned long request, void *arg) +{ + if (using_vc5_simulator) + return vc5_simulator_ioctl(fd, request, arg); + else + return drmIoctl(fd, request, arg); +} + +void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader); +struct vc5_cl_reloc vc5_write_uniforms(struct vc5_context *vc5, + struct vc5_compiled_shader *shader, + struct vc5_constbuf_stateobj *cb, + struct vc5_texture_stateobj *texstate); + +void vc5_flush(struct pipe_context *pctx); +void vc5_job_init(struct vc5_context *vc5); +struct vc5_job *vc5_get_job(struct vc5_context *vc5, + struct pipe_surface **cbufs, + struct pipe_surface *zsbuf); +struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5); +void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); +void vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc); +void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job); +void vc5_flush_jobs_writing_resource(struct vc5_context *vc5, + struct pipe_resource *prsc); +void vc5_flush_jobs_reading_resource(struct vc5_context *vc5, + struct pipe_resource *prsc); +void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode); + +bool vc5_rt_format_supported(const struct v3d_device_info *devinfo, + enum pipe_format f); +bool vc5_tex_format_supported(const struct v3d_device_info *devinfo, + enum pipe_format f); +uint8_t vc5_get_rt_format(const struct v3d_device_info *devinfo, enum pipe_format f); +uint8_t vc5_get_tex_format(const struct v3d_device_info *devinfo, enum pipe_format f); +uint8_t vc5_get_tex_return_size(const struct v3d_device_info *devinfo, + enum pipe_format f, + enum pipe_tex_compare compare); +uint8_t vc5_get_tex_return_channels(const struct v3d_device_info *devinfo, + enum pipe_format f); +const uint8_t *vc5_get_format_swizzle(const struct v3d_device_info *devinfo, + enum pipe_format f); +void vc5_get_internal_type_bpp_for_output_format(const struct v3d_device_info *devinfo, + uint32_t format, + uint32_t *type, + uint32_t *bpp); + +void vc5_init_query_functions(struct vc5_context *vc5); +void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); +void vc5_blitter_save(struct vc5_context *vc5); + +struct vc5_fence *vc5_fence_create(struct vc5_context *vc5); + +#ifdef v3dX +# include "v3dx_context.h" +#else +# define v3dX(x) v3d33_##x +# include "v3dx_context.h" +# undef v3dX + +# define v3dX(x) v3d41_##x +# include "v3dx_context.h" +# undef v3dX +#endif + +#endif /* VC5_CONTEXT_H */ diff --git a/src/gallium/drivers/v3d/v3d_fence.c b/src/gallium/drivers/v3d/v3d_fence.c new file mode 100644 index 00000000000..54bce562403 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_fence.c @@ -0,0 +1,104 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_fence.c + * + * Seqno-based fence management. + * + * We have two mechanisms for waiting in our kernel API: You can wait on a BO + * to have all rendering to from any process to be completed, or wait on a + * seqno for that particular seqno to be passed. The fence API we're + * implementing is based on waiting for all rendering in the context to have + * completed (with no reference to what other processes might be doing with + * the same BOs), so we can just use the seqno of the last rendering we'd + * fired off as our fence marker. + */ + +#include "util/u_inlines.h" + +#include "v3d_context.h" +#include "v3d_bufmgr.h" + +struct vc5_fence { + struct pipe_reference reference; + uint32_t sync; +}; + +static void +vc5_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **pp, + struct pipe_fence_handle *pf) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_fence **p = (struct vc5_fence **)pp; + struct vc5_fence *f = (struct vc5_fence *)pf; + struct vc5_fence *old = *p; + + if (pipe_reference(&(*p)->reference, &f->reference)) { + drmSyncobjDestroy(screen->fd, old->sync); + free(old); + } + *p = f; +} + +static boolean +vc5_fence_finish(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_fence_handle *pf, + uint64_t timeout_ns) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_fence *f = (struct vc5_fence *)pf; + + return drmSyncobjWait(screen->fd, &f->sync, 1, timeout_ns, 0, NULL); +} + +struct vc5_fence * +vc5_fence_create(struct vc5_context *vc5) +{ + struct vc5_fence *f = calloc(1, sizeof(*f)); + if (!f) + return NULL; + + uint32_t new_sync; + /* Make a new sync object for the context. */ + int ret = drmSyncobjCreate(vc5->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &new_sync); + if (ret) { + free(f); + return NULL; + } + + pipe_reference_init(&f->reference, 1); + f->sync = vc5->out_sync; + vc5->out_sync = new_sync; + + return f; +} + +void +vc5_fence_init(struct vc5_screen *screen) +{ + screen->base.fence_reference = vc5_fence_reference; + screen->base.fence_finish = vc5_fence_finish; +} diff --git a/src/gallium/drivers/v3d/v3d_format_table.h b/src/gallium/drivers/v3d/v3d_format_table.h new file mode 100644 index 00000000000..8b8011351a1 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_format_table.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2014-2018 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define V3D_OUTPUT_IMAGE_FORMAT_NO 255 + +#include <stdbool.h> +#include <stdint.h> + +struct vc5_format { + /** Set if the pipe format is defined in the table. */ + bool present; + + /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ + uint8_t rt_type; + + /** One of V3D33_TEXTURE_DATA_FORMAT_*. */ + uint8_t tex_type; + + /** + * Swizzle to apply to the RGBA shader output for storing to the tile + * buffer, to the RGBA tile buffer to produce shader input (for + * blending), and for turning the rgba8888 texture sampler return + * value into shader rgba values. + */ + uint8_t swizzle[4]; + + /* Whether the return value is 16F/I/UI or 32F/I/UI. */ + uint8_t return_size; + + /* If return_size == 32, how many channels are returned by texturing. + * 16 always returns 2 pairs of 16 bit values. + */ + uint8_t return_channels; +}; diff --git a/src/gallium/drivers/v3d/v3d_formats.c b/src/gallium/drivers/v3d/v3d_formats.c new file mode 100644 index 00000000000..8424b368cf4 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_formats.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc5_formats.c + * + * Contains the table and accessors for VC5 texture and render target format + * support. + * + * The hardware has limited support for texture formats, and extremely limited + * support for render target formats. As a result, we emulate other formats + * in our shader code, and this stores the table for doing so. + */ + +#include "util/macros.h" + +#include "v3d_context.h" +#include "v3d_format_table.h" + +static const struct vc5_format * +get_format(const struct v3d_device_info *devinfo, enum pipe_format f) +{ + if (devinfo->ver >= 41) + return v3d41_get_format_desc(f); + else + return v3d33_get_format_desc(f); +} + +bool +vc5_rt_format_supported(const struct v3d_device_info *devinfo, + enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return false; + + return vf->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO; +} + +uint8_t +vc5_get_rt_format(const struct v3d_device_info *devinfo, enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return 0; + + return vf->rt_type; +} + +bool +vc5_tex_format_supported(const struct v3d_device_info *devinfo, + enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + return vf != NULL; +} + +uint8_t +vc5_get_tex_format(const struct v3d_device_info *devinfo, enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return 0; + + return vf->tex_type; +} + +uint8_t +vc5_get_tex_return_size(const struct v3d_device_info *devinfo, + enum pipe_format f, enum pipe_tex_compare compare) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return 0; + + if (compare == PIPE_TEX_COMPARE_R_TO_TEXTURE) + return 16; + + return vf->return_size; +} + +uint8_t +vc5_get_tex_return_channels(const struct v3d_device_info *devinfo, + enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return 0; + + return vf->return_channels; +} + +const uint8_t * +vc5_get_format_swizzle(const struct v3d_device_info *devinfo, enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + static const uint8_t fallback[] = {0, 1, 2, 3}; + + if (!vf) + return fallback; + + return vf->swizzle; +} + +void +vc5_get_internal_type_bpp_for_output_format(const struct v3d_device_info *devinfo, + uint32_t format, + uint32_t *type, + uint32_t *bpp) +{ + if (devinfo->ver >= 41) { + return v3d41_get_internal_type_bpp_for_output_format(format, + type, bpp); + } else { + return v3d33_get_internal_type_bpp_for_output_format(format, + type, bpp); + } +} diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c new file mode 100644 index 00000000000..85c64df34ca --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_job.c @@ -0,0 +1,452 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_job.c + * + * Functions for submitting VC5 render jobs to the kernel. + */ + +#include <xf86drm.h> +#include "v3d_context.h" +/* The OQ/semaphore packets are the same across V3D versions. */ +#define V3D_VERSION 33 +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/common/v3d_macros.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "broadcom/clif/clif_dump.h" + +static void +remove_from_ht(struct hash_table *ht, void *key) +{ + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + _mesa_hash_table_remove(ht, entry); +} + +static void +vc5_job_free(struct vc5_context *vc5, struct vc5_job *job) +{ + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + vc5_bo_unreference(&bo); + } + + remove_from_ht(vc5->jobs, &job->key); + + if (job->write_prscs) { + struct set_entry *entry; + + set_foreach(job->write_prscs, entry) { + const struct pipe_resource *prsc = entry->key; + + remove_from_ht(vc5->write_jobs, (void *)prsc); + } + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) { + remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture); + pipe_surface_reference(&job->cbufs[i], NULL); + } + } + if (job->zsbuf) { + remove_from_ht(vc5->write_jobs, job->zsbuf->texture); + pipe_surface_reference(&job->zsbuf, NULL); + } + + if (vc5->job == job) + vc5->job = NULL; + + vc5_destroy_cl(&job->bcl); + vc5_destroy_cl(&job->rcl); + vc5_destroy_cl(&job->indirect); + vc5_bo_unreference(&job->tile_alloc); + vc5_bo_unreference(&job->tile_state); + + ralloc_free(job); +} + +static struct vc5_job * +vc5_job_create(struct vc5_context *vc5) +{ + struct vc5_job *job = rzalloc(vc5, struct vc5_job); + + job->vc5 = vc5; + + vc5_init_cl(job, &job->bcl); + vc5_init_cl(job, &job->rcl); + vc5_init_cl(job, &job->indirect); + + job->draw_min_x = ~0; + job->draw_min_y = ~0; + job->draw_max_x = 0; + job->draw_max_y = 0; + + job->bos = _mesa_set_create(job, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + return job; +} + +void +vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo) +{ + if (!bo) + return; + + if (_mesa_set_search(job->bos, bo)) + return; + + vc5_bo_reference(bo); + _mesa_set_add(job->bos, bo); + job->referenced_size += bo->size; + + uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles; + + if (job->submit.bo_handle_count >= job->bo_handles_size) { + job->bo_handles_size = MAX2(4, job->bo_handles_size * 2); + bo_handles = reralloc(job, bo_handles, + uint32_t, job->bo_handles_size); + job->submit.bo_handles = (uintptr_t)(void *)bo_handles; + } + bo_handles[job->submit.bo_handle_count++] = bo->handle; +} + +void +vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc) +{ + struct vc5_context *vc5 = job->vc5; + + if (!job->write_prscs) { + job->write_prscs = _mesa_set_create(job, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + } + + _mesa_set_add(job->write_prscs, prsc); + _mesa_hash_table_insert(vc5->write_jobs, prsc, job); +} + +void +vc5_flush_jobs_writing_resource(struct vc5_context *vc5, + struct pipe_resource *prsc) +{ + struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, + prsc); + if (entry) { + struct vc5_job *job = entry->data; + vc5_job_submit(vc5, job); + } +} + +void +vc5_flush_jobs_reading_resource(struct vc5_context *vc5, + struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + + vc5_flush_jobs_writing_resource(vc5, prsc); + + struct hash_entry *entry; + hash_table_foreach(vc5->jobs, entry) { + struct vc5_job *job = entry->data; + + if (_mesa_set_search(job->bos, rsc->bo)) { + vc5_job_submit(vc5, job); + /* Reminder: vc5->jobs is safe to keep iterating even + * after deletion of an entry. + */ + continue; + } + } +} + +static void +vc5_job_set_tile_buffer_size(struct vc5_job *job) +{ + static const uint8_t tile_sizes[] = { + 64, 64, + 64, 32, + 32, 32, + 32, 16, + 16, 16, + }; + int tile_size_index = 0; + if (job->msaa) + tile_size_index += 2; + + if (job->cbufs[3] || job->cbufs[2]) + tile_size_index += 2; + else if (job->cbufs[1]) + tile_size_index++; + + int max_bpp = RENDER_TARGET_MAXIMUM_32BPP; + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) { + struct vc5_surface *surf = vc5_surface(job->cbufs[i]); + max_bpp = MAX2(max_bpp, surf->internal_bpp); + } + } + job->internal_bpp = max_bpp; + STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0); + tile_size_index += max_bpp; + + assert(tile_size_index < ARRAY_SIZE(tile_sizes)); + job->tile_width = tile_sizes[tile_size_index * 2 + 0]; + job->tile_height = tile_sizes[tile_size_index * 2 + 1]; +} + +/** + * Returns a vc5_job struture for tracking V3D rendering to a particular FBO. + * + * If we've already started rendering to this FBO, then return old same job, + * otherwise make a new one. If we're beginning rendering to an FBO, make + * sure that any previous reads of the FBO (or writes to its color/Z surfaces) + * have been flushed. + */ +struct vc5_job * +vc5_get_job(struct vc5_context *vc5, + struct pipe_surface **cbufs, struct pipe_surface *zsbuf) +{ + /* Return the existing job for this FBO if we have one */ + struct vc5_job_key local_key = { + .cbufs = { + cbufs[0], + cbufs[1], + cbufs[2], + cbufs[3], + }, + .zsbuf = zsbuf, + }; + struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs, + &local_key); + if (entry) + return entry->data; + + /* Creating a new job. Make sure that any previous jobs reading or + * writing these buffers are flushed. + */ + struct vc5_job *job = vc5_job_create(vc5); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (cbufs[i]) { + vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture); + pipe_surface_reference(&job->cbufs[i], cbufs[i]); + + if (cbufs[i]->texture->nr_samples > 1) + job->msaa = true; + } + } + if (zsbuf) { + vc5_flush_jobs_reading_resource(vc5, zsbuf->texture); + pipe_surface_reference(&job->zsbuf, zsbuf); + if (zsbuf->texture->nr_samples > 1) + job->msaa = true; + } + + vc5_job_set_tile_buffer_size(job); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (cbufs[i]) + _mesa_hash_table_insert(vc5->write_jobs, + cbufs[i]->texture, job); + } + if (zsbuf) + _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job); + + memcpy(&job->key, &local_key, sizeof(local_key)); + _mesa_hash_table_insert(vc5->jobs, &job->key, job); + + return job; +} + +struct vc5_job * +vc5_get_job_for_fbo(struct vc5_context *vc5) +{ + if (vc5->job) + return vc5->job; + + struct pipe_surface **cbufs = vc5->framebuffer.cbufs; + struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf; + struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf); + + /* The dirty flags are tracking what's been updated while vc5->job has + * been bound, so set them all to ~0 when switching between jobs. We + * also need to reset all state at the start of rendering. + */ + vc5->dirty = ~0; + + /* If we're binding to uninitialized buffers, no need to load their + * contents before drawing. + */ + for (int i = 0; i < 4; i++) { + if (cbufs[i]) { + struct vc5_resource *rsc = vc5_resource(cbufs[i]->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_COLOR0 << i; + } + } + + if (zsbuf) { + struct vc5_resource *rsc = vc5_resource(zsbuf->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; + } + + job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width, + job->tile_width); + job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height, + job->tile_height); + + vc5->job = job; + + return job; +} + +static bool +vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr) +{ + struct vc5_job *job = data; + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (void *)entry->key; + + if (addr >= bo->offset && + addr < bo->offset + bo->size) { + vc5_bo_map(bo); + *vaddr = bo->map + addr - bo->offset; + return true; + } + } + + return false; +} + +static void +vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job) +{ + if (!(V3D_DEBUG & V3D_DEBUG_CL)) + return; + + struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo, + stderr, vc5_clif_dump_lookup, + job); + + fprintf(stderr, "BCL: 0x%08x..0x%08x\n", + job->submit.bcl_start, job->submit.bcl_end); + + clif_dump_add_cl(clif, job->submit.bcl_start, job->submit.bcl_end); + + fprintf(stderr, "RCL: 0x%08x..0x%08x\n", + job->submit.rcl_start, job->submit.rcl_end); + clif_dump_add_cl(clif, job->submit.rcl_start, job->submit.rcl_end); +} + +/** + * Submits the job to the kernel and then reinitializes it. + */ +void +vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job) +{ + MAYBE_UNUSED struct vc5_screen *screen = vc5->screen; + + if (!job->needs_flush) + goto done; + + if (vc5->screen->devinfo.ver >= 41) + v3d41_emit_rcl(job); + else + v3d33_emit_rcl(job); + + if (cl_offset(&job->bcl) > 0) { + if (screen->devinfo.ver >= 41) + v3d41_bcl_epilogue(vc5, job); + else + v3d33_bcl_epilogue(vc5, job); + } + + job->submit.out_sync = vc5->out_sync; + job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); + job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); + + /* On V3D 4.1, the tile alloc/state setup moved to register writes + * instead of binner packets. + */ + if (screen->devinfo.ver >= 41) { + vc5_job_add_bo(job, job->tile_alloc); + job->submit.qma = job->tile_alloc->offset; + job->submit.qms = job->tile_alloc->size; + + vc5_job_add_bo(job, job->tile_state); + job->submit.qts = job->tile_state->offset; + } + + vc5_clif_dump(vc5, job); + + if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) { + int ret; + +#ifndef USE_V3D_SIMULATOR + ret = drmIoctl(vc5->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit); +#else + ret = vc5_simulator_flush(vc5, &job->submit, job); +#endif + static bool warned = false; + if (ret && !warned) { + fprintf(stderr, "Draw call returned %s. " + "Expect corruption.\n", strerror(errno)); + warned = true; + } + } + +done: + vc5_job_free(vc5, job); +} + +static bool +vc5_job_compare(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct vc5_job_key)) == 0; +} + +static uint32_t +vc5_job_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct vc5_job_key)); +} + +void +vc5_job_init(struct vc5_context *vc5) +{ + vc5->jobs = _mesa_hash_table_create(vc5, + vc5_job_hash, + vc5_job_compare); + vc5->write_jobs = _mesa_hash_table_create(vc5, + _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c new file mode 100644 index 00000000000..ce2e0be8ed2 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -0,0 +1,682 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/ralloc.h" +#include "util/hash_table.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "nir/tgsi_to_nir.h" +#include "compiler/v3d_compiler.h" +#include "v3d_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" +#include "mesa/state_tracker/st_glsl_types.h" + +static gl_varying_slot +vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) +{ + nir_foreach_variable(var, &s->outputs) { + if (var->data.driver_location == driver_location) { + return var->data.location; + } + } + + return -1; +} + +/** + * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. + * + * A shader can have 16 of these specs, and each one of them can write up to + * 16 dwords. Since we allow a total of 64 transform feedback output + * components (not 16 vectors), we have to group the writes of multiple + * varyings together in a single data spec. + */ +static void +vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so, + const struct pipe_stream_output_info *stream_output) +{ + if (!stream_output->num_outputs) + return; + + struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; + int slot_count = 0; + + for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { + uint32_t buffer_offset = 0; + uint32_t vpm_start = slot_count; + + for (int i = 0; i < stream_output->num_outputs; i++) { + const struct pipe_stream_output *output = + &stream_output->output[i]; + + if (output->output_buffer != buffer) + continue; + + /* We assume that the SO outputs appear in increasing + * order in the buffer. + */ + assert(output->dst_offset >= buffer_offset); + + /* Pad any undefined slots in the output */ + for (int j = buffer_offset; j < output->dst_offset; j++) { + slots[slot_count] = + v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); + slot_count++; + buffer_offset++; + } + + /* Set the coordinate shader up to output the + * components of this varying. + */ + for (int j = 0; j < output->num_components; j++) { + gl_varying_slot slot = + vc5_get_slot_for_driver_location(so->base.ir.nir, output->register_index); + + slots[slot_count] = + v3d_slot_from_slot_and_component(slot, + output->start_component + j); + slot_count++; + buffer_offset++; + } + } + + uint32_t vpm_size = slot_count - vpm_start; + if (!vpm_size) + continue; + + uint32_t vpm_start_offset = vpm_start + 6; + + while (vpm_size) { + uint32_t write_size = MIN2(vpm_size, 1 << 4); + + struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { + /* We need the offset from the coordinate shader's VPM + * output block, which has the [X, Y, Z, W, Xs, Ys] + * values at the start. + */ + .first_shaded_vertex_value_to_output = vpm_start_offset, + .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = write_size - 1, + .output_buffer_to_write_to = buffer, + }; + + /* GFXH-1559 */ + assert(unpacked.first_shaded_vertex_value_to_output != 8 || + so->num_tf_specs != 0); + + assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); + V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + (void *)&so->tf_specs[so->num_tf_specs], + &unpacked); + + /* If point size is being written by the shader, then + * all the VPM start offsets are shifted up by one. + * We won't know that until the variant is compiled, + * though. + */ + unpacked.first_shaded_vertex_value_to_output++; + + /* GFXH-1559 */ + assert(unpacked.first_shaded_vertex_value_to_output != 8 || + so->num_tf_specs != 0); + + V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + (void *)&so->tf_specs_psiz[so->num_tf_specs], + &unpacked); + so->num_tf_specs++; + vpm_start_offset += write_size; + vpm_size -= write_size; + } + } + + so->num_tf_outputs = slot_count; + so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, + slot_count); + memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); +} + +static int +type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + +static int +uniforms_type_size(const struct glsl_type *type) +{ + return st_glsl_storage_type_size(type, false); +} + +static void * +vc5_shader_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader); + if (!so) + return NULL; + + so->program_id = vc5->next_uncompiled_program_id++; + + nir_shader *s; + + if (cso->type == PIPE_SHADER_IR_NIR) { + /* The backend takes ownership of the NIR shader on state + * creation. + */ + s = cso->ir.nir; + + NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, + type_size, + (nir_lower_io_options)0); + NIR_PASS_V(s, nir_lower_io, nir_var_uniform, + uniforms_type_size, + (nir_lower_io_options)0); + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + + if (V3D_DEBUG & V3D_DEBUG_TGSI) { + fprintf(stderr, "prog %d TGSI:\n", + so->program_id); + tgsi_dump(cso->tokens, 0); + fprintf(stderr, "\n"); + } + s = tgsi_to_nir(cso->tokens, &v3d_nir_options); + + so->was_tgsi = true; + } + + NIR_PASS_V(s, nir_opt_global_to_local); + NIR_PASS_V(s, nir_lower_regs_to_ssa); + NIR_PASS_V(s, nir_normalize_cubemap_coords); + + NIR_PASS_V(s, nir_lower_load_const_to_scalar); + + v3d_optimize_nir(s); + + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); + + /* Garbage collect dead instructions */ + nir_sweep(s); + + so->base.type = PIPE_SHADER_IR_NIR; + so->base.ir.nir = s; + + vc5_set_transform_feedback_outputs(so, &cso->stream_output); + + if (V3D_DEBUG & (V3D_DEBUG_NIR | + v3d_debug_flag_for_shader_stage(s->info.stage))) { + fprintf(stderr, "%s prog %d NIR:\n", + gl_shader_stage_name(s->info.stage), + so->program_id); + nir_print_shader(s, stderr); + fprintf(stderr, "\n"); + } + + return so; +} + +static struct vc5_compiled_shader * +vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key) +{ + struct vc5_uncompiled_shader *shader_state = key->shader_state; + nir_shader *s = shader_state->base.ir.nir; + + struct hash_table *ht; + uint32_t key_size; + if (s->info.stage == MESA_SHADER_FRAGMENT) { + ht = vc5->fs_cache; + key_size = sizeof(struct v3d_fs_key); + } else { + ht = vc5->vs_cache; + key_size = sizeof(struct v3d_vs_key); + } + + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + if (entry) + return entry->data; + + struct vc5_compiled_shader *shader = + rzalloc(NULL, struct vc5_compiled_shader); + + int program_id = shader_state->program_id; + int variant_id = + p_atomic_inc_return(&shader_state->compiled_variant_count); + uint64_t *qpu_insts; + uint32_t shader_size; + + switch (s->info.stage) { + case MESA_SHADER_VERTEX: + shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data); + + qpu_insts = v3d_compile_vs(vc5->screen->compiler, + (struct v3d_vs_key *)key, + shader->prog_data.vs, s, + program_id, variant_id, + &shader_size); + break; + case MESA_SHADER_FRAGMENT: + shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data); + + qpu_insts = v3d_compile_fs(vc5->screen->compiler, + (struct v3d_fs_key *)key, + shader->prog_data.fs, s, + program_id, variant_id, + &shader_size); + break; + default: + unreachable("bad stage"); + } + + vc5_set_shader_uniform_dirty_flags(shader); + + shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader"); + vc5_bo_map(shader->bo); + memcpy(shader->bo->map, qpu_insts, shader_size); + + free(qpu_insts); + + struct vc5_key *dup_key; + dup_key = ralloc_size(shader, key_size); + memcpy(dup_key, key, key_size); + _mesa_hash_table_insert(ht, dup_key, shader); + + if (shader->prog_data.base->spill_size > + vc5->prog.spill_size_per_thread) { + /* Max 4 QPUs per slice, 3 slices per core. We only do single + * core so far. This overallocates memory on smaller cores. + */ + int total_spill_size = + 4 * 3 * shader->prog_data.base->spill_size; + + vc5_bo_unreference(&vc5->prog.spill_bo); + vc5->prog.spill_bo = vc5_bo_alloc(vc5->screen, + total_spill_size, "spill"); + vc5->prog.spill_size_per_thread = + shader->prog_data.base->spill_size; + } + + return shader; +} + +static void +vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key, + struct vc5_texture_stateobj *texstate) +{ + const struct v3d_device_info *devinfo = &vc5->screen->devinfo; + + for (int i = 0; i < texstate->num_textures; i++) { + struct pipe_sampler_view *sampler = texstate->textures[i]; + struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler); + struct pipe_sampler_state *sampler_state = + texstate->samplers[i]; + + if (!sampler) + continue; + + key->tex[i].return_size = + vc5_get_tex_return_size(devinfo, + sampler->format, + sampler_state->compare_mode); + + /* For 16-bit, we set up the sampler to always return 2 + * channels (meaning no recompiles for most statechanges), + * while for 32 we actually scale the returns with channels. + */ + if (key->tex[i].return_size == 16) { + key->tex[i].return_channels = 2; + } else if (devinfo->ver > 40) { + key->tex[i].return_channels = 4; + } else { + key->tex[i].return_channels = + vc5_get_tex_return_channels(devinfo, + sampler->format); + } + + if (key->tex[i].return_size == 32 && devinfo->ver < 40) { + memcpy(key->tex[i].swizzle, + vc5_sampler->swizzle, + sizeof(vc5_sampler->swizzle)); + } else { + /* For 16-bit returns, we let the sampler state handle + * the swizzle. + */ + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; + } + + if (sampler) { + key->tex[i].compare_mode = sampler_state->compare_mode; + key->tex[i].compare_func = sampler_state->compare_func; + key->tex[i].clamp_s = + sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP; + key->tex[i].clamp_t = + sampler_state->wrap_t == PIPE_TEX_WRAP_CLAMP; + key->tex[i].clamp_r = + sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP; + } + } + + key->ucp_enables = vc5->rasterizer->base.clip_plane_enable; +} + +static void +vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode) +{ + struct vc5_job *job = vc5->job; + struct v3d_fs_key local_key; + struct v3d_fs_key *key = &local_key; + + if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_BLEND | + VC5_DIRTY_FRAMEBUFFER | + VC5_DIRTY_ZSA | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_SAMPLE_MASK | + VC5_DIRTY_FRAGTEX | + VC5_DIRTY_UNCOMPILED_FS))) { + return; + } + + memset(key, 0, sizeof(*key)); + vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex); + key->base.shader_state = vc5->prog.bind_fs; + key->is_points = (prim_mode == PIPE_PRIM_POINTS); + key->is_lines = (prim_mode >= PIPE_PRIM_LINES && + prim_mode <= PIPE_PRIM_LINE_STRIP); + key->clamp_color = vc5->rasterizer->base.clamp_fragment_color; + if (vc5->blend->logicop_enable) { + key->logicop_func = vc5->blend->logicop_func; + } else { + key->logicop_func = PIPE_LOGICOP_COPY; + } + if (job->msaa) { + key->msaa = vc5->rasterizer->base.multisample; + key->sample_coverage = (vc5->rasterizer->base.multisample && + vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1); + key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage; + key->sample_alpha_to_one = vc5->blend->alpha_to_one; + } + + key->depth_enabled = (vc5->zsa->base.depth.enabled || + vc5->zsa->base.stencil[0].enabled); + if (vc5->zsa->base.alpha.enabled) { + key->alpha_test = true; + key->alpha_test_func = vc5->zsa->base.alpha.func; + } + + /* gl_FragColor's propagation to however many bound color buffers + * there are means that the buffer count needs to be in the key. + */ + key->nr_cbufs = vc5->framebuffer.nr_cbufs; + key->swap_color_rb = vc5->swap_color_rb; + + for (int i = 0; i < key->nr_cbufs; i++) { + struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i]; + if (!cbuf) + continue; + + const struct util_format_description *desc = + util_format_description(cbuf->format); + + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && + desc->channel[0].size == 32) { + key->f32_color_rb |= 1 << i; + } + + if (vc5->prog.bind_fs->was_tgsi) { + if (util_format_is_pure_uint(cbuf->format)) + key->uint_color_rb |= 1 << i; + else if (util_format_is_pure_sint(cbuf->format)) + key->int_color_rb |= 1 << i; + } + } + + if (key->is_points) { + key->point_sprite_mask = + vc5->rasterizer->base.sprite_coord_enable; + key->point_coord_upper_left = + (vc5->rasterizer->base.sprite_coord_mode == + PIPE_SPRITE_COORD_UPPER_LEFT); + } + + key->light_twoside = vc5->rasterizer->base.light_twoside; + key->shade_model_flat = vc5->rasterizer->base.flatshade; + + struct vc5_compiled_shader *old_fs = vc5->prog.fs; + vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base); + if (vc5->prog.fs == old_fs) + return; + + vc5->dirty |= VC5_DIRTY_COMPILED_FS; + + if (old_fs) { + if (vc5->prog.fs->prog_data.fs->flat_shade_flags != + old_fs->prog_data.fs->flat_shade_flags) { + vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; + } + + if (vc5->prog.fs->prog_data.fs->centroid_flags != + old_fs->prog_data.fs->centroid_flags) { + vc5->dirty |= VC5_DIRTY_CENTROID_FLAGS; + } + } + + if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots, + old_fs->prog_data.fs->input_slots, + sizeof(vc5->prog.fs->prog_data.fs->input_slots))) { + vc5->dirty |= VC5_DIRTY_FS_INPUTS; + } +} + +static void +vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode) +{ + struct v3d_vs_key local_key; + struct v3d_vs_key *key = &local_key; + + if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_VERTTEX | + VC5_DIRTY_VTXSTATE | + VC5_DIRTY_UNCOMPILED_VS | + VC5_DIRTY_FS_INPUTS))) { + return; + } + + memset(key, 0, sizeof(*key)); + vc5_setup_shared_key(vc5, &key->base, &vc5->verttex); + key->base.shader_state = vc5->prog.bind_vs; + key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs; + STATIC_ASSERT(sizeof(key->fs_inputs) == + sizeof(vc5->prog.fs->prog_data.fs->input_slots)); + memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots, + sizeof(key->fs_inputs)); + key->clamp_color = vc5->rasterizer->base.clamp_vertex_color; + + key->per_vertex_point_size = + (prim_mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + + struct vc5_compiled_shader *vs = + vc5_get_compiled_shader(vc5, &key->base); + if (vs != vc5->prog.vs) { + vc5->prog.vs = vs; + vc5->dirty |= VC5_DIRTY_COMPILED_VS; + } + + key->is_coord = true; + /* Coord shaders only output varyings used by transform feedback. */ + struct vc5_uncompiled_shader *shader_state = key->base.shader_state; + memcpy(key->fs_inputs, shader_state->tf_outputs, + sizeof(*key->fs_inputs) * shader_state->num_tf_outputs); + if (shader_state->num_tf_outputs < key->num_fs_inputs) { + memset(&key->fs_inputs[shader_state->num_tf_outputs], + 0, + sizeof(*key->fs_inputs) * (key->num_fs_inputs - + shader_state->num_tf_outputs)); + } + key->num_fs_inputs = shader_state->num_tf_outputs; + + struct vc5_compiled_shader *cs = + vc5_get_compiled_shader(vc5, &key->base); + if (cs != vc5->prog.cs) { + vc5->prog.cs = cs; + vc5->dirty |= VC5_DIRTY_COMPILED_CS; + } +} + +void +vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode) +{ + vc5_update_compiled_fs(vc5, prim_mode); + vc5_update_compiled_vs(vc5, prim_mode); +} + +static uint32_t +fs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); +} + +static uint32_t +vs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); +} + +static bool +fs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; +} + +static bool +vs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; +} + +static void +delete_from_cache_if_matches(struct hash_table *ht, + struct vc5_compiled_shader **last_compile, + struct hash_entry *entry, + struct vc5_uncompiled_shader *so) +{ + const struct v3d_key *key = entry->key; + + if (key->shader_state == so) { + struct vc5_compiled_shader *shader = entry->data; + _mesa_hash_table_remove(ht, entry); + vc5_bo_unreference(&shader->bo); + + if (shader == *last_compile) + *last_compile = NULL; + + ralloc_free(shader); + } +} + +static void +vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_uncompiled_shader *so = hwcso; + + struct hash_entry *entry; + hash_table_foreach(vc5->fs_cache, entry) { + delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs, + entry, so); + } + hash_table_foreach(vc5->vs_cache, entry) { + delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs, + entry, so); + } + + ralloc_free(so->base.ir.nir); + free(so); +} + +static void +vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->prog.bind_fs = hwcso; + vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS; +} + +static void +vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->prog.bind_vs = hwcso; + vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS; +} + +void +vc5_program_init(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + pctx->create_vs_state = vc5_shader_state_create; + pctx->delete_vs_state = vc5_shader_state_delete; + + pctx->create_fs_state = vc5_shader_state_create; + pctx->delete_fs_state = vc5_shader_state_delete; + + pctx->bind_fs_state = vc5_fp_state_bind; + pctx->bind_vs_state = vc5_vp_state_bind; + + vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash, + fs_cache_compare); + vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash, + vs_cache_compare); +} + +void +vc5_program_fini(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + struct hash_entry *entry; + hash_table_foreach(vc5->fs_cache, entry) { + struct vc5_compiled_shader *shader = entry->data; + vc5_bo_unreference(&shader->bo); + ralloc_free(shader); + _mesa_hash_table_remove(vc5->fs_cache, entry); + } + + hash_table_foreach(vc5->vs_cache, entry) { + struct vc5_compiled_shader *shader = entry->data; + vc5_bo_unreference(&shader->bo); + ralloc_free(shader); + _mesa_hash_table_remove(vc5->vs_cache, entry); + } +} diff --git a/src/gallium/drivers/v3d/v3d_query.c b/src/gallium/drivers/v3d/v3d_query.c new file mode 100644 index 00000000000..f645544bedf --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_query.c @@ -0,0 +1,180 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * Gallium query object support. + * + * The HW has native support for occlusion queries, with the query result + * being loaded and stored by the TLB unit. From a SW perspective, we have to + * be careful to make sure that the jobs that need to be tracking queries are + * bracketed by the start and end of counting, even across FBO transitions. + * + * For the transform feedback PRIMITIVES_GENERATED/WRITTEN queries, we have to + * do the calculations in software at draw time. + */ + +#include "v3d_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +struct vc5_query +{ + enum pipe_query_type type; + struct vc5_bo *bo; + + uint32_t start, end; +}; + +static struct pipe_query * +vc5_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index) +{ + struct vc5_query *q = calloc(1, sizeof(*q)); + + q->type = query_type; + + /* Note that struct pipe_query isn't actually defined anywhere. */ + return (struct pipe_query *)q; +} + +static void +vc5_destroy_query(struct pipe_context *pctx, struct pipe_query *query) +{ + struct vc5_query *q = (struct vc5_query *)query; + + vc5_bo_unreference(&q->bo); + free(q); +} + +static boolean +vc5_begin_query(struct pipe_context *pctx, struct pipe_query *query) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_query *q = (struct vc5_query *)query; + + switch (q->type) { + case PIPE_QUERY_PRIMITIVES_GENERATED: + q->start = vc5->prims_generated; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + q->start = vc5->tf_prims_generated; + break; + default: + q->bo = vc5_bo_alloc(vc5->screen, 4096, "query"); + + uint32_t *map = vc5_bo_map(q->bo); + *map = 0; + vc5->current_oq = q->bo; + vc5->dirty |= VC5_DIRTY_OQ; + break; + } + + return true; +} + +static bool +vc5_end_query(struct pipe_context *pctx, struct pipe_query *query) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_query *q = (struct vc5_query *)query; + + switch (q->type) { + case PIPE_QUERY_PRIMITIVES_GENERATED: + q->end = vc5->prims_generated; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + q->end = vc5->tf_prims_generated; + break; + default: + vc5->current_oq = NULL; + vc5->dirty |= VC5_DIRTY_OQ; + break; + } + + return true; +} + +static boolean +vc5_get_query_result(struct pipe_context *pctx, struct pipe_query *query, + boolean wait, union pipe_query_result *vresult) +{ + struct vc5_query *q = (struct vc5_query *)query; + uint32_t result = 0; + + if (q->bo) { + /* XXX: Only flush the jobs using this BO. */ + vc5_flush(pctx); + + if (wait) { + if (!vc5_bo_wait(q->bo, 0, "query")) + return false; + } else { + if (!vc5_bo_wait(q->bo, ~0ull, "query")) + return false; + } + + /* XXX: Sum up per-core values. */ + uint32_t *map = vc5_bo_map(q->bo); + result = *map; + + vc5_bo_unreference(&q->bo); + } + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + vresult->u64 = result; + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + vresult->b = result != 0; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + vresult->u64 = q->end - q->start; + break; + default: + unreachable("unsupported query type"); + } + + return true; +} + +static void +vc5_set_active_query_state(struct pipe_context *pctx, boolean enable) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5->active_queries = enable; + vc5->dirty |= VC5_DIRTY_OQ; + vc5->dirty |= VC5_DIRTY_STREAMOUT; +} + +void +vc5_query_init(struct pipe_context *pctx) +{ + pctx->create_query = vc5_create_query; + pctx->destroy_query = vc5_destroy_query; + pctx->begin_query = vc5_begin_query; + pctx->end_query = vc5_end_query; + pctx->get_query_result = vc5_get_query_result; + pctx->set_active_query_state = vc5_set_active_query_state; +} + diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c new file mode 100644 index 00000000000..1cd3f1949a2 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_resource.c @@ -0,0 +1,914 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_blit.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" +#include "util/u_transfer_helper.h" +#include "util/u_upload_mgr.h" +#include "util/u_format_zs.h" + +#include "drm_fourcc.h" +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_resource.h" +#include "v3d_tiling.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static void +vc5_debug_resource_layout(struct vc5_resource *rsc, const char *caller) +{ + if (!(V3D_DEBUG & V3D_DEBUG_SURFACE)) + return; + + struct pipe_resource *prsc = &rsc->base; + + if (prsc->target == PIPE_BUFFER) { + fprintf(stderr, + "rsc %s %p (format %s), %dx%d buffer @0x%08x-0x%08x\n", + caller, rsc, + util_format_short_name(prsc->format), + prsc->width0, prsc->height0, + rsc->bo->offset, + rsc->bo->offset + rsc->bo->size - 1); + return; + } + + static const char *const tiling_descriptions[] = { + [VC5_TILING_RASTER] = "R", + [VC5_TILING_LINEARTILE] = "LT", + [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1", + [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2", + [VC5_TILING_UIF_NO_XOR] = "UIF", + [VC5_TILING_UIF_XOR] = "UIF^", + }; + + for (int i = 0; i <= prsc->last_level; i++) { + struct vc5_resource_slice *slice = &rsc->slices[i]; + + int level_width = slice->stride / rsc->cpp; + int level_height = slice->padded_height; + int level_depth = + u_minify(util_next_power_of_two(prsc->depth0), i); + + fprintf(stderr, + "rsc %s %p (format %s), %dx%d: " + "level %d (%s) %dx%dx%d -> %dx%dx%d, stride %d@0x%08x\n", + caller, rsc, + util_format_short_name(prsc->format), + prsc->width0, prsc->height0, + i, tiling_descriptions[slice->tiling], + u_minify(prsc->width0, i), + u_minify(prsc->height0, i), + u_minify(prsc->depth0, i), + level_width, + level_height, + level_depth, + slice->stride, + rsc->bo->offset + slice->offset); + } +} + +static bool +vc5_resource_bo_alloc(struct vc5_resource *rsc) +{ + struct pipe_resource *prsc = &rsc->base; + struct pipe_screen *pscreen = prsc->screen; + struct vc5_bo *bo; + + bo = vc5_bo_alloc(vc5_screen(pscreen), rsc->size, "resource"); + if (bo) { + vc5_bo_unreference(&rsc->bo); + rsc->bo = bo; + vc5_debug_resource_layout(rsc, "alloc"); + return true; + } else { + return false; + } +} + +static void +vc5_resource_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_transfer *trans = vc5_transfer(ptrans); + + if (trans->map) { + struct vc5_resource *rsc = vc5_resource(ptrans->resource); + struct vc5_resource_slice *slice = &rsc->slices[ptrans->level]; + + if (ptrans->usage & PIPE_TRANSFER_WRITE) { + for (int z = 0; z < ptrans->box.depth; z++) { + void *dst = rsc->bo->map + + vc5_layer_offset(&rsc->base, + ptrans->level, + ptrans->box.z + z); + vc5_store_tiled_image(dst, + slice->stride, + (trans->map + + ptrans->stride * + ptrans->box.height * z), + ptrans->stride, + slice->tiling, rsc->cpp, + slice->padded_height, + &ptrans->box); + } + } + free(trans->map); + } + + pipe_resource_reference(&ptrans->resource, NULL); + slab_free(&vc5->transfer_pool, ptrans); +} + +static void * +vc5_resource_transfer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level, unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **pptrans) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_resource *rsc = vc5_resource(prsc); + struct vc5_transfer *trans; + struct pipe_transfer *ptrans; + enum pipe_format format = prsc->format; + char *buf; + + /* MSAA maps should have been handled by u_transfer_helper. */ + assert(prsc->nr_samples <= 1); + + /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is + * being mapped. + */ + if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) && + prsc->last_level == 0 && + prsc->width0 == box->width && + prsc->height0 == box->height && + prsc->depth0 == box->depth && + prsc->array_size == 1 && + rsc->bo->private) { + usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + } + + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + if (vc5_resource_bo_alloc(rsc)) { + /* If it might be bound as one of our vertex buffers + * or UBOs, make sure we re-emit vertex buffer state + * or uniforms. + */ + if (prsc->bind & PIPE_BIND_VERTEX_BUFFER) + vc5->dirty |= VC5_DIRTY_VTXBUF; + if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER) + vc5->dirty |= VC5_DIRTY_CONSTBUF; + } else { + /* If we failed to reallocate, flush users so that we + * don't violate any syncing requirements. + */ + vc5_flush_jobs_reading_resource(vc5, prsc); + } + } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + /* If we're writing and the buffer is being used by the CL, we + * have to flush the CL first. If we're only reading, we need + * to flush if the CL has written our buffer. + */ + if (usage & PIPE_TRANSFER_WRITE) + vc5_flush_jobs_reading_resource(vc5, prsc); + else + vc5_flush_jobs_writing_resource(vc5, prsc); + } + + if (usage & PIPE_TRANSFER_WRITE) { + rsc->writes++; + rsc->initialized_buffers = ~0; + } + + trans = slab_alloc(&vc5->transfer_pool); + if (!trans) + return NULL; + + /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */ + + /* slab_alloc_st() doesn't zero: */ + memset(trans, 0, sizeof(*trans)); + ptrans = &trans->base; + + pipe_resource_reference(&ptrans->resource, prsc); + ptrans->level = level; + ptrans->usage = usage; + ptrans->box = *box; + + /* Note that the current kernel implementation is synchronous, so no + * need to do syncing stuff here yet. + */ + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + buf = vc5_bo_map_unsynchronized(rsc->bo); + else + buf = vc5_bo_map(rsc->bo); + if (!buf) { + fprintf(stderr, "Failed to map bo\n"); + goto fail; + } + + *pptrans = ptrans; + + /* Our load/store routines work on entire compressed blocks. */ + ptrans->box.x /= util_format_get_blockwidth(format); + ptrans->box.y /= util_format_get_blockheight(format); + ptrans->box.width = DIV_ROUND_UP(ptrans->box.width, + util_format_get_blockwidth(format)); + ptrans->box.height = DIV_ROUND_UP(ptrans->box.height, + util_format_get_blockheight(format)); + + struct vc5_resource_slice *slice = &rsc->slices[level]; + if (rsc->tiled) { + /* No direct mappings of tiled, since we need to manually + * tile/untile. + */ + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + return NULL; + + ptrans->stride = ptrans->box.width * rsc->cpp; + ptrans->layer_stride = ptrans->stride * ptrans->box.height; + + trans->map = malloc(ptrans->layer_stride * ptrans->box.depth); + + if (usage & PIPE_TRANSFER_READ) { + for (int z = 0; z < ptrans->box.depth; z++) { + void *src = rsc->bo->map + + vc5_layer_offset(&rsc->base, + ptrans->level, + ptrans->box.z + z); + vc5_load_tiled_image((trans->map + + ptrans->stride * + ptrans->box.height * z), + ptrans->stride, + src, + slice->stride, + slice->tiling, rsc->cpp, + slice->padded_height, + &ptrans->box); + } + } + return trans->map; + } else { + ptrans->stride = slice->stride; + ptrans->layer_stride = ptrans->stride; + + return buf + slice->offset + + ptrans->box.y * ptrans->stride + + ptrans->box.x * rsc->cpp + + ptrans->box.z * rsc->cube_map_stride; + } + + +fail: + vc5_resource_transfer_unmap(pctx, ptrans); + return NULL; +} + +static void +vc5_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + + vc5_bo_unreference(&rsc->bo); + free(rsc); +} + +static boolean +vc5_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *pctx, + struct pipe_resource *prsc, + struct winsys_handle *whandle, + unsigned usage) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + struct vc5_bo *bo = rsc->bo; + + whandle->stride = rsc->slices[0].stride; + + /* If we're passing some reference to our BO out to some other part of + * the system, then we can't do any optimizations about only us being + * the ones seeing it (like BO caching). + */ + bo->private = false; + + switch (whandle->type) { + case DRM_API_HANDLE_TYPE_SHARED: + return vc5_bo_flink(bo, &whandle->handle); + case DRM_API_HANDLE_TYPE_KMS: + whandle->handle = bo->handle; + return TRUE; + case DRM_API_HANDLE_TYPE_FD: + whandle->handle = vc5_bo_get_dmabuf(bo); + return whandle->handle != -1; + } + + return FALSE; +} + +#define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE) +#define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1) +#define PAGE_CACHE_UB_ROWS (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) +#define PAGE_CACHE_MINUS_1_5_UB_ROWS (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5) + +/** + * Computes the HW's UIFblock padding for a given height/cpp. + * + * The goal of the padding is to keep pages of the same color (bank number) at + * least half a page away from each other vertically when crossing between + * between columns of UIF blocks. + */ +static uint32_t +vc5_get_ub_pad(struct vc5_resource *rsc, uint32_t height) +{ + uint32_t utile_h = vc5_utile_height(rsc->cpp); + uint32_t uif_block_h = utile_h * 2; + uint32_t height_ub = height / uif_block_h; + + uint32_t height_offset_in_pc = height_ub % PAGE_CACHE_UB_ROWS; + + /* For the perfectly-aligned-for-UIF-XOR case, don't add any pad. */ + if (height_offset_in_pc == 0) + return 0; + + /* Try padding up to where we're offset by at least half a page. */ + if (height_offset_in_pc < PAGE_UB_ROWS_TIMES_1_5) { + /* If we fit entirely in the page cache, don't pad. */ + if (height_ub < PAGE_CACHE_UB_ROWS) + return 0; + else + return PAGE_UB_ROWS_TIMES_1_5 - height_offset_in_pc; + } + + /* If we're close to being aligned to page cache size, then round up + * and rely on XOR. + */ + if (height_offset_in_pc > PAGE_CACHE_MINUS_1_5_UB_ROWS) + return PAGE_CACHE_UB_ROWS - height_offset_in_pc; + + /* Otherwise, we're far enough away (top and bottom) to not need any + * padding. + */ + return 0; +} + +static void +vc5_setup_slices(struct vc5_resource *rsc) +{ + struct pipe_resource *prsc = &rsc->base; + uint32_t width = prsc->width0; + uint32_t height = prsc->height0; + uint32_t depth = prsc->depth0; + /* Note that power-of-two padding is based on level 1. These are not + * equivalent to just util_next_power_of_two(dimension), because at a + * level 0 dimension of 9, the level 1 power-of-two padded value is 4, + * not 8. + */ + uint32_t pot_width = 2 * util_next_power_of_two(u_minify(width, 1)); + uint32_t pot_height = 2 * util_next_power_of_two(u_minify(height, 1)); + uint32_t pot_depth = 2 * util_next_power_of_two(u_minify(depth, 1)); + uint32_t offset = 0; + uint32_t utile_w = vc5_utile_width(rsc->cpp); + uint32_t utile_h = vc5_utile_height(rsc->cpp); + uint32_t uif_block_w = utile_w * 2; + uint32_t uif_block_h = utile_h * 2; + uint32_t block_width = util_format_get_blockwidth(prsc->format); + uint32_t block_height = util_format_get_blockheight(prsc->format); + bool msaa = prsc->nr_samples > 1; + /* MSAA textures/renderbuffers are always laid out as single-level + * UIF. + */ + bool uif_top = msaa; + + for (int i = prsc->last_level; i >= 0; i--) { + struct vc5_resource_slice *slice = &rsc->slices[i]; + + uint32_t level_width, level_height, level_depth; + if (i < 2) { + level_width = u_minify(width, i); + level_height = u_minify(height, i); + } else { + level_width = u_minify(pot_width, i); + level_height = u_minify(pot_height, i); + } + if (i < 1) + level_depth = u_minify(depth, i); + else + level_depth = u_minify(pot_depth, i); + + if (msaa) { + level_width *= 2; + level_height *= 2; + } + + level_width = DIV_ROUND_UP(level_width, block_width); + level_height = DIV_ROUND_UP(level_height, block_height); + + if (!rsc->tiled) { + slice->tiling = VC5_TILING_RASTER; + if (prsc->target == PIPE_TEXTURE_1D) + level_width = align(level_width, 64 / rsc->cpp); + } else { + if ((i != 0 || !uif_top) && + (level_width <= utile_w || + level_height <= utile_h)) { + slice->tiling = VC5_TILING_LINEARTILE; + level_width = align(level_width, utile_w); + level_height = align(level_height, utile_h); + } else if ((i != 0 || !uif_top) && + level_width <= uif_block_w) { + slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN; + level_width = align(level_width, uif_block_w); + level_height = align(level_height, uif_block_h); + } else if ((i != 0 || !uif_top) && + level_width <= 2 * uif_block_w) { + slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN; + level_width = align(level_width, 2 * uif_block_w); + level_height = align(level_height, uif_block_h); + } else { + /* We align the width to a 4-block column of + * UIF blocks, but we only align height to UIF + * blocks. + */ + level_width = align(level_width, + 4 * uif_block_w); + level_height = align(level_height, + uif_block_h); + + slice->ub_pad = vc5_get_ub_pad(rsc, + level_height); + level_height += slice->ub_pad * uif_block_h; + + /* If the padding set us to to be aligned to + * the page cache size, then the HW will use + * the XOR bit on odd columns to get us + * perfectly misaligned + */ + if ((level_height / uif_block_h) % + (VC5_PAGE_CACHE_SIZE / + VC5_UIFBLOCK_ROW_SIZE) == 0) { + slice->tiling = VC5_TILING_UIF_XOR; + } else { + slice->tiling = VC5_TILING_UIF_NO_XOR; + } + } + } + + slice->offset = offset; + slice->stride = level_width * rsc->cpp; + slice->padded_height = level_height; + slice->size = level_height * slice->stride; + + uint32_t slice_total_size = slice->size * level_depth; + + /* The HW aligns level 1's base to a page if any of level 1 or + * below could be UIF XOR. The lower levels then inherit the + * alignment for as long as necesary, thanks to being power of + * two aligned. + */ + if (i == 1 && + level_width > 4 * uif_block_w && + level_height > PAGE_CACHE_MINUS_1_5_UB_ROWS * uif_block_h) { + slice_total_size = align(slice_total_size, + VC5_UIFCFG_PAGE_SIZE); + } + + offset += slice_total_size; + + } + rsc->size = offset; + + /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only + * needs to be aligned to utile boundaries. Since tiles are laid out + * from small to big in memory, we need to align the later UIF slices + * to UIF blocks, if they were preceded by non-UIF-block-aligned LT + * slices. + * + * We additionally align to 4k, which improves UIF XOR performance. + */ + uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) - + rsc->slices[0].offset); + if (page_align_offset) { + rsc->size += page_align_offset; + for (int i = 0; i <= prsc->last_level; i++) + rsc->slices[i].offset += page_align_offset; + } + + /* Arrays and cube textures have a stride which is the distance from + * one full mipmap tree to the next (64b aligned). For 3D textures, + * we need to program the stride between slices of miplevel 0. + */ + if (prsc->target != PIPE_TEXTURE_3D) { + rsc->cube_map_stride = align(rsc->slices[0].offset + + rsc->slices[0].size, 64); + rsc->size += rsc->cube_map_stride * (prsc->array_size - 1); + } else { + rsc->cube_map_stride = rsc->slices[0].size; + } +} + +uint32_t +vc5_layer_offset(struct pipe_resource *prsc, uint32_t level, uint32_t layer) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + struct vc5_resource_slice *slice = &rsc->slices[level]; + + if (prsc->target == PIPE_TEXTURE_3D) + return slice->offset + layer * slice->size; + else + return slice->offset + layer * rsc->cube_map_stride; +} + +static struct vc5_resource * +vc5_resource_setup(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_resource *rsc = CALLOC_STRUCT(vc5_resource); + if (!rsc) + return NULL; + struct pipe_resource *prsc = &rsc->base; + + *prsc = *tmpl; + + pipe_reference_init(&prsc->reference, 1); + prsc->screen = pscreen; + + if (prsc->nr_samples <= 1 || + screen->devinfo.ver >= 40 || + util_format_is_depth_or_stencil(prsc->format)) { + rsc->cpp = util_format_get_blocksize(prsc->format); + if (screen->devinfo.ver < 40 && prsc->nr_samples > 1) + rsc->cpp *= prsc->nr_samples; + } else { + assert(vc5_rt_format_supported(&screen->devinfo, prsc->format)); + uint32_t output_image_format = + vc5_get_rt_format(&screen->devinfo, prsc->format); + uint32_t internal_type; + uint32_t internal_bpp; + vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, + output_image_format, + &internal_type, + &internal_bpp); + switch (internal_bpp) { + case V3D_INTERNAL_BPP_32: + rsc->cpp = 4; + break; + case V3D_INTERNAL_BPP_64: + rsc->cpp = 8; + break; + case V3D_INTERNAL_BPP_128: + rsc->cpp = 16; + break; + } + } + + assert(rsc->cpp); + + return rsc; +} + +static bool +find_modifier(uint64_t needle, const uint64_t *haystack, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (haystack[i] == needle) + return true; + } + + return false; +} + +static struct pipe_resource * +vc5_resource_create_with_modifiers(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + const uint64_t *modifiers, + int count) +{ + bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); + struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl); + struct pipe_resource *prsc = &rsc->base; + /* Use a tiled layout if we can, for better 3D performance. */ + bool should_tile = true; + + /* VBOs/PBOs are untiled (and 1 height). */ + if (tmpl->target == PIPE_BUFFER) + should_tile = false; + + /* Cursors are always linear, and the user can request linear as well. + */ + if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR)) + should_tile = false; + + /* 1D and 1D_ARRAY textures are always raster-order. */ + if (tmpl->target == PIPE_TEXTURE_1D || + tmpl->target == PIPE_TEXTURE_1D_ARRAY) + should_tile = false; + + /* Scanout BOs for simulator need to be linear for interaction with + * i965. + */ + if (using_vc5_simulator && + tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) + should_tile = false; + + /* No user-specified modifier; determine our own. */ + if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) { + linear_ok = true; + rsc->tiled = should_tile; + } else if (should_tile && + find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, + modifiers, count)) { + rsc->tiled = true; + } else if (linear_ok) { + rsc->tiled = false; + } else { + fprintf(stderr, "Unsupported modifier requested\n"); + return NULL; + } + + rsc->internal_format = prsc->format; + + vc5_setup_slices(rsc); + if (!vc5_resource_bo_alloc(rsc)) + goto fail; + + return prsc; +fail: + vc5_resource_destroy(pscreen, prsc); + return NULL; +} + +struct pipe_resource * +vc5_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + const uint64_t mod = DRM_FORMAT_MOD_INVALID; + return vc5_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); +} + +static struct pipe_resource * +vc5_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + struct winsys_handle *whandle, + unsigned usage) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl); + struct pipe_resource *prsc = &rsc->base; + struct vc5_resource_slice *slice = &rsc->slices[0]; + + if (!rsc) + return NULL; + + switch (whandle->modifier) { + case DRM_FORMAT_MOD_LINEAR: + case DRM_FORMAT_MOD_INVALID: + rsc->tiled = false; + break; + /* XXX: UIF */ + default: + fprintf(stderr, + "Attempt to import unsupported modifier 0x%llx\n", + (long long)whandle->modifier); + goto fail; + } + + if (whandle->offset != 0) { + fprintf(stderr, + "Attempt to import unsupported winsys offset %u\n", + whandle->offset); + goto fail; + } + + switch (whandle->type) { + case DRM_API_HANDLE_TYPE_SHARED: + rsc->bo = vc5_bo_open_name(screen, + whandle->handle, whandle->stride); + break; + case DRM_API_HANDLE_TYPE_FD: + rsc->bo = vc5_bo_open_dmabuf(screen, + whandle->handle, whandle->stride); + break; + default: + fprintf(stderr, + "Attempt to import unsupported handle type %d\n", + whandle->type); + goto fail; + } + + if (!rsc->bo) + goto fail; + + rsc->internal_format = prsc->format; + + vc5_setup_slices(rsc); + vc5_debug_resource_layout(rsc, "import"); + + if (whandle->stride != slice->stride) { + static bool warned = false; + if (!warned) { + warned = true; + fprintf(stderr, + "Attempting to import %dx%d %s with " + "unsupported stride %d instead of %d\n", + prsc->width0, prsc->height0, + util_format_short_name(prsc->format), + whandle->stride, + slice->stride); + } + goto fail; + } + + return prsc; + +fail: + vc5_resource_destroy(pscreen, prsc); + return NULL; +} + +static struct pipe_surface * +vc5_create_surface(struct pipe_context *pctx, + struct pipe_resource *ptex, + const struct pipe_surface *surf_tmpl) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_screen *screen = vc5->screen; + struct vc5_surface *surface = CALLOC_STRUCT(vc5_surface); + struct vc5_resource *rsc = vc5_resource(ptex); + + if (!surface) + return NULL; + + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + + struct pipe_surface *psurf = &surface->base; + unsigned level = surf_tmpl->u.tex.level; + struct vc5_resource_slice *slice = &rsc->slices[level]; + + pipe_reference_init(&psurf->reference, 1); + pipe_resource_reference(&psurf->texture, ptex); + + psurf->context = pctx; + psurf->format = surf_tmpl->format; + psurf->width = u_minify(ptex->width0, level); + psurf->height = u_minify(ptex->height0, level); + psurf->u.tex.level = level; + psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + + surface->offset = vc5_layer_offset(ptex, level, + psurf->u.tex.first_layer); + surface->tiling = slice->tiling; + + surface->format = vc5_get_rt_format(&screen->devinfo, psurf->format); + + if (util_format_is_depth_or_stencil(psurf->format)) { + switch (psurf->format) { + case PIPE_FORMAT_Z16_UNORM: + surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_16; + break; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_32F; + break; + default: + surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_24; + } + } else { + uint32_t bpp, type; + vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, + surface->format, + &type, &bpp); + surface->internal_type = type; + surface->internal_bpp = bpp; + } + + if (surface->tiling == VC5_TILING_UIF_NO_XOR || + surface->tiling == VC5_TILING_UIF_XOR) { + surface->padded_height_of_output_image_in_uif_blocks = + (slice->padded_height / + (2 * vc5_utile_height(rsc->cpp))); + } + + if (rsc->separate_stencil) { + surface->separate_stencil = + vc5_create_surface(pctx, &rsc->separate_stencil->base, + surf_tmpl); + } + + return &surface->base; +} + +static void +vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf) +{ + struct vc5_surface *surf = vc5_surface(psurf); + + if (surf->separate_stencil) + pipe_surface_reference(&surf->separate_stencil, NULL); + + pipe_resource_reference(&psurf->texture, NULL); + FREE(psurf); +} + +static void +vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource) +{ + /* All calls to flush_resource are followed by a flush of the context, + * so there's nothing to do. + */ +} + +static enum pipe_format +vc5_resource_get_internal_format(struct pipe_resource *prsc) +{ + return vc5_resource(prsc)->internal_format; +} + +static void +vc5_resource_set_stencil(struct pipe_resource *prsc, + struct pipe_resource *stencil) +{ + vc5_resource(prsc)->separate_stencil = vc5_resource(stencil); +} + +static struct pipe_resource * +vc5_resource_get_stencil(struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + + return &rsc->separate_stencil->base; +} + +static const struct u_transfer_vtbl transfer_vtbl = { + .resource_create = vc5_resource_create, + .resource_destroy = vc5_resource_destroy, + .transfer_map = vc5_resource_transfer_map, + .transfer_unmap = vc5_resource_transfer_unmap, + .transfer_flush_region = u_default_transfer_flush_region, + .get_internal_format = vc5_resource_get_internal_format, + .set_stencil = vc5_resource_set_stencil, + .get_stencil = vc5_resource_get_stencil, +}; + +void +vc5_resource_screen_init(struct pipe_screen *pscreen) +{ + pscreen->resource_create_with_modifiers = + vc5_resource_create_with_modifiers; + pscreen->resource_create = u_transfer_helper_resource_create; + pscreen->resource_from_handle = vc5_resource_from_handle; + pscreen->resource_get_handle = vc5_resource_get_handle; + pscreen->resource_destroy = u_transfer_helper_resource_destroy; + pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl, + true, true, true); +} + +void +vc5_resource_context_init(struct pipe_context *pctx) +{ + pctx->transfer_map = u_transfer_helper_transfer_map; + pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; + pctx->transfer_unmap = u_transfer_helper_transfer_unmap; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; + pctx->create_surface = vc5_create_surface; + pctx->surface_destroy = vc5_surface_destroy; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = vc5_blit; + pctx->flush_resource = vc5_flush_resource; +} diff --git a/src/gallium/drivers/v3d/v3d_resource.h b/src/gallium/drivers/v3d/v3d_resource.h new file mode 100644 index 00000000000..dc68f803e90 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_resource.h @@ -0,0 +1,175 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_RESOURCE_H +#define VC5_RESOURCE_H + +#include "v3d_screen.h" +#include "util/u_transfer.h" + +/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These + * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB + * page. Those pages are then arranged left-to-right, top-to-bottom, to cover + * an image. + * + * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte + * utiles. Utiles may be 8x8 (8bpp), 8x4(16bpp) or 4x4 (32bpp). + */ + +/** + * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory + * Format field of render target and Z/Stencil config. + */ +enum vc5_tiling_mode { + /* Untiled resources. Not valid as texture inputs. */ + VC5_TILING_RASTER, + + /* Single line of u-tiles. */ + VC5_TILING_LINEARTILE, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_1_COLUMN, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_2_COLUMN, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_NO_XOR, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_XOR, +}; + +struct vc5_transfer { + struct pipe_transfer base; + void *map; +}; + +struct vc5_resource_slice { + uint32_t offset; + uint32_t stride; + uint32_t padded_height; + /* Size of a single pane of the slice. For 3D textures, there will be + * a number of panes equal to the minified, power-of-two-aligned + * depth. + */ + uint32_t size; + uint8_t ub_pad; + enum vc5_tiling_mode tiling; +}; + +struct vc5_surface { + struct pipe_surface base; + uint32_t offset; + enum vc5_tiling_mode tiling; + /** + * Output image format for TILE_RENDERING_MODE_CONFIGURATION + */ + uint8_t format; + + /** + * Internal format of the tile buffer for + * TILE_RENDERING_MODE_CONFIGURATION. + */ + uint8_t internal_type; + + /** + * internal bpp value (0=32bpp, 2=128bpp) for color buffers in + * TILE_RENDERING_MODE_CONFIGURATION. + */ + uint8_t internal_bpp; + + uint32_t padded_height_of_output_image_in_uif_blocks; + + /* If the resource being referenced is separate stencil, then this is + * the surface to use when reading/writing stencil. + */ + struct pipe_surface *separate_stencil; +}; + +struct vc5_resource { + struct pipe_resource base; + struct vc5_bo *bo; + struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS]; + uint32_t cube_map_stride; + uint32_t size; + int cpp; + bool tiled; + + /** + * Number of times the resource has been written to. + * + * This is used to track whether we need to load the surface on first + * rendering. + */ + uint64_t writes; + + /** + * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL + * for which parts of the resource are defined. + * + * Used for avoiding fallback to quad clears for clearing just depth, + * when the stencil contents have never been initialized. Note that + * we're lazy and fields not present in the buffer (DEPTH in a color + * buffer) may get marked. + */ + uint32_t initialized_buffers; + + enum pipe_format internal_format; + + /* Resource storing the S8 part of a Z32F_S8 resource, or NULL. */ + struct vc5_resource *separate_stencil; +}; + +static inline struct vc5_resource * +vc5_resource(struct pipe_resource *prsc) +{ + return (struct vc5_resource *)prsc; +} + +static inline struct vc5_surface * +vc5_surface(struct pipe_surface *psurf) +{ + return (struct vc5_surface *)psurf; +} + +static inline struct vc5_transfer * +vc5_transfer(struct pipe_transfer *ptrans) +{ + return (struct vc5_transfer *)ptrans; +} + +void vc5_resource_screen_init(struct pipe_screen *pscreen); +void vc5_resource_context_init(struct pipe_context *pctx); +struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl); +uint32_t vc5_layer_offset(struct pipe_resource *prsc, uint32_t level, + uint32_t layer); + + +#endif /* VC5_RESOURCE_H */ diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c new file mode 100644 index 00000000000..95e6a6907f4 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -0,0 +1,648 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "os/os_misc.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_hash_table.h" +#include "util/ralloc.h" + +#include <xf86drm.h> +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_resource.h" +#include "compiler/v3d_compiler.h" + +static const char * +vc5_screen_get_name(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + if (!screen->name) { + screen->name = ralloc_asprintf(screen, + "VC5 V3D %d.%d", + screen->devinfo.ver / 10, + screen->devinfo.ver % 10); + } + + return screen->name; +} + +static const char * +vc5_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "Broadcom"; +} + +static void +vc5_screen_destroy(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + util_hash_table_destroy(screen->bo_handles); + vc5_bufmgr_destroy(pscreen); + slab_destroy_parent(&screen->transfer_pool); + + if (using_vc5_simulator) + vc5_simulator_destroy(screen); + + v3d_compiler_free(screen->compiler); + + close(screen->fd); + ralloc_free(pscreen); +} + +static int +vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + switch (param) { + /* Supported features (boolean caps). */ + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_SM3: + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: + case PIPE_CAP_COMPUTE: + case PIPE_CAP_DRAW_INDIRECT: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: + return 1; + + case PIPE_CAP_INDEP_BLEND_ENABLE: + return screen->devinfo.ver >= 40; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 4; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 400; + + case PIPE_CAP_MAX_VIEWPORTS: + return 1; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + return 0; + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + if (screen->devinfo.ver >= 40) + return 0; + else + return 1; + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + if (screen->devinfo.ver >= 40) + return 1; + else + return 0; + + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + return 1; + + + /* Stream output. */ + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 64; + + case PIPE_CAP_MIN_TEXEL_OFFSET: + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: + return 7; + + /* Unsupported features. */ + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + case PIPE_CAP_TEXTURE_GATHER_SM5: + case PIPE_CAP_FAKE_SW_MSAA: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: + case PIPE_CAP_MAX_VERTEX_STREAMS: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_CLIP_HALFZ: + case PIPE_CAP_VERTEXID_NOBASE: + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_PCI_GROUP: + case PIPE_CAP_PCI_BUS: + case PIPE_CAP_PCI_DEVICE: + case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: + case PIPE_CAP_CULL_DISTANCE: + case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: + case PIPE_CAP_TGSI_VOTE: + case PIPE_CAP_MAX_WINDOW_RECTANGLES: + case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: + case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: + case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_TGSI_FS_FBFETCH: + case PIPE_CAP_INT64: + case PIPE_CAP_INT64_DIVMOD: + case PIPE_CAP_DOUBLES: + case PIPE_CAP_BINDLESS_TEXTURE: + case PIPE_CAP_POST_DEPTH_COVERAGE: + case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: + case PIPE_CAP_TGSI_BALLOT: + case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: + case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: + case PIPE_CAP_TGSI_CLOCK: + case PIPE_CAP_TGSI_TEX_TXF_LZ: + case PIPE_CAP_NATIVE_FENCE_FD: + case PIPE_CAP_FENCE_SIGNAL: + case PIPE_CAP_TGSI_MUL_ZERO_WINS: + case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: + case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_MEMOBJ: + case PIPE_CAP_LOAD_CONSTBUF: + case PIPE_CAP_TILE_RASTER_ORDER: + case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: + case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: + case PIPE_CAP_CONTEXT_PRIORITY_MASK: + case PIPE_CAP_CONSTBUF0_FLAGS: + case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: + case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: + case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: + case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: + case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: + case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: + case PIPE_CAP_PACKED_UNIFORMS: + return 0; + + /* Geometry shader output, unsupported. */ + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return VC5_MAX_MIP_LEVELS; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 2048; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + + /* Queries. */ + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_QUERY_TIMESTAMP: + return 0; + + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; + + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return 64; + + case PIPE_CAP_VENDOR_ID: + return 0x14E4; + case PIPE_CAP_DEVICE_ID: + return 0xFFFFFFFF; + case PIPE_CAP_ACCELERATED: + return 1; + case PIPE_CAP_VIDEO_MEMORY: { + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + case PIPE_CAP_UMA: + return 1; + + default: + fprintf(stderr, "unknown param %d\n", param); + return 0; + } +} + +static float +vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 32; + + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 512.0f; + + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 0.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + + case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: + return 0.0f; + default: + fprintf(stderr, "unknown paramf %d\n", param); + return 0; + } +} + +static int +vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_FRAGMENT) { + return 0; + } + + /* this is probably not totally correct.. but it's a start: */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return UINT_MAX; + + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return VC5_MAX_FS_INPUTS / 4; + else + return 16; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return 4; + else + return VC5_MAX_FS_INPUTS / 4; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 16 * 1024 * sizeof(float); + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 16; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + return 0; + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: + return 0; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return VC5_MAX_TEXTURE_SAMPLERS; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + return 0; + default: + fprintf(stderr, "unknown shader param %d\n", param); + return 0; + } + return 0; +} + +static boolean +vc5_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES) + return FALSE; + + if ((target >= PIPE_MAX_TEXTURE_TYPES) || + !util_format_is_supported(format, usage)) { + return FALSE; + } + + if (usage & PIPE_BIND_VERTEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16_USCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10G10B10A2_SNORM: + case PIPE_FORMAT_B10G10R10A2_SNORM: + case PIPE_FORMAT_R10G10B10A2_USCALED: + case PIPE_FORMAT_B10G10R10A2_USCALED: + case PIPE_FORMAT_R10G10B10A2_SSCALED: + case PIPE_FORMAT_B10G10R10A2_SSCALED: + break; + default: + return FALSE; + } + } + + if ((usage & PIPE_BIND_RENDER_TARGET) && + !vc5_rt_format_supported(&screen->devinfo, format)) { + return FALSE; + } + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + !vc5_tex_format_supported(&screen->devinfo, format)) { + return FALSE; + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + !(format == PIPE_FORMAT_S8_UINT_Z24_UNORM || + format == PIPE_FORMAT_X8Z24_UNORM || + format == PIPE_FORMAT_Z16_UNORM || + format == PIPE_FORMAT_Z32_FLOAT || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { + return FALSE; + } + + if ((usage & PIPE_BIND_INDEX_BUFFER) && + !(format == PIPE_FORMAT_I8_UINT || + format == PIPE_FORMAT_I16_UINT || + format == PIPE_FORMAT_I32_UINT)) { + return FALSE; + } + + return TRUE; +} + +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + +static bool +vc5_get_device_info(struct vc5_screen *screen) +{ + struct drm_v3d_get_param ident0 = { + .param = DRM_V3D_PARAM_V3D_CORE0_IDENT0, + }; + struct drm_v3d_get_param ident1 = { + .param = DRM_V3D_PARAM_V3D_CORE0_IDENT1, + }; + int ret; + + ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &ident0); + if (ret != 0) { + fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n", + strerror(errno)); + return false; + } + ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &ident1); + if (ret != 0) { + fprintf(stderr, "Couldn't get V3D core IDENT1: %s\n", + strerror(errno)); + return false; + } + + uint32_t major = (ident0.value >> 24) & 0xff; + uint32_t minor = (ident1.value >> 0) & 0xf; + screen->devinfo.ver = major * 10 + minor; + + switch (screen->devinfo.ver) { + case 33: + case 41: + case 42: + break; + default: + fprintf(stderr, + "V3D %d.%d not supported by this version of Mesa.\n", + screen->devinfo.ver / 10, + screen->devinfo.ver % 10); + return false; + } + + return true; +} + +static const void * +vc5_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, unsigned shader) +{ + return &v3d_nir_options; +} + +struct pipe_screen * +v3d_screen_create(int fd) +{ + struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen); + struct pipe_screen *pscreen; + + pscreen = &screen->base; + + pscreen->destroy = vc5_screen_destroy; + pscreen->get_param = vc5_screen_get_param; + pscreen->get_paramf = vc5_screen_get_paramf; + pscreen->get_shader_param = vc5_screen_get_shader_param; + pscreen->context_create = vc5_context_create; + pscreen->is_format_supported = vc5_screen_is_format_supported; + + screen->fd = fd; + list_inithead(&screen->bo_cache.time_list); + (void)mtx_init(&screen->bo_handles_mutex, mtx_plain); + screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); + +#if defined(USE_V3D_SIMULATOR) + vc5_simulator_init(screen); +#endif + + if (!vc5_get_device_info(screen)) + goto fail; + + slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16); + + vc5_fence_init(screen); + + v3d_process_debug_variable(); + + vc5_resource_screen_init(pscreen); + + screen->compiler = v3d_compiler_init(&screen->devinfo); + + pscreen->get_name = vc5_screen_get_name; + pscreen->get_vendor = vc5_screen_get_vendor; + pscreen->get_device_vendor = vc5_screen_get_vendor; + pscreen->get_compiler_options = vc5_screen_get_compiler_options; + + return pscreen; + +fail: + close(fd); + ralloc_free(pscreen); + return NULL; +} diff --git a/src/gallium/drivers/v3d/v3d_screen.h b/src/gallium/drivers/v3d/v3d_screen.h new file mode 100644 index 00000000000..975bfe01a75 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_screen.h @@ -0,0 +1,101 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_SCREEN_H +#define VC5_SCREEN_H + +#include "pipe/p_screen.h" +#include "os/os_thread.h" +#include "state_tracker/drm_driver.h" +#include "util/list.h" +#include "util/slab.h" +#include "broadcom/common/v3d_debug.h" +#include "broadcom/common/v3d_device_info.h" + +struct vc5_bo; + +#define VC5_MAX_MIP_LEVELS 12 +#define VC5_MAX_TEXTURE_SAMPLERS 32 +#define VC5_MAX_SAMPLES 4 +#define VC5_MAX_DRAW_BUFFERS 4 +#define VC5_MAX_ATTRIBUTES 16 + +/* These are tunable parameters in the HW design, but all the V3D + * implementations agree. + */ +#define VC5_UIFCFG_BANKS 8 +#define VC5_UIFCFG_PAGE_SIZE 4096 +#define VC5_UIFCFG_XOR_VALUE (1 << 4) +#define VC5_PAGE_CACHE_SIZE (VC5_UIFCFG_PAGE_SIZE * VC5_UIFCFG_BANKS) +#define VC5_UBLOCK_SIZE 64 +#define VC5_UIFBLOCK_SIZE (4 * VC5_UBLOCK_SIZE) +#define VC5_UIFBLOCK_ROW_SIZE (4 * VC5_UIFBLOCK_SIZE) + +struct vc5_simulator_file; + +struct vc5_screen { + struct pipe_screen base; + int fd; + + struct v3d_device_info devinfo; + + const char *name; + + struct slab_parent_pool transfer_pool; + + struct vc5_bo_cache { + /** List of struct vc5_bo freed, by age. */ + struct list_head time_list; + /** List of struct vc5_bo freed, per size, by age. */ + struct list_head *size_list; + uint32_t size_list_size; + + mtx_t lock; + + uint32_t bo_size; + uint32_t bo_count; + } bo_cache; + + const struct v3d_compiler *compiler; + + struct util_hash_table *bo_handles; + mtx_t bo_handles_mutex; + + uint32_t bo_size; + uint32_t bo_count; + + struct vc5_simulator_file *sim_file; +}; + +static inline struct vc5_screen * +vc5_screen(struct pipe_screen *screen) +{ + return (struct vc5_screen *)screen; +} + +struct pipe_screen *v3d_screen_create(int fd); + +void +vc5_fence_init(struct vc5_screen *screen); + +#endif /* VC5_SCREEN_H */ diff --git a/src/gallium/drivers/v3d/v3d_simulator.c b/src/gallium/drivers/v3d/v3d_simulator.c new file mode 100644 index 00000000000..86e4ed3be3d --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_simulator.c @@ -0,0 +1,660 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc5_simulator.c + * + * Implements VC5 simulation on top of a non-VC5 GEM fd. + * + * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on + * top of the simpenrose software simulator. Generally, VC5 driver BOs have a + * GEM-side copy of their contents and a simulator-side memory area that the + * GEM contents get copied into during simulation. Once simulation is done, + * the simulator's data is copied back out to the GEM BOs, so that rendering + * appears on the screen as if actual hardware rendering had been done. + * + * One of the limitations of this code is that we shouldn't really need a + * GEM-side BO for non-window-system BOs. However, do we need unique BO + * handles for each of our GEM bos so that this file can look up its state + * from the handle passed in at submit ioctl time (also, a couple of places + * outside of this file still call ioctls directly on the fd). + * + * Another limitation is that BO import doesn't work unless the underlying + * window system's BO size matches what VC5 is going to use, which of course + * doesn't work out in practice. This means that for now, only DRI3 (VC5 + * makes the winsys BOs) is supported, not DRI2 (window system makes the winys + * BOs). + */ + +#ifdef USE_V3D_SIMULATOR + +#include <sys/mman.h> +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "util/u_memory.h" +#include "util/u_mm.h" +#include "v3d_simulator_wrapper.h" + +#include "v3d_screen.h" +#include "v3d_context.h" + +/** Global (across GEM fds) state for the simulator */ +static struct vc5_simulator_state { + mtx_t mutex; + + struct v3d_hw *v3d; + int ver; + + /* Base virtual address of the heap. */ + void *mem; + /* Base hardware address of the heap. */ + uint32_t mem_base; + /* Size of the heap. */ + size_t mem_size; + + struct mem_block *heap; + struct mem_block *overflow; + + /** Mapping from GEM handle to struct vc5_simulator_bo * */ + struct hash_table *fd_map; + + int refcount; +} sim_state = { + .mutex = _MTX_INITIALIZER_NP, +}; + +/** Per-GEM-fd state for the simulator. */ +struct vc5_simulator_file { + int fd; + + /** Mapping from GEM handle to struct vc5_simulator_bo * */ + struct hash_table *bo_map; + + struct mem_block *gmp; + void *gmp_vaddr; +}; + +/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */ +struct vc5_simulator_bo { + struct vc5_simulator_file *file; + + /** Area for this BO within sim_state->mem */ + struct mem_block *block; + uint32_t size; + void *vaddr; + + void *winsys_map; + uint32_t winsys_stride; + + int handle; +}; + +static void * +int_to_key(int key) +{ + return (void *)(uintptr_t)key; +} + +static struct vc5_simulator_file * +vc5_get_simulator_file_for_fd(int fd) +{ + struct hash_entry *entry = _mesa_hash_table_search(sim_state.fd_map, + int_to_key(fd + 1)); + return entry ? entry->data : NULL; +} + +/* A marker placed just after each BO, then checked after rendering to make + * sure it's still there. + */ +#define BO_SENTINEL 0xfedcba98 + +/* 128kb */ +#define GMP_ALIGN2 17 + +/** + * Sets the range of GPU virtual address space to have the given GMP + * permissions (bit 0 = read, bit 1 = write, write-only forbidden). + */ +static void +set_gmp_flags(struct vc5_simulator_file *file, + uint32_t offset, uint32_t size, uint32_t flag) +{ + assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0); + int gmp_offset = offset >> GMP_ALIGN2; + int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2; + uint32_t *gmp = file->gmp_vaddr; + + assert(flag <= 0x3); + + for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) { + int32_t bitshift = (i % 16) * 2; + gmp[i / 16] &= ~(0x3 << bitshift); + gmp[i / 16] |= flag << bitshift; + } +} + +/** + * Allocates space in simulator memory and returns a tracking struct for it + * that also contains the drm_gem_cma_object struct. + */ +static struct vc5_simulator_bo * +vc5_create_simulator_bo(int fd, int handle, unsigned size) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = rzalloc(file, + struct vc5_simulator_bo); + size = align(size, 4096); + + sim_bo->file = file; + sim_bo->handle = handle; + + mtx_lock(&sim_state.mutex); + sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0); + mtx_unlock(&sim_state.mutex); + assert(sim_bo->block); + + set_gmp_flags(file, sim_bo->block->ofs, size, 0x3); + + sim_bo->size = size; + sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; + memset(sim_bo->vaddr, 0xd0, size); + + *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL; + + /* A handle of 0 is used for vc5_gem.c internal allocations that + * don't need to go in the lookup table. + */ + if (handle != 0) { + mtx_lock(&sim_state.mutex); + _mesa_hash_table_insert(file->bo_map, int_to_key(handle), + sim_bo); + mtx_unlock(&sim_state.mutex); + } + + return sim_bo; +} + +static void +vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo) +{ + struct vc5_simulator_file *sim_file = sim_bo->file; + + if (sim_bo->winsys_map) + munmap(sim_bo->winsys_map, sim_bo->size); + + set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0); + + mtx_lock(&sim_state.mutex); + u_mmFreeMem(sim_bo->block); + if (sim_bo->handle) { + struct hash_entry *entry = + _mesa_hash_table_search(sim_file->bo_map, + int_to_key(sim_bo->handle)); + _mesa_hash_table_remove(sim_file->bo_map, entry); + } + mtx_unlock(&sim_state.mutex); + ralloc_free(sim_bo); +} + +static struct vc5_simulator_bo * +vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle) +{ + mtx_lock(&sim_state.mutex); + struct hash_entry *entry = + _mesa_hash_table_search(file->bo_map, int_to_key(gem_handle)); + mtx_unlock(&sim_state.mutex); + + return entry ? entry->data : NULL; +} + +static int +vc5_simulator_pin_bos(int fd, struct vc5_job *job) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + struct vc5_simulator_bo *sim_bo = + vc5_get_simulator_bo(file, bo->handle); + + vc5_bo_map(bo); + memcpy(sim_bo->vaddr, bo->map, bo->size); + } + + return 0; +} + +static int +vc5_simulator_unpin_bos(int fd, struct vc5_job *job) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + struct vc5_simulator_bo *sim_bo = + vc5_get_simulator_bo(file, bo->handle); + + if (*(uint32_t *)(sim_bo->vaddr + + sim_bo->size) != BO_SENTINEL) { + fprintf(stderr, "Buffer overflow in %s\n", bo->name); + } + + vc5_bo_map(bo); + memcpy(bo->map, sim_bo->vaddr, bo->size); + } + + return 0; +} + +#if 0 +static void +vc5_dump_to_file(struct vc5_exec_info *exec) +{ + static int dumpno = 0; + struct drm_vc5_get_hang_state *state; + struct drm_vc5_get_hang_state_bo *bo_state; + unsigned int dump_version = 0; + + if (!(vc5_debug & VC5_DEBUG_DUMP)) + return; + + state = calloc(1, sizeof(*state)); + + int unref_count = 0; + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + unref_count++; + } + + /* Add one more for the overflow area that isn't wrapped in a BO. */ + state->bo_count = exec->bo_count + unref_count + 1; + bo_state = calloc(state->bo_count, sizeof(*bo_state)); + + char *filename = NULL; + asprintf(&filename, "vc5-dri-%d.dump", dumpno++); + FILE *f = fopen(filename, "w+"); + if (!f) { + fprintf(stderr, "Couldn't open %s: %s", filename, + strerror(errno)); + return; + } + + fwrite(&dump_version, sizeof(dump_version), 1, f); + + state->ct0ca = exec->ct0ca; + state->ct0ea = exec->ct0ea; + state->ct1ca = exec->ct1ca; + state->ct1ea = exec->ct1ea; + state->start_bin = exec->ct0ca; + state->start_render = exec->ct1ca; + fwrite(state, sizeof(*state), 1, f); + + int i; + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + bo_state[i].handle = i; /* Not used by the parser. */ + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + } + + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + bo_state[i].handle = 0; + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + i++; + } + + /* Add the static overflow memory area. */ + bo_state[i].handle = exec->bo_count; + bo_state[i].paddr = sim_state.overflow->ofs; + bo_state[i].size = sim_state.overflow->size; + i++; + + fwrite(bo_state, sizeof(*bo_state), state->bo_count, f); + + for (int i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + void *overflow = calloc(1, sim_state.overflow->size); + fwrite(overflow, 1, sim_state.overflow->size, f); + free(overflow); + + free(state); + free(bo_state); + fclose(f); +} +#endif + +int +vc5_simulator_flush(struct vc5_context *vc5, + struct drm_v3d_submit_cl *submit, struct vc5_job *job) +{ + struct vc5_screen *screen = vc5->screen; + int fd = screen->fd; + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]); + struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL; + struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL; + uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; + uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0; + uint32_t row_len = MIN2(sim_stride, winsys_stride); + int ret; + + if (ctex && csim_bo->winsys_map) { +#if 0 + fprintf(stderr, "%dx%d %d %d %d\n", + ctex->base.b.width0, ctex->base.b.height0, + winsys_stride, + sim_stride, + ctex->bo->size); +#endif + + for (int y = 0; y < ctex->base.height0; y++) { + memcpy(ctex->bo->map + y * sim_stride, + csim_bo->winsys_map + y * winsys_stride, + row_len); + } + } + + ret = vc5_simulator_pin_bos(fd, job); + if (ret) + return ret; + + //vc5_dump_to_file(&exec); + + if (sim_state.ver >= 41) + v3d41_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); + else + v3d33_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); + + ret = vc5_simulator_unpin_bos(fd, job); + if (ret) + return ret; + + if (ctex && csim_bo->winsys_map) { + for (int y = 0; y < ctex->base.height0; y++) { + memcpy(csim_bo->winsys_map + y * winsys_stride, + ctex->bo->map + y * sim_stride, + row_len); + } + } + + return 0; +} + +/** + * Map the underlying GEM object from the real hardware GEM handle. + */ +static void * +vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo) +{ + int ret; + void *map; + + struct drm_mode_map_dumb map_dumb = { + .handle = sim_bo->handle, + }; + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); + if (ret != 0) { + fprintf(stderr, "map ioctl failure\n"); + abort(); + } + + map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, map_dumb.offset); + if (map == MAP_FAILED) { + fprintf(stderr, + "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + sim_bo->handle, (long long)map_dumb.offset, + (int)sim_bo->size); + abort(); + } + + return map; +} + +/** + * Do fixups after a BO has been opened from a handle. + * + * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE + * time, but we're still using drmPrimeFDToHandle() so we have this helper to + * be called afterward instead. + */ +void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, + int handle, uint32_t size) +{ + struct vc5_simulator_bo *sim_bo = + vc5_create_simulator_bo(fd, handle, size); + + sim_bo->winsys_stride = winsys_stride; + sim_bo->winsys_map = vc5_simulator_map_winsys_bo(fd, sim_bo); +} + +/** + * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation. + * + * Making a VC5 BO is just a matter of making a corresponding BO on the host. + */ +static int +vc5_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args) +{ + int ret; + struct drm_mode_create_dumb create = { + .width = 128, + .bpp = 8, + .height = (args->size + 127) / 128, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); + assert(create.size >= args->size); + + args->handle = create.handle; + + struct vc5_simulator_bo *sim_bo = + vc5_create_simulator_bo(fd, create.handle, args->size); + + args->offset = sim_bo->block->ofs; + + return ret; +} + +/** + * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation. + * + * We just pass this straight through to dumb mmap. + */ +static int +vc5_simulator_mmap_bo_ioctl(int fd, struct drm_v3d_mmap_bo *args) +{ + int ret; + struct drm_mode_map_dumb map = { + .handle = args->handle, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + args->offset = map.offset; + + return ret; +} + +static int +vc5_simulator_get_bo_offset_ioctl(int fd, struct drm_v3d_get_bo_offset *args) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file, + args->handle); + + args->offset = sim_bo->block->ofs; + + return 0; +} + +static int +vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args) +{ + /* Free the simulator's internal tracking. */ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file, + args->handle); + + vc5_free_simulator_bo(sim_bo); + + /* Pass the call on down. */ + return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args); +} + +static int +vc5_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args) +{ + if (sim_state.ver >= 41) + return v3d41_simulator_get_param_ioctl(sim_state.v3d, args); + else + return v3d33_simulator_get_param_ioctl(sim_state.v3d, args); +} + +int +vc5_simulator_ioctl(int fd, unsigned long request, void *args) +{ + switch (request) { + case DRM_IOCTL_V3D_CREATE_BO: + return vc5_simulator_create_bo_ioctl(fd, args); + case DRM_IOCTL_V3D_MMAP_BO: + return vc5_simulator_mmap_bo_ioctl(fd, args); + case DRM_IOCTL_V3D_GET_BO_OFFSET: + return vc5_simulator_get_bo_offset_ioctl(fd, args); + + case DRM_IOCTL_V3D_WAIT_BO: + /* We do all of the vc5 rendering synchronously, so we just + * return immediately on the wait ioctls. This ignores any + * native rendering to the host BO, so it does mean we race on + * front buffer rendering. + */ + return 0; + + case DRM_IOCTL_V3D_GET_PARAM: + return vc5_simulator_get_param_ioctl(fd, args); + + case DRM_IOCTL_GEM_CLOSE: + return vc5_simulator_gem_close_ioctl(fd, args); + + case DRM_IOCTL_GEM_OPEN: + case DRM_IOCTL_GEM_FLINK: + return drmIoctl(fd, request, args); + default: + fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request); + abort(); + } +} + +static void +vc5_simulator_init_global(const struct v3d_device_info *devinfo) +{ + mtx_lock(&sim_state.mutex); + if (sim_state.refcount++) { + mtx_unlock(&sim_state.mutex); + return; + } + + sim_state.v3d = v3d_hw_auto_new(NULL); + v3d_hw_alloc_mem(sim_state.v3d, 1024 * 1024 * 1024); + sim_state.mem_base = + v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size, + &sim_state.mem); + + /* Allocate from anywhere from 4096 up. We don't allocate at 0, + * because for OQs and some other addresses in the HW, 0 means + * disabled. + */ + sim_state.heap = u_mmInit(4096, sim_state.mem_size - 4096); + + /* Make a block of 0xd0 at address 0 to make sure we don't screw up + * and land there. + */ + struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0); + memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096); + + sim_state.ver = v3d_hw_get_version(sim_state.v3d); + + mtx_unlock(&sim_state.mutex); + + sim_state.fd_map = + _mesa_hash_table_create(NULL, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + if (sim_state.ver >= 41) + v3d41_simulator_init_regs(sim_state.v3d); + else + v3d33_simulator_init_regs(sim_state.v3d); +} + +void +vc5_simulator_init(struct vc5_screen *screen) +{ + vc5_simulator_init_global(&screen->devinfo); + + screen->sim_file = rzalloc(screen, struct vc5_simulator_file); + struct vc5_simulator_file *sim_file = screen->sim_file; + + screen->sim_file->bo_map = + _mesa_hash_table_create(screen->sim_file, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + mtx_lock(&sim_state.mutex); + _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1), + screen->sim_file); + mtx_unlock(&sim_state.mutex); + + sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0); + sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs - + sim_state.mem_base); +} + +void +vc5_simulator_destroy(struct vc5_screen *screen) +{ + mtx_lock(&sim_state.mutex); + if (!--sim_state.refcount) { + _mesa_hash_table_destroy(sim_state.fd_map, NULL); + u_mmDestroy(sim_state.heap); + /* No memsetting the struct, because it contains the mutex. */ + sim_state.mem = NULL; + } + mtx_unlock(&sim_state.mutex); +} + +#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp b/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp new file mode 100644 index 00000000000..7b04ded2b53 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp @@ -0,0 +1,88 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file + * + * Wraps bits of the V3D simulator interface in a C interface for the + * v3d_simulator.c code to use. + */ + +#ifdef USE_V3D_SIMULATOR + +#include "v3d_simulator_wrapper.h" + +#define V3D_TECH_VERSION 3 +#define V3D_REVISION 3 +#define V3D_SUB_REV 0 +#define V3D_HIDDEN_REV 0 +#define V3D_COMPAT_REV 0 +#include "v3d_hw_auto.h" + +extern "C" { + +struct v3d_hw *v3d_hw_auto_new(void *in_params) +{ + return v3d_hw_auto_make_unique().release(); +} + + +uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p) +{ + return hw->get_mem(size, p); +} + +bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size) +{ + return hw->alloc_mem(min_size) == V3D_HW_ALLOC_SUCCESS; +} + +bool v3d_hw_has_gca(struct v3d_hw *hw) +{ + return hw->has_gca(); +} + +uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg) +{ + return hw->read_reg(reg); +} + +void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val) +{ + hw->write_reg(reg, val); +} + +void v3d_hw_tick(struct v3d_hw *hw) +{ + return hw->tick(); +} + +int v3d_hw_get_version(struct v3d_hw *hw) +{ + const V3D_HUB_IDENT_T *ident = hw->get_hub_ident(); + + return ident->tech_version * 10 + ident->revision; +} + +} + +#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/v3d/v3d_simulator_wrapper.h b/src/gallium/drivers/v3d/v3d_simulator_wrapper.h new file mode 100644 index 00000000000..8b5dca15ed9 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_simulator_wrapper.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdint.h> +#include <stdlib.h> + +struct v3d_hw; + +#ifdef __cplusplus +extern "C" { +#endif + +struct v3d_hw *v3d_hw_auto_new(void *params); +uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p); +bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size); +bool v3d_hw_has_gca(struct v3d_hw *hw); +uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg); +void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val); +void v3d_hw_tick(struct v3d_hw *hw); +int v3d_hw_get_version(struct v3d_hw *hw); + +#ifdef __cplusplus +} +#endif diff --git a/src/gallium/drivers/v3d/v3d_tiling.c b/src/gallium/drivers/v3d/v3d_tiling.c new file mode 100644 index 00000000000..f9c4a342184 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_tiling.c @@ -0,0 +1,389 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_tiling.c + * + * Handles information about the VC5 tiling formats, and loading and storing + * from them. + */ + +#include <stdint.h> +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_tiling.h" + +/** Return the width in pixels of a 64-byte microtile. */ +uint32_t +vc5_utile_width(int cpp) +{ + switch (cpp) { + case 1: + case 2: + return 8; + case 4: + case 8: + return 4; + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** Return the height in pixels of a 64-byte microtile. */ +uint32_t +vc5_utile_height(int cpp) +{ + switch (cpp) { + case 1: + return 8; + case 2: + case 4: + return 4; + case 8: + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** + * Returns the byte address for a given pixel within a utile. + * + * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 + * arrangement. + */ +static inline uint32_t +vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + + assert(x < utile_w && y < utile_h); + + return x * cpp + y * utile_w * cpp; +} + +/** + * Returns the byte offset for a given pixel in a LINEARTILE layout. + * + * LINEARTILE is a single line of utiles in either the X or Y direction. + */ +static inline uint32_t +vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t utile_index_x = x / utile_w; + uint32_t utile_index_y = y / utile_h; + + assert(utile_index_x == 0 || utile_index_y == 0); + + return (64 * (utile_index_x + utile_index_y) + + vc5_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +/** + * Returns the byte offset for a given pixel in a UBLINEAR layout. + * + * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 + * utiles), and the UIF blocks are in 1 or 2 columns in raster order. + */ +static inline uint32_t +vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, + int ublinear_number) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t ub_w = utile_w * 2; + uint32_t ub_h = utile_h * 2; + uint32_t ub_x = x / ub_w; + uint32_t ub_y = y / ub_h; + + return (256 * (ub_y * ublinear_number + + ub_x) + + ((x & utile_w) ? 64 : 0) + + ((y & utile_h) ? 128 : 0) + + + vc5_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +static inline uint32_t +vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_ublinear_pixel_offset(cpp, x, y, 2); +} + +static inline uint32_t +vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_ublinear_pixel_offset(cpp, x, y, 1); +} + +/** + * Returns the byte offset for a given pixel in a UIF layout. + * + * UIF is the general VC5 tiling layout shared across 3D, media, and scanout. + * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in + * 4x4 groups, and those 4x4 groups are then stored in raster order. + */ +static inline uint32_t +vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y, + bool do_xor) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t mb_width = utile_w * 2; + uint32_t mb_height = utile_h * 2; + uint32_t log2_mb_width = ffs(mb_width) - 1; + uint32_t log2_mb_height = ffs(mb_height) - 1; + + /* Macroblock X, y */ + uint32_t mb_x = x >> log2_mb_width; + uint32_t mb_y = y >> log2_mb_height; + /* X, y within the macroblock */ + uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); + uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); + + if (do_xor && (mb_x / 4) & 1) + mb_y ^= 0x10; + + uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; + uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; + + uint32_t mb_base_addr = mb_id * 256; + + bool top = mb_pixel_y < utile_h; + bool left = mb_pixel_x < utile_w; + + /* Docs have this in pixels, we do bytes here. */ + uint32_t mb_tile_offset = (!top * 128 + !left * 64); + + uint32_t utile_x = mb_pixel_x & (utile_w - 1); + uint32_t utile_y = mb_pixel_y & (utile_h - 1); + + uint32_t mb_pixel_address = (mb_base_addr + + mb_tile_offset + + vc5_get_utile_pixel_offset(cpp, + utile_x, + utile_y)); + + return mb_pixel_address; +} + +static inline uint32_t +vc5_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_uif_pixel_offset(cpp, image_h, x, y, true); +} + +static inline uint32_t +vc5_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_uif_pixel_offset(cpp, image_h, x, y, false); +} + +static inline void +vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + for (uint32_t y = 0; y < box->height; y++) { + void *cpu_row = cpu + y * cpu_stride; + + for (int x = 0; x < box->width; x++) { + uint32_t pixel_offset = get_pixel_offset(cpp, image_h, + box->x + x, + box->y + y); + + if (false) { + fprintf(stderr, "%3d,%3d -> %d\n", + box->x + x, box->y + y, + pixel_offset); + } + + if (is_load) { + memcpy(cpu_row + x * cpp, + gpu + pixel_offset, + cpp); + } else { + memcpy(gpu + pixel_offset, + cpu_row + x * cpp, + cpp); + } + } + } +} + +static inline void +vc5_move_pixels_general(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + switch (cpp) { + case 1: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 1, image_h, box, + get_pixel_offset, + is_load); + break; + case 2: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 2, image_h, box, + get_pixel_offset, + is_load); + break; + case 4: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 4, image_h, box, + get_pixel_offset, + is_load); + break; + case 8: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 8, image_h, box, + get_pixel_offset, + is_load); + break; + case 16: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 16, image_h, box, + get_pixel_offset, + is_load); + break; + } +} + +static inline void +vc5_move_tiled_image(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + enum vc5_tiling_mode tiling_format, + int cpp, + uint32_t image_h, + const struct pipe_box *box, + bool is_load) +{ + switch (tiling_format) { + case VC5_TILING_UIF_XOR: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_uif_xor_pixel_offset, + is_load); + break; + case VC5_TILING_UIF_NO_XOR: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_uif_no_xor_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_2_COLUMN: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_ublinear_2_column_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_1_COLUMN: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_ublinear_1_column_pixel_offset, + is_load); + break; + case VC5_TILING_LINEARTILE: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_lt_pixel_offset, + is_load); + break; + default: + unreachable("Unsupported tiling format"); + break; + } +} + +/** + * Loads pixel data from the start (microtile-aligned) box in \p src to the + * start of \p dst according to the given tiling format. + */ +void +vc5_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + vc5_move_tiled_image(src, src_stride, + dst, dst_stride, + tiling_format, + cpp, + image_h, + box, + true); +} + +/** + * Stores pixel data from the start of \p src into a (microtile-aligned) box in + * \p dst according to the given tiling format. + */ +void +vc5_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + vc5_move_tiled_image(dst, dst_stride, + src, src_stride, + tiling_format, + cpp, + image_h, + box, + false); +} diff --git a/src/gallium/drivers/v3d/v3d_tiling.h b/src/gallium/drivers/v3d/v3d_tiling.h new file mode 100644 index 00000000000..d3cf48c4527 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_tiling.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_TILING_H +#define VC5_TILING_H + +uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST; +uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST; +bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST; +void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp); +void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp); +void vc5_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); +void vc5_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); + +#endif /* VC5_TILING_H */ diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c new file mode 100644 index 00000000000..c7a39b50a74 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_uniforms.c @@ -0,0 +1,489 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_pack_color.h" +#include "util/format_srgb.h" + +#include "v3d_context.h" +#include "compiler/v3d_compiler.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +#if 0 + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +static void +write_texture_border_color(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit) +{ + struct pipe_sampler_state *sampler = texstate->samplers[unit]; + struct pipe_sampler_view *texture = texstate->textures[unit]; + struct vc5_resource *rsc = vc5_resource(texture->texture); + union util_color uc; + + const struct util_format_description *tex_format_desc = + util_format_description(texture->format); + + float border_color[4]; + for (int i = 0; i < 4; i++) + border_color[i] = sampler->border_color.f[i]; + if (util_format_is_srgb(texture->format)) { + for (int i = 0; i < 3; i++) + border_color[i] = + util_format_linear_to_srgb_float(border_color[i]); + } + + /* Turn the border color into the layout of channels that it would + * have when stored as texture contents. + */ + float storage_color[4]; + util_format_unswizzle_4f(storage_color, + border_color, + tex_format_desc->swizzle); + + /* Now, pack so that when the vc5_format-sampled texture contents are + * replaced with our border color, the vc5_get_format_swizzle() + * swizzling will get the right channels. + */ + if (util_format_is_depth_or_stencil(texture->format)) { + uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, + sampler->border_color.f[0]) << 8; + } else { + switch (rsc->vc5_format) { + default: + case VC5_TEXTURE_TYPE_RGBA8888: + util_pack_color(storage_color, + PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_RGBA4444: + util_pack_color(storage_color, + PIPE_FORMAT_A8B8G8R8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_RGB565: + util_pack_color(storage_color, + PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_ALPHA: + uc.ui[0] = float_to_ubyte(storage_color[0]) << 24; + break; + case VC5_TEXTURE_TYPE_LUMALPHA: + uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) | + (float_to_ubyte(storage_color[0]) << 0)); + break; + } + } + + cl_aligned_u32(uniforms, uc.ui[0]); +} +#endif + +static uint32_t +get_texrect_scale(struct vc5_texture_stateobj *texstate, + enum quniform_contents contents, + uint32_t data) +{ + struct pipe_sampler_view *texture = texstate->textures[data]; + uint32_t dim; + + if (contents == QUNIFORM_TEXRECT_SCALE_X) + dim = texture->texture->width0; + else + dim = texture->texture->height0; + + return fui(1.0f / dim); +} + +static uint32_t +get_texture_size(struct vc5_texture_stateobj *texstate, + enum quniform_contents contents, + uint32_t data) +{ + struct pipe_sampler_view *texture = texstate->textures[data]; + + switch (contents) { + case QUNIFORM_TEXTURE_WIDTH: + return u_minify(texture->texture->width0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_HEIGHT: + return u_minify(texture->texture->height0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_DEPTH: + return u_minify(texture->texture->depth0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_ARRAY_SIZE: + return texture->texture->array_size; + case QUNIFORM_TEXTURE_LEVELS: + return (texture->u.tex.last_level - + texture->u.tex.first_level) + 1; + default: + unreachable("Bad texture size field"); + } +} + +static struct vc5_bo * +vc5_upload_ubo(struct vc5_context *vc5, + struct vc5_compiled_shader *shader, + const uint32_t *gallium_uniforms) +{ + if (!shader->prog_data.base->ubo_size) + return NULL; + + struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen, + shader->prog_data.base->ubo_size, + "ubo"); + void *data = vc5_bo_map(ubo); + for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) { + memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset, + ((const void *)gallium_uniforms + + shader->prog_data.base->ubo_ranges[i].src_offset), + shader->prog_data.base->ubo_ranges[i].size); + } + + return ubo; +} + +/** + * Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter. + * + * Some bits of this field are dependent on the type of sample being done by + * the shader, while other bits are dependent on the sampler state. We OR the + * two together here. + */ +static void +write_texture_p0(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit, + uint32_t shader_data) +{ + struct pipe_sampler_state *psampler = texstate->samplers[unit]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + + cl_aligned_u32(uniforms, shader_data | sampler->p0); +} + +/** Writes the V3D 3.x P1 (CFG_MODE=1) texture parameter. */ +static void +write_texture_p1(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t data) +{ + /* Extract the texture unit from the top bits, and the compiler's + * packed p1 from the bottom. + */ + uint32_t unit = data >> 5; + uint32_t p1 = data & 0x1f; + + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + + struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { + .texture_state_record_base_address = texstate->texture_state[unit], + }; + + uint32_t packed; + V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect, + (uint8_t *)&packed, + &unpacked); + + cl_aligned_u32(uniforms, p1 | packed | sview->p1); +} + +/** Writes the V3D 4.x TMU configuration parameter 0. */ +static void +write_tmu_p0(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t data) +{ + /* Extract the texture unit from the top bits, and the compiler's + * packed p0 from the bottom. + */ + uint32_t unit = data >> 24; + uint32_t p0 = data & 0x00ffffff; + + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + struct vc5_resource *rsc = vc5_resource(psview->texture); + + cl_aligned_reloc(&job->indirect, uniforms, sview->bo, p0); + vc5_job_add_bo(job, rsc->bo); +} + +/** Writes the V3D 4.x TMU configuration parameter 1. */ +static void +write_tmu_p1(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t data) +{ + /* Extract the texture unit from the top bits, and the compiler's + * packed p1 from the bottom. + */ + uint32_t unit = data >> 24; + uint32_t p0 = data & 0x00ffffff; + + struct pipe_sampler_state *psampler = texstate->samplers[unit]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + + cl_aligned_reloc(&job->indirect, uniforms, sampler->bo, p0); +} + +struct vc5_cl_reloc +vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader, + struct vc5_constbuf_stateobj *cb, + struct vc5_texture_stateobj *texstate) +{ + struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms; + struct vc5_job *job = vc5->job; + const uint32_t *gallium_uniforms = cb->cb[0].user_buffer; + struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms); + + /* We always need to return some space for uniforms, because the HW + * will be prefetching, even if we don't read any in the program. + */ + vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4); + + struct vc5_cl_reloc uniform_stream = cl_get_address(&job->indirect); + vc5_bo_reference(uniform_stream.bo); + + struct vc5_cl_out *uniforms = + cl_start(&job->indirect); + + for (int i = 0; i < uinfo->count; i++) { + + switch (uinfo->contents[i]) { + case QUNIFORM_CONSTANT: + cl_aligned_u32(&uniforms, uinfo->data[i]); + break; + case QUNIFORM_UNIFORM: + cl_aligned_u32(&uniforms, + gallium_uniforms[uinfo->data[i]]); + break; + case QUNIFORM_VIEWPORT_X_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f); + break; + case QUNIFORM_VIEWPORT_Y_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f); + break; + + case QUNIFORM_VIEWPORT_Z_OFFSET: + cl_aligned_f(&uniforms, vc5->viewport.translate[2]); + break; + case QUNIFORM_VIEWPORT_Z_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[2]); + break; + + case QUNIFORM_USER_CLIP_PLANE: + cl_aligned_f(&uniforms, + vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]); + break; + + case QUNIFORM_TMU_CONFIG_P0: + write_tmu_p0(job, &uniforms, texstate, + uinfo->data[i]); + break; + + case QUNIFORM_TMU_CONFIG_P1: + write_tmu_p1(job, &uniforms, texstate, + uinfo->data[i]); + break; + + case QUNIFORM_TEXTURE_CONFIG_P1: + write_texture_p1(job, &uniforms, texstate, + uinfo->data[i]); + break; + +#if 0 + case QUNIFORM_TEXTURE_FIRST_LEVEL: + write_texture_first_level(job, &uniforms, texstate, + uinfo->data[i]); + break; +#endif + + case QUNIFORM_TEXRECT_SCALE_X: + case QUNIFORM_TEXRECT_SCALE_Y: + cl_aligned_u32(&uniforms, + get_texrect_scale(texstate, + uinfo->contents[i], + uinfo->data[i])); + break; + + case QUNIFORM_TEXTURE_WIDTH: + case QUNIFORM_TEXTURE_HEIGHT: + case QUNIFORM_TEXTURE_DEPTH: + case QUNIFORM_TEXTURE_ARRAY_SIZE: + case QUNIFORM_TEXTURE_LEVELS: + cl_aligned_u32(&uniforms, + get_texture_size(texstate, + uinfo->contents[i], + uinfo->data[i])); + break; + + case QUNIFORM_STENCIL: + cl_aligned_u32(&uniforms, + vc5->zsa->stencil_uniforms[uinfo->data[i]] | + (uinfo->data[i] <= 1 ? + (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) : + 0)); + break; + + case QUNIFORM_ALPHA_REF: + cl_aligned_f(&uniforms, + vc5->zsa->base.alpha.ref_value); + break; + + case QUNIFORM_SAMPLE_MASK: + cl_aligned_u32(&uniforms, vc5->sample_mask); + break; + + case QUNIFORM_UBO_ADDR: + if (uinfo->data[i] == 0) { + cl_aligned_reloc(&job->indirect, &uniforms, + ubo, 0); + } else { + int ubo_index = uinfo->data[i]; + struct vc5_resource *rsc = + vc5_resource(cb->cb[ubo_index].buffer); + + cl_aligned_reloc(&job->indirect, &uniforms, + rsc->bo, + cb->cb[ubo_index].buffer_offset); + } + break; + + case QUNIFORM_TEXTURE_FIRST_LEVEL: + cl_aligned_f(&uniforms, + texstate->textures[uinfo->data[i]]->u.tex.first_level); + break; + + case QUNIFORM_TEXTURE_BORDER_COLOR: + /* XXX */ + break; + + case QUNIFORM_SPILL_OFFSET: + cl_aligned_reloc(&job->indirect, &uniforms, + vc5->prog.spill_bo, 0); + break; + + case QUNIFORM_SPILL_SIZE_PER_THREAD: + cl_aligned_u32(&uniforms, + vc5->prog.spill_size_per_thread); + break; + + default: + assert(quniform_contents_is_texture_p0(uinfo->contents[i])); + + write_texture_p0(job, &uniforms, texstate, + uinfo->contents[i] - + QUNIFORM_TEXTURE_CONFIG_P0_0, + uinfo->data[i]); + break; + + } +#if 0 + uint32_t written_val = *((uint32_t *)uniforms - 1); + fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n", + shader, i, __gen_address_offset(&uniform_stream) + i * 4, + written_val, uif(written_val)); +#endif + } + + cl_end(&job->indirect, uniforms); + + vc5_bo_unreference(&ubo); + + return uniform_stream; +} + +void +vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader) +{ + uint32_t dirty = 0; + + for (int i = 0; i < shader->prog_data.base->uniforms.count; i++) { + switch (shader->prog_data.base->uniforms.contents[i]) { + case QUNIFORM_CONSTANT: + break; + case QUNIFORM_UNIFORM: + case QUNIFORM_UBO_ADDR: + dirty |= VC5_DIRTY_CONSTBUF; + break; + + case QUNIFORM_VIEWPORT_X_SCALE: + case QUNIFORM_VIEWPORT_Y_SCALE: + case QUNIFORM_VIEWPORT_Z_OFFSET: + case QUNIFORM_VIEWPORT_Z_SCALE: + dirty |= VC5_DIRTY_VIEWPORT; + break; + + case QUNIFORM_USER_CLIP_PLANE: + dirty |= VC5_DIRTY_CLIP; + break; + + case QUNIFORM_TMU_CONFIG_P0: + case QUNIFORM_TMU_CONFIG_P1: + case QUNIFORM_TEXTURE_CONFIG_P1: + case QUNIFORM_TEXTURE_BORDER_COLOR: + case QUNIFORM_TEXTURE_FIRST_LEVEL: + case QUNIFORM_TEXRECT_SCALE_X: + case QUNIFORM_TEXRECT_SCALE_Y: + case QUNIFORM_TEXTURE_WIDTH: + case QUNIFORM_TEXTURE_HEIGHT: + case QUNIFORM_TEXTURE_DEPTH: + case QUNIFORM_TEXTURE_ARRAY_SIZE: + case QUNIFORM_TEXTURE_LEVELS: + case QUNIFORM_SPILL_OFFSET: + case QUNIFORM_SPILL_SIZE_PER_THREAD: + /* We could flag this on just the stage we're + * compiling for, but it's not passed in. + */ + dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; + break; + + case QUNIFORM_STENCIL: + case QUNIFORM_ALPHA_REF: + dirty |= VC5_DIRTY_ZSA; + break; + + case QUNIFORM_SAMPLE_MASK: + dirty |= VC5_DIRTY_SAMPLE_MASK; + break; + + default: + assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i])); + dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; + break; + } + } + + shader->uniform_dirty_bits = dirty; +} diff --git a/src/gallium/drivers/v3d/v3dx_context.h b/src/gallium/drivers/v3d/v3dx_context.h new file mode 100644 index 00000000000..faeda2c0fbb --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_context.h @@ -0,0 +1,47 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* This file generates the per-v3d-version function prototypes. It must only + * be included from v3d_context.h. + */ + +struct v3d_hw; +struct vc5_format; + +void v3dX(emit_state)(struct pipe_context *pctx); +void v3dX(emit_rcl)(struct vc5_job *job); +void v3dX(draw_init)(struct pipe_context *pctx); +void v3dX(state_init)(struct pipe_context *pctx); + +void v3dX(bcl_epilogue)(struct vc5_context *vc5, struct vc5_job *job); + +void v3dX(simulator_init_regs)(struct v3d_hw *v3d); +int v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_get_param *args); +void v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, + uint32_t gmp_ofs); +const struct vc5_format *v3dX(get_format_desc)(enum pipe_format f); +void v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, + uint32_t *type, + uint32_t *bpp); diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c new file mode 100644 index 00000000000..03ee6b2b196 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -0,0 +1,714 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_blitter.h" +#include "util/u_prim.h" +#include "util/u_format.h" +#include "util/u_pack_color.h" +#include "util/u_prim_restart.h" +#include "util/u_upload_mgr.h" +#include "indices/u_primconvert.h" + +#include "v3d_context.h" +#include "v3d_resource.h" +#include "v3d_cl.h" +#include "broadcom/compiler/v3d_compiler.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +/** + * Does the initial bining command list setup for drawing to a given FBO. + */ +static void +vc5_start_draw(struct vc5_context *vc5) +{ + struct vc5_job *job = vc5->job; + + if (job->needs_flush) + return; + + /* Get space to emit our BCL state, using a branch to jump to a new BO + * if necessary. + */ + vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); + + job->submit.bcl_start = job->bcl.bo->offset; + vc5_job_add_bo(job, job->bcl.bo); + + job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc"); + uint32_t tsda_per_tile_size = vc5->screen->devinfo.ver >= 40 ? 256 : 64; + job->tile_state = vc5_bo_alloc(vc5->screen, + job->draw_tiles_y * + job->draw_tiles_x * + tsda_per_tile_size, + "TSDA"); + +#if V3D_VERSION < 40 + /* "Binning mode lists start with a Tile Binning Mode Configuration + * item (120)" + * + * Part1 signals the end of binning config setup. + */ + cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) { + config.tile_allocation_memory_address = + cl_address(job->tile_alloc, 0); + config.tile_allocation_memory_size = job->tile_alloc->size; + } +#endif + + cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) { +#if V3D_VERSION >= 40 + config.width_in_pixels_minus_1 = vc5->framebuffer.width - 1; + config.height_in_pixels_minus_1 = vc5->framebuffer.height - 1; + config.number_of_render_targets_minus_1 = + MAX2(vc5->framebuffer.nr_cbufs, 1) - 1; +#else /* V3D_VERSION < 40 */ + config.tile_state_data_array_base_address = + cl_address(job->tile_state, 0); + + config.width_in_tiles = job->draw_tiles_x; + config.height_in_tiles = job->draw_tiles_y; + /* Must be >= 1 */ + config.number_of_render_targets = + MAX2(vc5->framebuffer.nr_cbufs, 1); +#endif /* V3D_VERSION < 40 */ + + config.multisample_mode_4x = job->msaa; + + config.maximum_bpp_of_all_render_targets = job->internal_bpp; + } + + /* There's definitely nothing in the VCD cache we want. */ + cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); + + /* Disable any leftover OQ state from another job. */ + cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter); + + /* "Binning mode lists must have a Start Tile Binning item (6) after + * any prefix state data before the binning list proper starts." + */ + cl_emit(&job->bcl, START_TILE_BINNING, bin); + + job->needs_flush = true; + job->draw_width = vc5->framebuffer.width; + job->draw_height = vc5->framebuffer.height; +} + +static void +vc5_predraw_check_textures(struct pipe_context *pctx, + struct vc5_texture_stateobj *stage_tex) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + for (int i = 0; i < stage_tex->num_textures; i++) { + struct pipe_sampler_view *view = stage_tex->textures[i]; + if (!view) + continue; + + vc5_flush_jobs_writing_resource(vc5, view->texture); + } +} + +static void +vc5_emit_gl_shader_state(struct vc5_context *vc5, + const struct pipe_draw_info *info) +{ + struct vc5_job *job = vc5->job; + /* VC5_DIRTY_VTXSTATE */ + struct vc5_vertex_stateobj *vtx = vc5->vtx; + /* VC5_DIRTY_VTXBUF */ + struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf; + + /* Upload the uniforms to the indirect CL first */ + struct vc5_cl_reloc fs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.fs, + &vc5->constbuf[PIPE_SHADER_FRAGMENT], + &vc5->fragtex); + struct vc5_cl_reloc vs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.vs, + &vc5->constbuf[PIPE_SHADER_VERTEX], + &vc5->verttex); + struct vc5_cl_reloc cs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.cs, + &vc5->constbuf[PIPE_SHADER_VERTEX], + &vc5->verttex); + + /* See GFXH-930 workaround below */ + uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1); + uint32_t shader_rec_offset = + vc5_cl_ensure_space(&job->indirect, + cl_packet_length(GL_SHADER_STATE_RECORD) + + num_elements_to_emit * + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), + 32); + + cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { + shader.enable_clipping = true; + /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */ + shader.point_size_in_shaded_vertex_data = + (info->mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + + /* Must be set if the shader modifies Z, discards, or modifies + * the sample mask. For any of these cases, the fragment + * shader needs to write the Z value (even just discards). + */ + shader.fragment_shader_does_z_writes = + (vc5->prog.fs->prog_data.fs->writes_z || + vc5->prog.fs->prog_data.fs->discard); + + shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = + vc5->prog.fs->prog_data.fs->uses_centroid_and_center_w; + + shader.number_of_varyings_in_fragment_shader = + vc5->prog.fs->prog_data.base->num_inputs; + + shader.propagate_nans = true; + + shader.coordinate_shader_code_address = + cl_address(vc5->prog.cs->bo, 0); + shader.vertex_shader_code_address = + cl_address(vc5->prog.vs->bo, 0); + shader.fragment_shader_code_address = + cl_address(vc5->prog.fs->bo, 0); + + /* XXX: Use combined input/output size flag in the common + * case. + */ + shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true; + shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true; + shader.coordinate_shader_input_vpm_segment_size = + MAX2(vc5->prog.cs->prog_data.vs->vpm_input_size, 1); + shader.vertex_shader_input_vpm_segment_size = + MAX2(vc5->prog.vs->prog_data.vs->vpm_input_size, 1); + + shader.coordinate_shader_output_vpm_segment_size = + vc5->prog.cs->prog_data.vs->vpm_output_size; + shader.vertex_shader_output_vpm_segment_size = + vc5->prog.vs->prog_data.vs->vpm_output_size; + + shader.coordinate_shader_uniforms_address = cs_uniforms; + shader.vertex_shader_uniforms_address = vs_uniforms; + shader.fragment_shader_uniforms_address = fs_uniforms; + +#if V3D_VERSION >= 41 + shader.coordinate_shader_4_way_threadable = + vc5->prog.cs->prog_data.vs->base.threads == 4; + shader.vertex_shader_4_way_threadable = + vc5->prog.vs->prog_data.vs->base.threads == 4; + shader.fragment_shader_4_way_threadable = + vc5->prog.fs->prog_data.fs->base.threads == 4; + + shader.coordinate_shader_start_in_final_thread_section = + vc5->prog.cs->prog_data.vs->base.single_seg; + shader.vertex_shader_start_in_final_thread_section = + vc5->prog.vs->prog_data.vs->base.single_seg; + shader.fragment_shader_start_in_final_thread_section = + vc5->prog.fs->prog_data.fs->base.single_seg; +#else + shader.coordinate_shader_4_way_threadable = + vc5->prog.cs->prog_data.vs->base.threads == 4; + shader.coordinate_shader_2_way_threadable = + vc5->prog.cs->prog_data.vs->base.threads == 2; + shader.vertex_shader_4_way_threadable = + vc5->prog.vs->prog_data.vs->base.threads == 4; + shader.vertex_shader_2_way_threadable = + vc5->prog.vs->prog_data.vs->base.threads == 2; + shader.fragment_shader_4_way_threadable = + vc5->prog.fs->prog_data.fs->base.threads == 4; + shader.fragment_shader_2_way_threadable = + vc5->prog.fs->prog_data.fs->base.threads == 2; +#endif + + shader.vertex_id_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->uses_vid; + shader.instance_id_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->uses_iid; + shader.vertex_id_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->uses_vid; + shader.instance_id_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->uses_iid; + + shader.address_of_default_attribute_values = + cl_address(vtx->default_attribute_values, 0); + } + + for (int i = 0; i < vtx->num_elements; i++) { + struct pipe_vertex_element *elem = &vtx->pipe[i]; + struct pipe_vertex_buffer *vb = + &vertexbuf->vb[elem->vertex_buffer_index]; + struct vc5_resource *rsc = vc5_resource(vb->buffer.resource); + + const uint32_t size = + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); + cl_emit_with_prepacked(&job->indirect, + GL_SHADER_STATE_ATTRIBUTE_RECORD, + &vtx->attrs[i * size], attr) { + attr.stride = vb->stride; + attr.address = cl_address(rsc->bo, + vb->buffer_offset + + elem->src_offset); + attr.number_of_values_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->vattr_sizes[i]; + attr.number_of_values_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->vattr_sizes[i]; +#if V3D_VERSION >= 41 + attr.maximum_index = 0xffffff; +#endif + } + } + + if (vtx->num_elements == 0) { + /* GFXH-930: At least one attribute must be enabled and read + * by CS and VS. If we have no attributes being consumed by + * the shader, set up a dummy to be loaded into the VPM. + */ + cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { + /* Valid address of data whose value will be unused. */ + attr.address = cl_address(job->indirect.bo, 0); + + attr.type = ATTRIBUTE_FLOAT; + attr.stride = 0; + attr.vec_size = 1; + + attr.number_of_values_read_by_coordinate_shader = 1; + attr.number_of_values_read_by_vertex_shader = 1; + } + } + + cl_emit(&job->bcl, GL_SHADER_STATE, state) { + state.address = cl_address(job->indirect.bo, shader_rec_offset); + state.number_of_attribute_arrays = num_elements_to_emit; + } + + vc5_bo_unreference(&cs_uniforms.bo); + vc5_bo_unreference(&vs_uniforms.bo); + vc5_bo_unreference(&fs_uniforms.bo); + + job->shader_rec_count++; +} + +/** + * Computes the various transform feedback statistics, since they can't be + * recorded by CL packets. + */ +static void +vc5_tf_statistics_record(struct vc5_context *vc5, + const struct pipe_draw_info *info, + bool prim_tf) +{ + if (!vc5->active_queries) + return; + + uint32_t prims = u_prims_for_vertices(info->mode, info->count); + vc5->prims_generated += prims; + + if (prim_tf) { + /* XXX: Only count if we didn't overflow. */ + vc5->tf_prims_generated += prims; + } +} + +static void +vc5_update_job_ez(struct vc5_context *vc5, struct vc5_job *job) +{ + switch (vc5->zsa->ez_state) { + case VC5_EZ_UNDECIDED: + /* If the Z/S state didn't pick a direction but didn't + * disable, then go along with the current EZ state. This + * allows EZ optimization for Z func == EQUAL or NEVER. + */ + break; + + case VC5_EZ_LT_LE: + case VC5_EZ_GT_GE: + /* If the Z/S state picked a direction, then it needs to match + * the current direction if we've decided on one. + */ + if (job->ez_state == VC5_EZ_UNDECIDED) + job->ez_state = vc5->zsa->ez_state; + else if (job->ez_state != vc5->zsa->ez_state) + job->ez_state = VC5_EZ_DISABLED; + break; + + case VC5_EZ_DISABLED: + /* If the current Z/S state disables EZ because of a bad Z + * func or stencil operation, then we can't do any more EZ in + * this frame. + */ + job->ez_state = VC5_EZ_DISABLED; + break; + } + + /* If the FS affects the Z of the pixels, then it may update against + * the chosen EZ direction (though we could use + * ARB_conservative_depth's hints to avoid this) + */ + if (vc5->prog.fs->prog_data.fs->writes_z) { + job->ez_state = VC5_EZ_DISABLED; + } + + if (job->first_ez_state == VC5_EZ_UNDECIDED) + job->first_ez_state = job->ez_state; +} + +static void +vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + if (!info->count_from_stream_output && !info->indirect && + !info->primitive_restart && + !u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) + return; + + /* Fall back for weird desktop GL primitive restart values. */ + if (info->primitive_restart && + info->index_size) { + uint32_t mask = ~0; + + switch (info->index_size) { + case 2: + mask = 0xffff; + break; + case 1: + mask = 0xff; + break; + } + + if (info->restart_index != mask) { + util_draw_vbo_without_prim_restart(pctx, info); + return; + } + } + + if (info->mode >= PIPE_PRIM_QUADS) { + util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base); + util_primconvert_draw_vbo(vc5->primconvert, info); + perf_debug("Fallback conversion for %d %s vertices\n", + info->count, u_prim_name(info->mode)); + return; + } + + /* Before setting up the draw, flush anything writing to the textures + * that we read from. + */ + vc5_predraw_check_textures(pctx, &vc5->verttex); + vc5_predraw_check_textures(pctx, &vc5->fragtex); + + struct vc5_job *job = vc5_get_job_for_fbo(vc5); + + /* Get space to emit our draw call into the BCL, using a branch to + * jump to a new BO if necessary. + */ + vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); + + if (vc5->prim_mode != info->mode) { + vc5->prim_mode = info->mode; + vc5->dirty |= VC5_DIRTY_PRIM_MODE; + } + + vc5_start_draw(vc5); + vc5_update_compiled_shaders(vc5, info->mode); + vc5_update_job_ez(vc5, job); + +#if V3D_VERSION >= 41 + v3d41_emit_state(pctx); +#else + v3d33_emit_state(pctx); +#endif + + if (vc5->dirty & (VC5_DIRTY_VTXBUF | + VC5_DIRTY_VTXSTATE | + VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_COMPILED_CS | + VC5_DIRTY_COMPILED_VS | + VC5_DIRTY_COMPILED_FS | + vc5->prog.cs->uniform_dirty_bits | + vc5->prog.vs->uniform_dirty_bits | + vc5->prog.fs->uniform_dirty_bits)) { + vc5_emit_gl_shader_state(vc5, info); + } + + vc5->dirty = 0; + + /* The Base Vertex/Base Instance packet sets those values to nonzero + * for the next draw call only. + */ + if (info->index_bias || info->start_instance) { + cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { + base.base_instance = info->start_instance; + base.base_vertex = info->index_bias; + } + } + + uint32_t prim_tf_enable = 0; +#if V3D_VERSION < 40 + /* V3D 3.x: The HW only processes transform feedback on primitives + * with the flag set. + */ + if (vc5->streamout.num_targets) + prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS); +#endif + + vc5_tf_statistics_record(vc5, info, vc5->streamout.num_targets); + + /* Note that the primitive type fields match with OpenGL/gallium + * definitions, up to but not including QUADS. + */ + if (info->index_size) { + uint32_t index_size = info->index_size; + uint32_t offset = info->start * index_size; + struct pipe_resource *prsc; + if (info->has_user_indices) { + prsc = NULL; + u_upload_data(vc5->uploader, 0, + info->count * info->index_size, 4, + info->index.user, + &offset, &prsc); + } else { + prsc = info->index.resource; + } + struct vc5_resource *rsc = vc5_resource(prsc); + +#if V3D_VERSION >= 40 + cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { + ib.address = cl_address(rsc->bo, 0); + ib.size = rsc->bo->size; + } +#endif + + if (info->instance_count > 1) { + cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; +#if V3D_VERSION >= 40 + prim.index_offset = offset; +#else /* V3D_VERSION < 40 */ + prim.maximum_index = (1u << 31) - 1; /* XXX */ + prim.address_of_indices_list = + cl_address(rsc->bo, offset); +#endif /* V3D_VERSION < 40 */ + prim.mode = info->mode | prim_tf_enable; + prim.enable_primitive_restarts = info->primitive_restart; + + prim.number_of_instances = info->instance_count; + prim.instance_length = info->count; + } + } else { + cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; + prim.length = info->count; +#if V3D_VERSION >= 40 + prim.index_offset = offset; +#else /* V3D_VERSION < 40 */ + prim.maximum_index = (1u << 31) - 1; /* XXX */ + prim.address_of_indices_list = + cl_address(rsc->bo, offset); +#endif /* V3D_VERSION < 40 */ + prim.mode = info->mode | prim_tf_enable; + prim.enable_primitive_restarts = info->primitive_restart; + } + } + + job->draw_calls_queued++; + + if (info->has_user_indices) + pipe_resource_reference(&prsc, NULL); + } else { + if (info->instance_count > 1) { + cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) { + prim.mode = info->mode | prim_tf_enable; + prim.index_of_first_vertex = info->start; + prim.number_of_instances = info->instance_count; + prim.instance_length = info->count; + } + } else { + cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) { + prim.mode = info->mode | prim_tf_enable; + prim.length = info->count; + prim.index_of_first_vertex = info->start; + } + } + } + job->draw_calls_queued++; + + if (vc5->zsa && job->zsbuf && + (vc5->zsa->base.depth.enabled || + vc5->zsa->base.stencil[0].enabled)) { + struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); + vc5_job_add_bo(job, rsc->bo); + + if (vc5->zsa->base.depth.enabled) { + job->resolve |= PIPE_CLEAR_DEPTH; + rsc->initialized_buffers = PIPE_CLEAR_DEPTH; + } + + if (vc5->zsa->base.stencil[0].enabled) { + job->resolve |= PIPE_CLEAR_STENCIL; + rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; + } + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + + if (job->resolve & bit || !job->cbufs[i]) + continue; + struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture); + + job->resolve |= bit; + vc5_job_add_bo(job, rsc->bo); + } + + if (job->referenced_size > 768 * 1024 * 1024) { + perf_debug("Flushing job with %dkb to try to free up memory\n", + job->referenced_size / 1024); + vc5_flush(pctx); + } + + if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH) + vc5_flush(pctx); +} + +static void +vc5_clear(struct pipe_context *pctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5_get_job_for_fbo(vc5); + + /* We can't flag new buffers for clearing once we've queued draws. We + * could avoid this by using the 3d engine to clear. + */ + if (job->draw_calls_queued) { + perf_debug("Flushing rendering to process new clear.\n"); + vc5_job_submit(vc5, job); + job = vc5_get_job_for_fbo(vc5); + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + if (!(buffers & bit)) + continue; + + struct pipe_surface *psurf = vc5->framebuffer.cbufs[i]; + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + union util_color uc; + uint32_t internal_size = 4 << surf->internal_bpp; + + static union pipe_color_union swapped_color; + if (vc5->swap_color_rb & (1 << i)) { + swapped_color.f[0] = color->f[2]; + swapped_color.f[1] = color->f[1]; + swapped_color.f[2] = color->f[0]; + swapped_color.f[3] = color->f[3]; + color = &swapped_color; + } + + switch (surf->internal_type) { + case V3D_INTERNAL_TYPE_8: + util_pack_color(color->f, PIPE_FORMAT_R8G8B8A8_UNORM, + &uc); + memcpy(job->clear_color[i], uc.ui, internal_size); + break; + case V3D_INTERNAL_TYPE_8I: + case V3D_INTERNAL_TYPE_8UI: + job->clear_color[i][0] = ((color->ui[0] & 0xff) | + (color->ui[1] & 0xff) << 8 | + (color->ui[2] & 0xff) << 16 | + (color->ui[3] & 0xff) << 24); + break; + case V3D_INTERNAL_TYPE_16F: + util_pack_color(color->f, PIPE_FORMAT_R16G16B16A16_FLOAT, + &uc); + memcpy(job->clear_color[i], uc.ui, internal_size); + break; + case V3D_INTERNAL_TYPE_16I: + case V3D_INTERNAL_TYPE_16UI: + job->clear_color[i][0] = ((color->ui[0] & 0xffff) | + color->ui[1] << 16); + job->clear_color[i][1] = ((color->ui[2] & 0xffff) | + color->ui[3] << 16); + break; + case V3D_INTERNAL_TYPE_32F: + case V3D_INTERNAL_TYPE_32I: + case V3D_INTERNAL_TYPE_32UI: + memcpy(job->clear_color[i], color->ui, internal_size); + break; + } + + rsc->initialized_buffers |= bit; + } + + unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; + if (zsclear) { + struct vc5_resource *rsc = + vc5_resource(vc5->framebuffer.zsbuf->texture); + + if (zsclear & PIPE_CLEAR_DEPTH) + job->clear_z = depth; + if (zsclear & PIPE_CLEAR_STENCIL) + job->clear_s = stencil; + + rsc->initialized_buffers |= zsclear; + } + + job->draw_min_x = 0; + job->draw_min_y = 0; + job->draw_max_x = vc5->framebuffer.width; + job->draw_max_y = vc5->framebuffer.height; + job->cleared |= buffers; + job->resolve |= buffers; + + vc5_start_draw(vc5); +} + +static void +vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h, + bool render_condition_enabled) +{ + fprintf(stderr, "unimpl: clear RT\n"); +} + +static void +vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h, + bool render_condition_enabled) +{ + fprintf(stderr, "unimpl: clear DS\n"); +} + +void +v3dX(draw_init)(struct pipe_context *pctx) +{ + pctx->draw_vbo = vc5_draw_vbo; + pctx->clear = vc5_clear; + pctx->clear_render_target = vc5_clear_render_target; + pctx->clear_depth_stencil = vc5_clear_depth_stencil; +} diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c new file mode 100644 index 00000000000..e2aba356de4 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_emit.c @@ -0,0 +1,722 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_half.h" +#include "v3d_context.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +static uint8_t +vc5_factor(enum pipe_blendfactor factor, bool dst_alpha_one) +{ + /* We may get a bad blendfactor when blending is disabled. */ + if (factor == 0) + return V3D_BLEND_FACTOR_ZERO; + + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return V3D_BLEND_FACTOR_ZERO; + case PIPE_BLENDFACTOR_ONE: + return V3D_BLEND_FACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V3D_BLEND_FACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V3D_BLEND_FACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return V3D_BLEND_FACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V3D_BLEND_FACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V3D_BLEND_FACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V3D_BLEND_FACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return (dst_alpha_one ? + V3D_BLEND_FACTOR_ONE : + V3D_BLEND_FACTOR_DST_ALPHA); + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return (dst_alpha_one ? + V3D_BLEND_FACTOR_ZERO : + V3D_BLEND_FACTOR_INV_DST_ALPHA); + case PIPE_BLENDFACTOR_CONST_COLOR: + return V3D_BLEND_FACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V3D_BLEND_FACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V3D_BLEND_FACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V3D_BLEND_FACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE; + default: + unreachable("Bad blend factor"); + } +} + +static inline uint16_t +swizzled_border_color(const struct v3d_device_info *devinfo, + struct pipe_sampler_state *sampler, + struct vc5_sampler_view *sview, + int chan) +{ + const struct util_format_description *desc = + util_format_description(sview->base.format); + uint8_t swiz = chan; + + /* If we're doing swizzling in the sampler, then only rearrange the + * border color for the mismatch between the VC5 texture format and + * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by + * the sampler's swizzle. + * + * For swizzling in the shader, we don't do any pre-swizzling of the + * border color. + */ + if (vc5_get_tex_return_size(devinfo, sview->base.format, + sampler->compare_mode) != 32) + swiz = desc->swizzle[swiz]; + + switch (swiz) { + case PIPE_SWIZZLE_0: + return util_float_to_half(0.0); + case PIPE_SWIZZLE_1: + return util_float_to_half(1.0); + default: + return util_float_to_half(sampler->border_color.f[swiz]); + } +} + +#if V3D_VERSION < 40 +static uint32_t +translate_swizzle(unsigned char pipe_swizzle) +{ + switch (pipe_swizzle) { + case PIPE_SWIZZLE_0: + return 0; + case PIPE_SWIZZLE_1: + return 1; + case PIPE_SWIZZLE_X: + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_W: + return 2 + pipe_swizzle; + default: + unreachable("unknown swizzle"); + } +} + +static void +emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex, + int i) +{ + struct vc5_job *job = vc5->job; + struct pipe_sampler_state *psampler = stage_tex->samplers[i]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + struct pipe_sampler_view *psview = stage_tex->textures[i]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + struct pipe_resource *prsc = psview->texture; + struct vc5_resource *rsc = vc5_resource(prsc); + const struct v3d_device_info *devinfo = &vc5->screen->devinfo; + + stage_tex->texture_state[i].offset = + vc5_cl_ensure_space(&job->indirect, + cl_packet_length(TEXTURE_SHADER_STATE), + 32); + vc5_bo_set_reference(&stage_tex->texture_state[i].bo, + job->indirect.bo); + + uint32_t return_size = vc5_get_tex_return_size(devinfo, psview->format, + psampler->compare_mode); + + struct V3D33_TEXTURE_SHADER_STATE unpacked = { + /* XXX */ + .border_color_red = swizzled_border_color(devinfo, psampler, + sview, 0), + .border_color_green = swizzled_border_color(devinfo, psampler, + sview, 1), + .border_color_blue = swizzled_border_color(devinfo, psampler, + sview, 2), + .border_color_alpha = swizzled_border_color(devinfo, psampler, + sview, 3), + + /* In the normal texturing path, the LOD gets clamped between + * min/max, and the base_level field (set in the sampler view + * from first_level) only decides where the min/mag switch + * happens, so we need to use the LOD clamps to keep us + * between min and max. + * + * For txf, the LOD clamp is still used, despite GL not + * wanting that. We will need to have a separate + * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to + * support txf properly. + */ + .min_level_of_detail = MIN2(psview->u.tex.first_level + + MAX2(psampler->min_lod, 0), + psview->u.tex.last_level), + .max_level_of_detail = MIN2(psview->u.tex.first_level + + psampler->max_lod, + psview->u.tex.last_level), + + .texture_base_pointer = cl_address(rsc->bo, + rsc->slices[0].offset), + + .output_32_bit = return_size == 32, + }; + + /* Set up the sampler swizzle if we're doing 16-bit sampling. For + * 32-bit, we leave swizzling up to the shader compiler. + * + * Note: Contrary to the docs, the swizzle still applies even if the + * return size is 32. It's just that you probably want to swizzle in + * the shader, because you need the Y/Z/W channels to be defined. + */ + if (return_size == 32) { + unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + } else { + unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]); + unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]); + unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]); + unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]); + } + + int min_img_filter = psampler->min_img_filter; + int min_mip_filter = psampler->min_mip_filter; + int mag_img_filter = psampler->mag_img_filter; + + if (return_size == 32) { + min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + } + + bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST; + switch (min_mip_filter) { + case PIPE_TEX_MIPFILTER_NONE: + unpacked.filter += min_nearest ? 2 : 0; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + unpacked.filter += min_nearest ? 4 : 8; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + unpacked.filter += min_nearest ? 4 : 8; + unpacked.filter += 2; + break; + } + + if (mag_img_filter == PIPE_TEX_FILTER_NEAREST) + unpacked.filter++; + + if (psampler->max_anisotropy > 8) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1; + else if (psampler->max_anisotropy > 4) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1; + else if (psampler->max_anisotropy > 2) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1; + else if (psampler->max_anisotropy) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1; + + uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; + cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); + + for (int i = 0; i < ARRAY_SIZE(packed); i++) + packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; + + /* TMU indirect structs need to be 32b aligned. */ + vc5_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32); + cl_emit_prepacked(&job->indirect, &packed); +} + +static void +emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex) +{ + for (int i = 0; i < stage_tex->num_textures; i++) { + if (stage_tex->textures[i]) + emit_one_texture(vc5, stage_tex, i); + } +} +#endif /* V3D_VERSION < 40 */ + +static uint32_t +translate_colormask(struct vc5_context *vc5, uint32_t colormask, int rt) +{ + if (vc5->swap_color_rb & (1 << rt)) { + colormask = ((colormask & (2 | 8)) | + ((colormask & 1) << 2) | + ((colormask & 4) >> 2)); + } + + return (~colormask) & 0xf; +} + +static void +emit_rt_blend(struct vc5_context *vc5, struct vc5_job *job, + struct pipe_blend_state *blend, int rt) +{ + cl_emit(&job->bcl, BLEND_CONFIG, config) { + struct pipe_rt_blend_state *rtblend = &blend->rt[rt]; + +#if V3D_VERSION >= 40 + config.render_target_mask = 1 << rt; +#else + assert(rt == 0); +#endif + + config.colour_blend_mode = rtblend->rgb_func; + config.colour_blend_dst_factor = + vc5_factor(rtblend->rgb_dst_factor, + vc5->blend_dst_alpha_one); + config.colour_blend_src_factor = + vc5_factor(rtblend->rgb_src_factor, + vc5->blend_dst_alpha_one); + + config.alpha_blend_mode = rtblend->alpha_func; + config.alpha_blend_dst_factor = + vc5_factor(rtblend->alpha_dst_factor, + vc5->blend_dst_alpha_one); + config.alpha_blend_src_factor = + vc5_factor(rtblend->alpha_src_factor, + vc5->blend_dst_alpha_one); + } +} + +void +v3dX(emit_state)(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5->job; + bool rasterizer_discard = vc5->rasterizer->base.rasterizer_discard; + + if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT | + VC5_DIRTY_RASTERIZER)) { + float *vpscale = vc5->viewport.scale; + float *vptranslate = vc5->viewport.translate; + float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; + float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; + float vp_miny = -fabsf(vpscale[1]) + vptranslate[1]; + float vp_maxy = fabsf(vpscale[1]) + vptranslate[1]; + + /* Clip to the scissor if it's enabled, but still clip to the + * drawable regardless since that controls where the binner + * tries to put things. + * + * Additionally, always clip the rendering to the viewport, + * since the hardware does guardband clipping, meaning + * primitives would rasterize outside of the view volume. + */ + uint32_t minx, miny, maxx, maxy; + if (!vc5->rasterizer->base.scissor) { + minx = MAX2(vp_minx, 0); + miny = MAX2(vp_miny, 0); + maxx = MIN2(vp_maxx, job->draw_width); + maxy = MIN2(vp_maxy, job->draw_height); + } else { + minx = MAX2(vp_minx, vc5->scissor.minx); + miny = MAX2(vp_miny, vc5->scissor.miny); + maxx = MIN2(vp_maxx, vc5->scissor.maxx); + maxy = MIN2(vp_maxy, vc5->scissor.maxy); + } + + cl_emit(&job->bcl, CLIP_WINDOW, clip) { + clip.clip_window_left_pixel_coordinate = minx; + clip.clip_window_bottom_pixel_coordinate = miny; + clip.clip_window_width_in_pixels = maxx - minx; + clip.clip_window_height_in_pixels = maxy - miny; + +#if V3D_VERSION < 41 + /* The HW won't entirely clip out when scissor w/h is + * 0. Just treat it the same as rasterizer discard. + */ + if (clip.clip_window_width_in_pixels == 0 || + clip.clip_window_height_in_pixels == 0) { + rasterizer_discard = true; + clip.clip_window_width_in_pixels = 1; + clip.clip_window_height_in_pixels = 1; + } +#endif + } + + job->draw_min_x = MIN2(job->draw_min_x, minx); + job->draw_min_y = MIN2(job->draw_min_y, miny); + job->draw_max_x = MAX2(job->draw_max_x, maxx); + job->draw_max_y = MAX2(job->draw_max_y, maxy); + } + + if (vc5->dirty & (VC5_DIRTY_RASTERIZER | + VC5_DIRTY_ZSA | + VC5_DIRTY_BLEND | + VC5_DIRTY_COMPILED_FS)) { + cl_emit(&job->bcl, CONFIGURATION_BITS, config) { + config.enable_forward_facing_primitive = + !rasterizer_discard && + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_FRONT); + config.enable_reverse_facing_primitive = + !rasterizer_discard && + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_BACK); + /* This seems backwards, but it's what gets the + * clipflat test to pass. + */ + config.clockwise_primitives = + vc5->rasterizer->base.front_ccw; + + config.enable_depth_offset = + vc5->rasterizer->base.offset_tri; + + config.rasterizer_oversample_mode = + vc5->rasterizer->base.multisample; + + config.direct3d_provoking_vertex = + vc5->rasterizer->base.flatshade_first; + + config.blend_enable = vc5->blend->rt[0].blend_enable; + + /* Note: EZ state may update based on the compiled FS, + * along with ZSA + */ + config.early_z_updates_enable = + (job->ez_state != VC5_EZ_DISABLED); + if (vc5->zsa->base.depth.enabled) { + config.z_updates_enable = + vc5->zsa->base.depth.writemask; + config.early_z_enable = + config.early_z_updates_enable; + config.depth_test_function = + vc5->zsa->base.depth.func; + } else { + config.depth_test_function = PIPE_FUNC_ALWAYS; + } + + config.stencil_enable = + vc5->zsa->base.stencil[0].enabled; + } + + } + + if (vc5->dirty & VC5_DIRTY_RASTERIZER && + vc5->rasterizer->base.offset_tri) { + cl_emit(&job->bcl, DEPTH_OFFSET, depth) { + depth.depth_offset_factor = + vc5->rasterizer->offset_factor; + depth.depth_offset_units = + vc5->rasterizer->offset_units; + } + } + + if (vc5->dirty & VC5_DIRTY_RASTERIZER) { + cl_emit(&job->bcl, POINT_SIZE, point_size) { + point_size.point_size = vc5->rasterizer->point_size; + } + + cl_emit(&job->bcl, LINE_WIDTH, line_width) { + line_width.line_width = vc5->rasterizer->base.line_width; + } + } + + if (vc5->dirty & VC5_DIRTY_VIEWPORT) { + cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { + clip.viewport_half_width_in_1_256th_of_pixel = + vc5->viewport.scale[0] * 256.0f; + clip.viewport_half_height_in_1_256th_of_pixel = + vc5->viewport.scale[1] * 256.0f; + } + + cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { + clip.viewport_z_offset_zc_to_zs = + vc5->viewport.translate[2]; + clip.viewport_z_scale_zc_to_zs = + vc5->viewport.scale[2]; + } + cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { + clip.minimum_zw = (vc5->viewport.translate[2] - + vc5->viewport.scale[2]); + clip.maximum_zw = (vc5->viewport.translate[2] + + vc5->viewport.scale[2]); + } + + cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { + vp.viewport_centre_x_coordinate = + vc5->viewport.translate[0]; + vp.viewport_centre_y_coordinate = + vc5->viewport.translate[1]; + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND && vc5->blend->rt[0].blend_enable) { + struct pipe_blend_state *blend = vc5->blend; + + if (blend->independent_blend_enable) { + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) + emit_rt_blend(vc5, job, blend, i); + } else { + emit_rt_blend(vc5, job, blend, 0); + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND) { + struct pipe_blend_state *blend = vc5->blend; + + cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) { + if (blend->independent_blend_enable) { + mask.render_target_0_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 0); + mask.render_target_1_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[1].colormask, 1); + mask.render_target_2_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[2].colormask, 2); + mask.render_target_3_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[3].colormask, 3); + } else { + mask.render_target_0_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 0); + mask.render_target_1_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 1); + mask.render_target_2_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 2); + mask.render_target_3_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 3); + } + } + } + + /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant + * color. + */ + if (vc5->dirty & VC5_DIRTY_BLEND_COLOR || + (V3D_VERSION < 41 && (vc5->dirty & VC5_DIRTY_BLEND))) { + cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) { + colour.red_f16 = (vc5->swap_color_rb ? + vc5->blend_color.hf[2] : + vc5->blend_color.hf[0]); + colour.green_f16 = vc5->blend_color.hf[1]; + colour.blue_f16 = (vc5->swap_color_rb ? + vc5->blend_color.hf[0] : + vc5->blend_color.hf[2]); + colour.alpha_f16 = vc5->blend_color.hf[3]; + } + } + + if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) { + struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0]; + struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1]; + + if (front->enabled) { + cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG, + vc5->zsa->stencil_front, config) { + config.stencil_ref_value = + vc5->stencil_ref.ref_value[0]; + } + } + + if (back->enabled) { + cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG, + vc5->zsa->stencil_back, config) { + config.stencil_ref_value = + vc5->stencil_ref.ref_value[1]; + } + } + } + +#if V3D_VERSION < 40 + /* Pre-4.x, we have texture state that depends on both the sampler and + * the view, so we merge them together at draw time. + */ + if (vc5->dirty & VC5_DIRTY_FRAGTEX) + emit_textures(vc5, &vc5->fragtex); + + if (vc5->dirty & VC5_DIRTY_VERTTEX) + emit_textures(vc5, &vc5->verttex); +#endif + + if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) { + bool emitted_any = false; + + for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->flat_shade_flags); i++) { + if (!vc5->prog.fs->prog_data.fs->flat_shade_flags[i]) + continue; + + cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { + flags.varying_offset_v0 = i; + + if (emitted_any) { + flags.action_for_flat_shade_flags_of_lower_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + flags.action_for_flat_shade_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + } else { + flags.action_for_flat_shade_flags_of_lower_numbered_varyings = + ((i == 0) ? + V3D_VARYING_FLAGS_ACTION_UNCHANGED : + V3D_VARYING_FLAGS_ACTION_ZEROED); + + flags.action_for_flat_shade_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_ZEROED; + } + + flags.flat_shade_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->flat_shade_flags[i]; + } + + emitted_any = true; + } + + if (!emitted_any) { + cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags); + } + } + +#if V3D_VERSION >= 40 + if (vc5->dirty & VC5_DIRTY_CENTROID_FLAGS) { + bool emitted_any = false; + + for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->centroid_flags); i++) { + if (!vc5->prog.fs->prog_data.fs->centroid_flags[i]) + continue; + + cl_emit(&job->bcl, CENTROID_FLAGS, flags) { + flags.varying_offset_v0 = i; + + if (emitted_any) { + flags.action_for_centroid_flags_of_lower_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + flags.action_for_centroid_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + } else { + flags.action_for_centroid_flags_of_lower_numbered_varyings = + ((i == 0) ? + V3D_VARYING_FLAGS_ACTION_UNCHANGED : + V3D_VARYING_FLAGS_ACTION_ZEROED); + + flags.action_for_centroid_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_ZEROED; + } + + flags.centroid_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->centroid_flags[i]; + } + + emitted_any = true; + } + + if (!emitted_any) { + cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); + } + } +#endif + + /* Set up the transform feedback data specs (which VPM entries to + * output to which buffers). + */ + if (vc5->dirty & (VC5_DIRTY_STREAMOUT | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_PRIM_MODE)) { + struct vc5_streamout_stateobj *so = &vc5->streamout; + + if (so->num_targets) { + bool psiz_per_vertex = (vc5->prim_mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + uint16_t *tf_specs = (psiz_per_vertex ? + vc5->prog.bind_vs->tf_specs_psiz : + vc5->prog.bind_vs->tf_specs); + +#if V3D_VERSION >= 40 + job->tf_enabled = (vc5->prog.bind_vs->num_tf_specs != 0 && + vc5->active_queries); + + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { + tfe.number_of_16_bit_output_data_specs_following = + vc5->prog.bind_vs->num_tf_specs; + tfe.enable = job->tf_enabled; + }; +#else /* V3D_VERSION < 40 */ + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { + tfe.number_of_32_bit_output_buffer_address_following = + so->num_targets; + tfe.number_of_16_bit_output_data_specs_following = + vc5->prog.bind_vs->num_tf_specs; + }; +#endif /* V3D_VERSION < 40 */ + for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) { + cl_emit_prepacked(&job->bcl, &tf_specs[i]); + } + } else if (job->tf_enabled) { +#if V3D_VERSION >= 40 + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { + tfe.enable = false; + }; + job->tf_enabled = false; +#endif /* V3D_VERSION >= 40 */ + } + } + + /* Set up the trasnform feedback buffers. */ + if (vc5->dirty & VC5_DIRTY_STREAMOUT) { + struct vc5_streamout_stateobj *so = &vc5->streamout; + for (int i = 0; i < so->num_targets; i++) { + const struct pipe_stream_output_target *target = + so->targets[i]; + struct vc5_resource *rsc = target ? + vc5_resource(target->buffer) : NULL; + +#if V3D_VERSION >= 40 + if (!target) + continue; + + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) { + output.buffer_address = + cl_address(rsc->bo, + target->buffer_offset); + output.buffer_size_in_32_bit_words = + target->buffer_size >> 2; + output.buffer_number = i; + } +#else /* V3D_VERSION < 40 */ + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { + if (target) { + output.address = + cl_address(rsc->bo, + target->buffer_offset); + } + }; +#endif /* V3D_VERSION < 40 */ + if (target) { + vc5_job_add_write_resource(vc5->job, + target->buffer); + } + /* XXX: buffer_size? */ + } + } + + if (vc5->dirty & VC5_DIRTY_OQ) { + cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) { + job->oq_enabled = vc5->active_queries && vc5->current_oq; + if (job->oq_enabled) { + counter.address = cl_address(vc5->current_oq, 0); + } + } + } +} diff --git a/src/gallium/drivers/v3d/v3dx_format_table.c b/src/gallium/drivers/v3d/v3dx_format_table.c new file mode 100644 index 00000000000..458488119c7 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_format_table.c @@ -0,0 +1,318 @@ +/* + * Copyright © 2014-2018 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" + +#include "v3d_context.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/common/v3d_macros.h" +#include "v3d_format_table.h" + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels) \ + [PIPE_FORMAT_##pipe] = { \ + true, \ + V3D_OUTPUT_IMAGE_FORMAT_##rt, \ + TEXTURE_DATA_FORMAT_##tex, \ + swiz, \ + return_size, \ + return_channels, \ + } + +#define SWIZ_X001 SWIZ(X, 0, 0, 1) +#define SWIZ_XY01 SWIZ(X, Y, 0, 1) +#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1) +#define SWIZ_XYZW SWIZ(X, Y, Z, W) +#define SWIZ_YZWX SWIZ(Y, Z, W, X) +#define SWIZ_YZW1 SWIZ(Y, Z, W, 1) +#define SWIZ_ZYXW SWIZ(Z, Y, X, W) +#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1) +#define SWIZ_XXXY SWIZ(X, X, X, Y) +#define SWIZ_XXX1 SWIZ(X, X, X, 1) +#define SWIZ_XXXX SWIZ(X, X, X, X) +#define SWIZ_000X SWIZ(0, 0, 0, X) + +static const struct vc5_format format_table[] = { + FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, 0), + FORMAT(B8G8R8X8_UNORM, RGBA8, RGBA8, SWIZ_ZYX1, 16, 0), + FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, 0), + FORMAT(B8G8R8X8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYX1, 16, 0), + FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_UNORM, RGBA8, RGBA8, SWIZ_XYZ1, 16, 0), + FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZ1, 16, 0), + FORMAT(R10G10B10A2_UNORM, RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, 0), + FORMAT(R10G10B10A2_UINT, RGB10_A2UI, RGB10_A2UI, SWIZ_XYZW, 16, 0), + + FORMAT(A4B4G4R4_UNORM, ABGR4444, RGBA4, SWIZ_XYZW, 16, 0), + + FORMAT(A1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZW, 16, 0), + FORMAT(X1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZ1, 16, 0), + FORMAT(B5G6R5_UNORM, BGR565, RGB565, SWIZ_XYZ1, 16, 0), + + FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, 0), + FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, 0), + FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, 0), + FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, 0), + + FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, 1), + FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, 1), + FORMAT(R16_FLOAT, R16F, R16F, SWIZ_X001, 16, 0), + FORMAT(R32_FLOAT, R32F, R32F, SWIZ_X001, 32, 1), + + FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, 2), + FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, 2), + FORMAT(R16G16_FLOAT, RG16F, RG16F, SWIZ_XY01, 16, 0), + FORMAT(R32G32_FLOAT, RG32F, RG32F, SWIZ_XY01, 32, 2), + + FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, 4), + FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, 4), + FORMAT(R16G16B16A16_FLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, 0), + FORMAT(R32G32B32A32_FLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, 4), + + /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. */ + FORMAT(L16_UNORM, NO, R16, SWIZ_XXX1, 32, 1), + FORMAT(L16_SNORM, NO, R16_SNORM, SWIZ_XXX1, 32, 1), + FORMAT(I16_UNORM, NO, R16, SWIZ_XXXX, 32, 1), + FORMAT(I16_SNORM, NO, R16_SNORM, SWIZ_XXXX, 32, 1), + FORMAT(A16_UNORM, NO, R16, SWIZ_000X, 32, 1), + FORMAT(A16_SNORM, NO, R16_SNORM, SWIZ_000X, 32, 1), + FORMAT(L16A16_UNORM, NO, RG16, SWIZ_XXXY, 32, 2), + FORMAT(L16A16_SNORM, NO, RG16_SNORM, SWIZ_XXXY, 32, 2), + + FORMAT(A8_UNORM, NO, R8, SWIZ_000X, 16, 0), + FORMAT(L8_UNORM, NO, R8, SWIZ_XXX1, 16, 0), + FORMAT(I8_UNORM, NO, R8, SWIZ_XXXX, 16, 0), + FORMAT(L8A8_UNORM, NO, RG8, SWIZ_XXXY, 16, 0), + + FORMAT(R8_SINT, R8I, R8I, SWIZ_X001, 16, 0), + FORMAT(R8_UINT, R8UI, R8UI, SWIZ_X001, 16, 0), + FORMAT(R8G8_SINT, RG8I, RG8I, SWIZ_XY01, 16, 0), + FORMAT(R8G8_UINT, RG8UI, RG8UI, SWIZ_XY01, 16, 0), + FORMAT(R8G8B8A8_SINT, RGBA8I, RGBA8I, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8A8_UINT, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, 0), + + FORMAT(R16_SINT, R16I, R16I, SWIZ_X001, 16, 0), + FORMAT(R16_UINT, R16UI, R16UI, SWIZ_X001, 16, 0), + FORMAT(R16G16_SINT, RG16I, RG16I, SWIZ_XY01, 16, 0), + FORMAT(R16G16_UINT, RG16UI, RG16UI, SWIZ_XY01, 16, 0), + FORMAT(R16G16B16A16_SINT, RGBA16I, RGBA16I, SWIZ_XYZW, 16, 0), + FORMAT(R16G16B16A16_UINT, RGBA16UI, RGBA16UI, SWIZ_XYZW, 16, 0), + + FORMAT(R32_SINT, R32I, R32I, SWIZ_X001, 32, 1), + FORMAT(R32_UINT, R32UI, R32UI, SWIZ_X001, 32, 1), + FORMAT(R32G32_SINT, RG32I, RG32I, SWIZ_XY01, 32, 2), + FORMAT(R32G32_UINT, RG32UI, RG32UI, SWIZ_XY01, 32, 2), + FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32I, SWIZ_XYZW, 32, 4), + FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32UI, SWIZ_XYZW, 32, 4), + + FORMAT(A8_SINT, R8I, R8I, SWIZ_000X, 16, 0), + FORMAT(A8_UINT, R8UI, R8UI, SWIZ_000X, 16, 0), + FORMAT(A16_SINT, R16I, R16I, SWIZ_000X, 16, 0), + FORMAT(A16_UINT, R16UI, R16UI, SWIZ_000X, 16, 0), + FORMAT(A32_SINT, R32I, R32I, SWIZ_000X, 32, 1), + FORMAT(A32_UINT, R32UI, R32UI, SWIZ_000X, 32, 1), + + FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 16, 0), + FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZW, 16, 0), + +#if V3D_VERSION >= 40 + FORMAT(S8_UINT_Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), + FORMAT(X8Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), + FORMAT(S8X24_UINT, S8, R32F, SWIZ_XXXX, 32, 1), + FORMAT(Z32_FLOAT, D32F, R32F, SWIZ_XXXX, 32, 1), + FORMAT(Z16_UNORM, D16, DEPTH_COMP16,SWIZ_XXXX, 32, 1), + + /* Pretend we support this, but it'll be separate Z32F depth and S8. */ + FORMAT(Z32_FLOAT_S8X24_UINT, D32F, R32F, SWIZ_XXXX, 32, 1), +#else + FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), + FORMAT(X8Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), + FORMAT(S8X24_UINT, NO, R32F, SWIZ_XXXX, 32, 1), + FORMAT(Z32_FLOAT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), + FORMAT(Z16_UNORM, ZS_DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_XXXX, 32, 1), + + /* Pretend we support this, but it'll be separate Z32F depth and S8. */ + FORMAT(Z32_FLOAT_S8X24_UINT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), +#endif + + FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), + FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), + FORMAT(ETC2_RGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_SRGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_RGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_SRGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_R11_UNORM, NO, R11_EAC, SWIZ_X001, 16, 0), + FORMAT(ETC2_R11_SNORM, NO, SIGNED_R11_EAC, SWIZ_X001, 16, 0), + FORMAT(ETC2_RG11_UNORM, NO, RG11_EAC, SWIZ_XY01, 16, 0), + FORMAT(ETC2_RG11_SNORM, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, 0), + + FORMAT(DXT1_RGB, NO, BC1, SWIZ_XYZ1, 16, 0), + FORMAT(DXT3_RGBA, NO, BC2, SWIZ_XYZ1, 16, 0), + FORMAT(DXT5_RGBA, NO, BC3, SWIZ_XYZ1, 16, 0), +}; + +const struct vc5_format * +v3dX(get_format_desc)(enum pipe_format f) +{ + if (f < ARRAY_SIZE(format_table) && format_table[f].present) + return &format_table[f]; + else + return NULL; +} + +void +v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, + uint32_t *type, + uint32_t *bpp) +{ + switch (format) { + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8: +#if V3D_VERSION < 41 + case V3D_OUTPUT_IMAGE_FORMAT_RGBX8: +#endif + case V3D_OUTPUT_IMAGE_FORMAT_RGB8: + case V3D_OUTPUT_IMAGE_FORMAT_RG8: + case V3D_OUTPUT_IMAGE_FORMAT_R8: + case V3D_OUTPUT_IMAGE_FORMAT_ABGR4444: + case V3D_OUTPUT_IMAGE_FORMAT_BGR565: + case V3D_OUTPUT_IMAGE_FORMAT_ABGR1555: + *type = V3D_INTERNAL_TYPE_8; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8I: + case V3D_OUTPUT_IMAGE_FORMAT_RG8I: + case V3D_OUTPUT_IMAGE_FORMAT_R8I: + *type = V3D_INTERNAL_TYPE_8I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI: + case V3D_OUTPUT_IMAGE_FORMAT_RG8UI: + case V3D_OUTPUT_IMAGE_FORMAT_R8UI: + *type = V3D_INTERNAL_TYPE_8UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8: + case V3D_OUTPUT_IMAGE_FORMAT_SRGB: + case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2: + case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: +#if V3D_VERSION < 41 + case V3D_OUTPUT_IMAGE_FORMAT_SRGBX8: +#endif + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F: + /* Note that sRGB RTs are stored in the tile buffer at 16F, + * and the conversion to sRGB happens at tilebuffer + * load/store. + */ + *type = V3D_INTERNAL_TYPE_16F; + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RG16F: + case V3D_OUTPUT_IMAGE_FORMAT_R16F: + *type = V3D_INTERNAL_TYPE_16F; + /* Use 64bpp to make sure the TLB doesn't throw away the alpha + * channel before alpha test happens. + */ + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16I: + *type = V3D_INTERNAL_TYPE_16I; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG16I: + case V3D_OUTPUT_IMAGE_FORMAT_R16I: + *type = V3D_INTERNAL_TYPE_16I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2UI: + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16UI: + *type = V3D_INTERNAL_TYPE_16UI; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG16UI: + case V3D_OUTPUT_IMAGE_FORMAT_R16UI: + *type = V3D_INTERNAL_TYPE_16UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_128; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_R32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_128; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_R32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_128; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_R32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_32; + break; + + default: + /* Provide some default values, as we'll be called at RB + * creation time, even if an RB with this format isn't + * supported. + */ + *type = V3D_INTERNAL_TYPE_8; + *bpp = V3D_INTERNAL_BPP_32; + break; + } +} diff --git a/src/gallium/drivers/v3d/v3dx_job.c b/src/gallium/drivers/v3d/v3dx_job.c new file mode 100644 index 00000000000..5e1a345b170 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_job.c @@ -0,0 +1,76 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file v3dx_job.c + * + * V3D version-specific functions for submitting VC5 render jobs to the + * kernel. + */ + +#include "v3d_context.h" +#include "broadcom/cle/v3dx_pack.h" + +void v3dX(bcl_epilogue)(struct vc5_context *vc5, struct vc5_job *job) +{ + vc5_cl_ensure_space_with_branch(&job->bcl, + cl_packet_length(OCCLUSION_QUERY_COUNTER) + +#if V3D_VERSION >= 41 + cl_packet_length(TRANSFORM_FEEDBACK_SPECS) + +#endif + cl_packet_length(INCREMENT_SEMAPHORE) + + cl_packet_length(FLUSH_ALL_STATE)); + + if (job->oq_enabled) { + /* Disable the OQ at the end of the CL, so that the + * draw calls at the start of the CL don't inherit the + * OQ counter. + */ + cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter); + } + + /* Disable TF at the end of the CL, so that the next job to be + * run doesn't start out trying to write TF primitives. On + * V3D 3.x, it's only the TF primitive mode that triggers TF + * writes. + */ +#if V3D_VERSION >= 41 + if (job->tf_enabled) { + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { + tfe.enable = false; + }; + } +#endif /* V3D_VERSION >= 41 */ + + /* Increment the semaphore indicating that binning is done and + * unblocking the render thread. Note that this doesn't act + * until the FLUSH completes. + */ + cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr); + + /* The FLUSH_ALL emits any unwritten state changes in each + * tile. We can use this to reset any state that needs to be + * present at the start of the next tile, as we do with + * OCCLUSION_QUERY_COUNTER above. + */ + cl_emit(&job->bcl, FLUSH_ALL_STATE, flush); +} diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c new file mode 100644 index 00000000000..3801d03ecee --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_rcl.c @@ -0,0 +1,782 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "v3d_context.h" +#include "v3d_tiling.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \ + PIPE_CLEAR_COLOR1 | \ + PIPE_CLEAR_COLOR2 | \ + PIPE_CLEAR_COLOR3) \ + +#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1) + +/* The HW queues up the load until the tile coordinates show up, but can only + * track one at a time. If we need to do more than one load, then we need to + * flush out the previous load by emitting the tile coordinates and doing a + * dummy store. + */ +static void +flush_last_load(struct vc5_cl *cl) +{ + if (V3D_VERSION >= 40) + return; + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } +} + +static void +load_general(struct vc5_cl *cl, struct pipe_surface *psurf, int buffer, + uint32_t pipe_bit, uint32_t *loads_pending) +{ + struct vc5_surface *surf = vc5_surface(psurf); + bool separate_stencil = surf->separate_stencil && buffer == STENCIL; + if (separate_stencil) { + psurf = surf->separate_stencil; + surf = vc5_surface(psurf); + } + + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { + load.buffer_to_load = buffer; + load.address = cl_address(rsc->bo, surf->offset); + +#if V3D_VERSION >= 40 + load.memory_format = surf->tiling; + if (separate_stencil) + load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; + else + load.input_image_format = surf->format; + + if (surf->tiling == VC5_TILING_UIF_NO_XOR || + surf->tiling == VC5_TILING_UIF_XOR) { + load.height_in_ub_or_stride = + surf->padded_height_of_output_image_in_uif_blocks; + } else if (surf->tiling == VC5_TILING_RASTER) { + struct vc5_resource_slice *slice = + &rsc->slices[psurf->u.tex.level]; + load.height_in_ub_or_stride = slice->stride; + } + + if (psurf->texture->nr_samples > 1) + load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else + load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + +#else /* V3D_VERSION < 40 */ + /* Can't do raw ZSTENCIL loads -- need to load/store them to + * separate buffers for Z and stencil. + */ + assert(buffer != ZSTENCIL); + load.raw_mode = true; + load.padded_height_of_output_image_in_uif_blocks = + surf->padded_height_of_output_image_in_uif_blocks; +#endif /* V3D_VERSION < 40 */ + } + + *loads_pending &= ~pipe_bit; + if (*loads_pending) + flush_last_load(cl); +} + +static void +store_general(struct vc5_job *job, + struct vc5_cl *cl, struct pipe_surface *psurf, int buffer, + int pipe_bit, uint32_t *stores_pending, bool general_color_clear) +{ + struct vc5_surface *surf = vc5_surface(psurf); + bool separate_stencil = surf->separate_stencil && buffer == STENCIL; + if (separate_stencil) { + psurf = surf->separate_stencil; + surf = vc5_surface(psurf); + } + + *stores_pending &= ~pipe_bit; + bool last_store = !(*stores_pending); + + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + rsc->writes++; + + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = buffer; + store.address = cl_address(rsc->bo, surf->offset); + +#if V3D_VERSION >= 40 + store.clear_buffer_being_stored = + ((job->cleared & pipe_bit) && + (general_color_clear || + !(pipe_bit & PIPE_CLEAR_COLOR_BUFFERS))); + + if (separate_stencil) + store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; + else + store.output_image_format = surf->format; + + store.memory_format = surf->tiling; + + if (surf->tiling == VC5_TILING_UIF_NO_XOR || + surf->tiling == VC5_TILING_UIF_XOR) { + store.height_in_ub_or_stride = + surf->padded_height_of_output_image_in_uif_blocks; + } else if (surf->tiling == VC5_TILING_RASTER) { + struct vc5_resource_slice *slice = + &rsc->slices[psurf->u.tex.level]; + store.height_in_ub_or_stride = slice->stride; + } + + if (psurf->texture->nr_samples > 1) + store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else + store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + +#else /* V3D_VERSION < 40 */ + /* Can't do raw ZSTENCIL stores -- need to load/store them to + * separate buffers for Z and stencil. + */ + assert(buffer != ZSTENCIL); + store.raw_mode = true; + if (!last_store) { + store.disable_colour_buffers_clear_on_write = true; + store.disable_z_buffer_clear_on_write = true; + store.disable_stencil_buffer_clear_on_write = true; + } else { + store.disable_colour_buffers_clear_on_write = + !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) && + general_color_clear && + (job->cleared & pipe_bit))); + store.disable_z_buffer_clear_on_write = + !(job->cleared & PIPE_CLEAR_DEPTH); + store.disable_stencil_buffer_clear_on_write = + !(job->cleared & PIPE_CLEAR_STENCIL); + } + store.padded_height_of_output_image_in_uif_blocks = + surf->padded_height_of_output_image_in_uif_blocks; +#endif /* V3D_VERSION < 40 */ + } + + /* There must be a TILE_COORDINATES_IMPLICIT between each store. */ + if (V3D_VERSION < 40 && !last_store) { + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + } +} + +static int +zs_buffer_from_pipe_bits(int pipe_clear_bits) +{ + switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) { + case PIPE_CLEAR_DEPTHSTENCIL: + return ZSTENCIL; + case PIPE_CLEAR_DEPTH: + return Z; + case PIPE_CLEAR_STENCIL: + return STENCIL; + default: + return NONE; + } +} + +static void +vc5_rcl_emit_loads(struct vc5_job *job, struct vc5_cl *cl) +{ + uint32_t loads_pending = job->resolve & ~job->cleared; + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + if (!(loads_pending & bit)) + continue; + + struct pipe_surface *psurf = job->cbufs[i]; + if (!psurf || (V3D_VERSION < 40 && + psurf->texture->nr_samples <= 1)) { + continue; + } + + load_general(cl, psurf, RENDER_TARGET_0 + i, + bit, &loads_pending); + } + + if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) && + (V3D_VERSION >= 40 || + (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) { + struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); + + if (rsc->separate_stencil && + (loads_pending & PIPE_CLEAR_STENCIL)) { + load_general(cl, job->zsbuf, + STENCIL, + PIPE_CLEAR_STENCIL, + &loads_pending); + } + + if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) { + load_general(cl, job->zsbuf, + zs_buffer_from_pipe_bits(loads_pending), + loads_pending & PIPE_CLEAR_DEPTHSTENCIL, + &loads_pending); + } + } + +#if V3D_VERSION < 40 + /* The initial reload will be queued until we get the + * tile coordinates. + */ + if (loads_pending) { + cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) { + load.disable_colour_buffer_load = + (~loads_pending & + PIPE_CLEAR_COLOR_BUFFERS) >> + PIPE_FIRST_COLOR_BUFFER_BIT; + load.enable_z_load = + loads_pending & PIPE_CLEAR_DEPTH; + load.enable_stencil_load = + loads_pending & PIPE_CLEAR_STENCIL; + } + } +#else /* V3D_VERSION >= 40 */ + assert(!loads_pending); + cl_emit(cl, END_OF_LOADS, end); +#endif +} + +static void +vc5_rcl_emit_stores(struct vc5_job *job, struct vc5_cl *cl) +{ + MAYBE_UNUSED bool needs_color_clear = job->cleared & PIPE_CLEAR_COLOR_BUFFERS; + MAYBE_UNUSED bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH; + MAYBE_UNUSED bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL; + + /* For clearing color in a TLB general on V3D 3.3: + * + * - NONE buffer store clears all TLB color buffers. + * - color buffer store clears just the TLB color buffer being stored. + * - Z/S buffers store may not clear the TLB color buffer. + * + * And on V3D 4.1, we only have one flag for "clear the buffer being + * stored" in the general packet, and a separate packet to clear all + * color TLB buffers. + * + * As a result, we only bother flagging TLB color clears in a general + * packet when we don't have to emit a separate packet to clear all + * TLB color buffers. + */ + bool general_color_clear = (needs_color_clear && + (job->cleared & PIPE_CLEAR_COLOR_BUFFERS) == + (job->resolve & PIPE_CLEAR_COLOR_BUFFERS)); + + uint32_t stores_pending = job->resolve; + + /* For V3D 4.1, use general stores for all TLB stores. + * + * For V3D 3.3, we only use general stores to do raw stores for any + * MSAA surfaces. These output UIF tiled images where each 4x MSAA + * pixel is a 2x2 quad, and the format will be that of the + * internal_type/internal_bpp, rather than the format from GL's + * perspective. Non-MSAA surfaces will use + * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED. + */ + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + if (!(job->resolve & bit)) + continue; + + struct pipe_surface *psurf = job->cbufs[i]; + if (!psurf || + (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) { + continue; + } + + store_general(job, cl, psurf, RENDER_TARGET_0 + i, bit, + &stores_pending, general_color_clear); + } + + if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf && + !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) { + struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); + if (rsc->separate_stencil) { + if (job->resolve & PIPE_CLEAR_DEPTH) { + store_general(job, cl, job->zsbuf, Z, + PIPE_CLEAR_DEPTH, + &stores_pending, + general_color_clear); + } + + if (job->resolve & PIPE_CLEAR_STENCIL) { + store_general(job, cl, job->zsbuf, STENCIL, + PIPE_CLEAR_STENCIL, + &stores_pending, + general_color_clear); + } + } else { + store_general(job, cl, job->zsbuf, + zs_buffer_from_pipe_bits(job->resolve), + job->resolve & PIPE_CLEAR_DEPTHSTENCIL, + &stores_pending, general_color_clear); + } + } + + if (stores_pending) { +#if V3D_VERSION < 40 + cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) { + + store.disable_color_buffer_write = + (~stores_pending >> + PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf; + store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH; + store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL; + + /* Note that when set this will clear all of the color + * buffers. + */ + store.disable_colour_buffers_clear_on_write = + !needs_color_clear; + store.disable_z_buffer_clear_on_write = + !needs_z_clear; + store.disable_stencil_buffer_clear_on_write = + !needs_s_clear; + }; +#else /* V3D_VERSION >= 40 */ + unreachable("All color buffers should have been stored."); +#endif /* V3D_VERSION >= 40 */ + } else if (needs_color_clear && !general_color_clear) { + /* If we didn't do our color clears in the general packet, + * then emit a packet to clear all the TLB color buffers now. + */ +#if V3D_VERSION < 40 + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } +#else /* V3D_VERSION >= 40 */ + cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { + clear.clear_all_render_targets = true; + } +#endif /* V3D_VERSION >= 40 */ + } +} + +static void +vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job, int last_cbuf) +{ + /* Emit the generic list in our indirect state -- the rcl will just + * have pointers into it. + */ + struct vc5_cl *cl = &job->indirect; + vc5_cl_ensure_space(cl, 200, 1); + struct vc5_cl_reloc tile_list_start = cl_get_address(cl); + + if (V3D_VERSION >= 40) { + /* V3D 4.x only requires a single tile coordinates, and + * END_OF_LOADS switches us between loading and rendering. + */ + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + } + + vc5_rcl_emit_loads(job, cl); + + if (V3D_VERSION < 40) { + /* Tile Coordinates triggers the last reload and sets where + * the stores go. There must be one per store packet. + */ + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + } + + /* The binner starts out writing tiles assuming that the initial mode + * is triangles, so make sure that's the case. + */ + cl_emit(cl, PRIMITIVE_LIST_FORMAT, fmt) { + fmt.data_type = LIST_INDEXED; + fmt.primitive_type = LIST_TRIANGLES; + } + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + vc5_rcl_emit_stores(job, cl); + +#if V3D_VERSION >= 40 + cl_emit(cl, END_OF_TILE_MARKER, end); +#endif + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = cl_get_address(cl); + } +} + +#if V3D_VERSION >= 40 +static void +v3d_setup_render_target(struct vc5_job *job, int cbuf, + uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp) +{ + if (!job->cbufs[cbuf]) + return; + + struct vc5_surface *surf = vc5_surface(job->cbufs[cbuf]); + *rt_bpp = surf->internal_bpp; + *rt_type = surf->internal_type; + *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; +} + +#else /* V3D_VERSION < 40 */ + +static void +v3d_emit_z_stencil_config(struct vc5_job *job, struct vc5_surface *surf, + struct vc5_resource *rsc, bool is_separate_stencil) +{ + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) { + zs.address = cl_address(rsc->bo, surf->offset); + + if (!is_separate_stencil) { + zs.internal_type = surf->internal_type; + zs.output_image_format = surf->format; + } else { + zs.z_stencil_id = 1; /* Separate stencil */ + } + + zs.padded_height_of_output_image_in_uif_blocks = + surf->padded_height_of_output_image_in_uif_blocks; + + assert(surf->tiling != VC5_TILING_RASTER); + zs.memory_format = surf->tiling; + } + + if (job->resolve & (is_separate_stencil ? + PIPE_CLEAR_STENCIL : + PIPE_CLEAR_DEPTHSTENCIL)) { + rsc->writes++; + } +} +#endif /* V3D_VERSION < 40 */ + +#define div_round_up(a, b) (((a) + (b) - 1) / b) + +void +v3dX(emit_rcl)(struct vc5_job *job) +{ + /* The RCL list should be empty. */ + assert(!job->rcl.bo); + + vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 * + cl_packet_length(SUPERTILE_COORDINATES)); + job->submit.rcl_start = job->rcl.bo->offset; + vc5_job_add_bo(job, job->rcl.bo); + + int nr_cbufs = 0; + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) + nr_cbufs = i + 1; + } + + /* Comon config must be the first TILE_RENDERING_MODE_CONFIGURATION + * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are + * optional updates to the previous HW state. + */ + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION, + config) { +#if V3D_VERSION < 40 + config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH; + config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL; +#else /* V3D_VERSION >= 40 */ + if (job->zsbuf) { + struct vc5_surface *surf = vc5_surface(job->zsbuf); + config.internal_depth_type = surf->internal_type; + } +#endif /* V3D_VERSION >= 40 */ + + /* XXX: Early D/S clear */ + + switch (job->first_ez_state) { + case VC5_EZ_UNDECIDED: + case VC5_EZ_LT_LE: + config.early_z_disable = false; + config.early_z_test_and_update_direction = + EARLY_Z_DIRECTION_LT_LE; + break; + case VC5_EZ_GT_GE: + config.early_z_disable = false; + config.early_z_test_and_update_direction = + EARLY_Z_DIRECTION_GT_GE; + break; + case VC5_EZ_DISABLED: + config.early_z_disable = true; + } + + config.image_width_pixels = job->draw_width; + config.image_height_pixels = job->draw_height; + + config.number_of_render_targets_minus_1 = + MAX2(nr_cbufs, 1) - 1; + + config.multisample_mode_4x = job->msaa; + + config.maximum_bpp_of_all_render_targets = job->internal_bpp; + } + + for (int i = 0; i < nr_cbufs; i++) { + struct pipe_surface *psurf = job->cbufs[i]; + if (!psurf) + continue; + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + MAYBE_UNUSED uint32_t config_pad = 0; + uint32_t clear_pad = 0; + + /* XXX: Set the pad for raster. */ + if (surf->tiling == VC5_TILING_UIF_NO_XOR || + surf->tiling == VC5_TILING_UIF_XOR) { + int uif_block_height = vc5_utile_height(rsc->cpp) * 2; + uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) / + uif_block_height); + if (surf->padded_height_of_output_image_in_uif_blocks - + implicit_padded_height < 15) { + config_pad = (surf->padded_height_of_output_image_in_uif_blocks - + implicit_padded_height); + } else { + config_pad = 15; + clear_pad = surf->padded_height_of_output_image_in_uif_blocks; + } + } + +#if V3D_VERSION < 40 + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) { + rt.address = cl_address(rsc->bo, surf->offset); + rt.internal_type = surf->internal_type; + rt.output_image_format = surf->format; + rt.memory_format = surf->tiling; + rt.internal_bpp = surf->internal_bpp; + rt.render_target_number = i; + rt.pad = config_pad; + + if (job->resolve & PIPE_CLEAR_COLOR0 << i) + rsc->writes++; + } +#endif /* V3D_VERSION < 40 */ + + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1, + clear) { + clear.clear_color_low_32_bits = job->clear_color[i][0]; + clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff; + clear.render_target_number = i; + }; + + if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) { + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2, + clear) { + clear.clear_color_mid_low_32_bits = + ((job->clear_color[i][1] >> 24) | + (job->clear_color[i][2] << 8)); + clear.clear_color_mid_high_24_bits = + ((job->clear_color[i][2] >> 24) | + ((job->clear_color[i][3] & 0xffff) << 8)); + clear.render_target_number = i; + }; + } + + if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3, + clear) { + clear.uif_padded_height_in_uif_blocks = clear_pad; + clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16; + clear.render_target_number = i; + }; + } + } + +#if V3D_VERSION >= 40 + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) { + v3d_setup_render_target(job, 0, + &rt.render_target_0_internal_bpp, + &rt.render_target_0_internal_type, + &rt.render_target_0_clamp); + v3d_setup_render_target(job, 1, + &rt.render_target_1_internal_bpp, + &rt.render_target_1_internal_type, + &rt.render_target_1_clamp); + v3d_setup_render_target(job, 2, + &rt.render_target_2_internal_bpp, + &rt.render_target_2_internal_type, + &rt.render_target_2_clamp); + v3d_setup_render_target(job, 3, + &rt.render_target_3_internal_bpp, + &rt.render_target_3_internal_type, + &rt.render_target_3_clamp); + } +#endif + +#if V3D_VERSION < 40 + /* TODO: Don't bother emitting if we don't load/clear Z/S. */ + if (job->zsbuf) { + struct pipe_surface *psurf = job->zsbuf; + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + v3d_emit_z_stencil_config(job, surf, rsc, false); + + /* Emit the separate stencil packet if we have a resource for + * it. The HW will only load/store this buffer if the + * Z/Stencil config doesn't have stencil in its format. + */ + if (surf->separate_stencil) { + v3d_emit_z_stencil_config(job, + vc5_surface(surf->separate_stencil), + rsc->separate_stencil, true); + } + } +#endif /* V3D_VERSION < 40 */ + + /* Ends rendering mode config. */ + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES, + clear) { + clear.z_clear_value = job->clear_z; + clear.stencil_vg_mask_clear_value = job->clear_s; + }; + + /* Always set initial block size before the first branch, which needs + * to match the value from binning mode config. + */ + cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { + init.use_auto_chained_tile_lists = true; + init.size_of_first_block_in_chained_tile_lists = + TILE_ALLOCATION_BLOCK_SIZE_64B; + } + + uint32_t supertile_w = 1, supertile_h = 1; + + /* If doing multicore binning, we would need to initialize each core's + * tile list here. + */ + cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { + list.address = cl_address(job->tile_alloc, 0); + } + + cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) { + uint32_t frame_w_in_supertiles, frame_h_in_supertiles; + const uint32_t max_supertiles = 256; + + /* Size up our supertiles until we get under the limit. */ + for (;;) { + frame_w_in_supertiles = div_round_up(job->draw_tiles_x, + supertile_w); + frame_h_in_supertiles = div_round_up(job->draw_tiles_y, + supertile_h); + if (frame_w_in_supertiles * frame_h_in_supertiles < + max_supertiles) { + break; + } + + if (supertile_w < supertile_h) + supertile_w++; + else + supertile_h++; + } + + config.total_frame_width_in_tiles = job->draw_tiles_x; + config.total_frame_height_in_tiles = job->draw_tiles_y; + + config.supertile_width_in_tiles_minus_1 = supertile_w - 1; + config.supertile_height_in_tiles_minus_1 = supertile_h - 1; + + config.total_frame_width_in_supertiles = frame_w_in_supertiles; + config.total_frame_height_in_supertiles = frame_h_in_supertiles; + } + + /* Start by clearing the tile buffer. */ + cl_emit(&job->rcl, TILE_COORDINATES, coords) { + coords.tile_column_number = 0; + coords.tile_row_number = 0; + } + + /* Emit an initial clear of the tile buffers. This is necessary for + * any buffers that should be cleared (since clearing normally happens + * at the *end* of the generic tile list), but it's also nice to clear + * everything so the first tile doesn't inherit any contents from some + * previous frame. + * + * Also, implement the GFXH-1742 workaround. There's a race in the HW + * between the RCL updating the TLB's internal type/size and the + * spawning of the QPU instances using the TLB's current internal + * type/size. To make sure the QPUs get the right state,, we need 1 + * dummy store in between internal type/size changes on V3D 3.x, and 2 + * dummy stores on 4.x. + */ +#if V3D_VERSION < 40 + cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } +#else + for (int i = 0; i < 2; i++) { + if (i > 0) + cl_emit(&job->rcl, TILE_COORDINATES, coords); + cl_emit(&job->rcl, END_OF_LOADS, end); + cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + if (i == 0) { + cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) { + clear.clear_z_stencil_buffer = true; + clear.clear_all_render_targets = true; + } + } + cl_emit(&job->rcl, END_OF_TILE_MARKER, end); + } +#endif + + cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); + + vc5_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1); + + cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem); + + /* XXX: Use Morton order */ + uint32_t supertile_w_in_pixels = job->tile_width * supertile_w; + uint32_t supertile_h_in_pixels = job->tile_height * supertile_h; + uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels; + uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels; + + uint32_t max_x_supertile = 0; + uint32_t max_y_supertile = 0; + if (job->draw_max_x != 0 && job->draw_max_y != 0) { + max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels; + max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels; + } + + for (int y = min_y_supertile; y <= max_y_supertile; y++) { + for (int x = min_x_supertile; x <= max_x_supertile; x++) { + cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) { + coords.column_number_in_supertiles = x; + coords.row_number_in_supertiles = y; + } + } + } + + cl_emit(&job->rcl, END_OF_RENDERING, end); +} diff --git a/src/gallium/drivers/v3d/v3dx_simulator.c b/src/gallium/drivers/v3d/v3dx_simulator.c new file mode 100644 index 00000000000..ee8b6f2b9fd --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_simulator.c @@ -0,0 +1,190 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc5_simulator_hw.c + * + * Implements the actual HW interaction betweeh the GL driver's VC5 simulator and the simulator. + * + * The register headers between V3D versions will have conflicting defines, so + * all register interactions appear in this file and are compiled per V3D version + * we support. + */ + +#ifdef USE_V3D_SIMULATOR + +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_simulator_wrapper.h" + +#define HW_REGISTER_RO(x) (x) +#define HW_REGISTER_RW(x) (x) +#if V3D_VERSION >= 41 +#include "libs/core/v3d/registers/4.1.34.0/v3d.h" +#else +#include "libs/core/v3d/registers/3.3.0.0/v3d.h" +#endif + +#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val) +#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg) + +static void +vc5_flush_l3(struct v3d_hw *v3d) +{ + if (!v3d_hw_has_gca(v3d)) + return; + +#if V3D_VERSION < 40 + uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); + + V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET); + V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET); +#endif +} + +/* Invalidates the L2 cache. This is a read-only cache. */ +static void +vc5_flush_l2(struct v3d_hw *v3d) +{ + V3D_WRITE(V3D_CTL_0_L2CACTL, + V3D_CTL_0_L2CACTL_L2CCLR_SET | + V3D_CTL_0_L2CACTL_L2CENA_SET); +} + +/* Invalidates texture L2 cachelines */ +static void +vc5_flush_l2t(struct v3d_hw *v3d) +{ + V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0); + V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); + V3D_WRITE(V3D_CTL_0_L2TCACTL, + V3D_CTL_0_L2TCACTL_L2TFLS_SET | + (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); +} + +/* Invalidates the slice caches. These are read-only caches. */ +static void +vc5_flush_slices(struct v3d_hw *v3d) +{ + V3D_WRITE(V3D_CTL_0_SLCACTL, ~0); +} + +static void +vc5_flush_caches(struct v3d_hw *v3d) +{ + vc5_flush_l3(v3d); + vc5_flush_l2(v3d); + vc5_flush_l2t(v3d); + vc5_flush_slices(v3d); +} + +int +v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_get_param *args) +{ + static const uint32_t reg_map[] = { + [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG, + [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1, + [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2, + [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3, + [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0, + [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1, + [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2, + }; + + if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) { + args->value = V3D_READ(reg_map[args->param]); + return 0; + } + + fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n", + (long long)args->value); + abort(); +} + +void +v3dX(simulator_init_regs)(struct v3d_hw *v3d) +{ +#if V3D_VERSION == 33 + /* Set OVRTMUOUT to match kernel behavior. + * + * This means that the texture sampler uniform configuration's tmu + * output type field is used, instead of using the hardware default + * behavior based on the texture type. If you want the default + * behavior, you can still put "2" in the indirect texture state's + * output_type field. + */ + V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); +#endif +} + +void +v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, + uint32_t gmp_ofs) +{ + /* Completely reset the GMP. */ + V3D_WRITE(V3D_GMP_0_CFG, + V3D_GMP_0_CFG_PROTENABLE_SET); + V3D_WRITE(V3D_GMP_0_TABLE_ADDR, gmp_ofs); + V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0); + while (V3D_READ(V3D_GMP_0_STATUS) & + V3D_GMP_0_STATUS_CFG_BUSY_SET) { + ; + } + + vc5_flush_caches(v3d); + + if (submit->qma) { + V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); + V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms); + } +#if V3D_VERSION >= 41 + if (submit->qts) { + V3D_WRITE(V3D_CLE_0_CT0QTS, + V3D_CLE_0_CT0QTS_CTQTSEN_SET | + submit->qts); + } +#endif + V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); + V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); + + /* Wait for bin to complete before firing render, as it seems the + * simulator doesn't implement the semaphores. + */ + while (V3D_READ(V3D_CLE_0_CT0CA) != + V3D_READ(V3D_CLE_0_CT0EA)) { + v3d_hw_tick(v3d); + } + + V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start); + V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end); + + while (V3D_READ(V3D_CLE_0_CT1CA) != + V3D_READ(V3D_CLE_0_CT1EA) || + V3D_READ(V3D_CLE_1_CT1CA) != + V3D_READ(V3D_CLE_1_CT1EA)) { + v3d_hw_tick(v3d); + } +} + +#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c new file mode 100644 index 00000000000..e992796a218 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_state.c @@ -0,0 +1,951 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_framebuffer.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_half.h" +#include "util/u_helpers.h" + +#include "v3d_context.h" +#include "v3d_tiling.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +static void * +vc5_generic_cso_state_create(const void *src, uint32_t size) +{ + void *dst = calloc(1, size); + if (!dst) + return NULL; + memcpy(dst, src, size); + return dst; +} + +static void +vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso) +{ + free(hwcso); +} + +static void +vc5_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *blend_color) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->blend_color.f = *blend_color; + for (int i = 0; i < 4; i++) { + vc5->blend_color.hf[i] = + util_float_to_half(blend_color->color[i]); + } + vc5->dirty |= VC5_DIRTY_BLEND_COLOR; +} + +static void +vc5_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref *stencil_ref) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->stencil_ref = *stencil_ref; + vc5->dirty |= VC5_DIRTY_STENCIL_REF; +} + +static void +vc5_set_clip_state(struct pipe_context *pctx, + const struct pipe_clip_state *clip) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->clip = *clip; + vc5->dirty |= VC5_DIRTY_CLIP; +} + +static void +vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1); + vc5->dirty |= VC5_DIRTY_SAMPLE_MASK; +} + +static uint16_t +float_to_187_half(float f) +{ + return fui(f) >> 16; +} + +static void * +vc5_create_rasterizer_state(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct vc5_rasterizer_state *so; + + so = CALLOC_STRUCT(vc5_rasterizer_state); + if (!so) + return NULL; + + so->base = *cso; + + /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835, + * BCM21553). + */ + so->point_size = MAX2(cso->point_size, .125f); + + if (cso->offset_tri) { + so->offset_units = float_to_187_half(cso->offset_units); + so->offset_factor = float_to_187_half(cso->offset_scale); + } + + return so; +} + +/* Blend state is baked into shaders. */ +static void * +vc5_create_blend_state(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + return vc5_generic_cso_state_create(cso, sizeof(*cso)); +} + +static uint32_t +translate_stencil_op(enum pipe_stencil_op op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: return V3D_STENCIL_OP_KEEP; + case PIPE_STENCIL_OP_ZERO: return V3D_STENCIL_OP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return V3D_STENCIL_OP_REPLACE; + case PIPE_STENCIL_OP_INCR: return V3D_STENCIL_OP_INCR; + case PIPE_STENCIL_OP_DECR: return V3D_STENCIL_OP_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: return V3D_STENCIL_OP_INCWRAP; + case PIPE_STENCIL_OP_DECR_WRAP: return V3D_STENCIL_OP_DECWRAP; + case PIPE_STENCIL_OP_INVERT: return V3D_STENCIL_OP_INVERT; + } + unreachable("bad stencil op"); +} + +static void * +vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct vc5_depth_stencil_alpha_state *so; + + so = CALLOC_STRUCT(vc5_depth_stencil_alpha_state); + if (!so) + return NULL; + + so->base = *cso; + + if (cso->depth.enabled) { + switch (cso->depth.func) { + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + so->ez_state = VC5_EZ_LT_LE; + break; + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + so->ez_state = VC5_EZ_GT_GE; + break; + case PIPE_FUNC_NEVER: + case PIPE_FUNC_EQUAL: + so->ez_state = VC5_EZ_UNDECIDED; + break; + default: + so->ez_state = VC5_EZ_DISABLED; + break; + } + + /* If stencil is enabled and it's not a no-op, then it would + * break EZ updates. + */ + if (cso->stencil[0].enabled && + (cso->stencil[0].zfail_op != PIPE_STENCIL_OP_KEEP || + cso->stencil[0].func != PIPE_FUNC_ALWAYS || + (cso->stencil[1].enabled && + (cso->stencil[1].zfail_op != PIPE_STENCIL_OP_KEEP && + cso->stencil[1].func != PIPE_FUNC_ALWAYS)))) { + so->ez_state = VC5_EZ_DISABLED; + } + } + + const struct pipe_stencil_state *front = &cso->stencil[0]; + const struct pipe_stencil_state *back = &cso->stencil[1]; + + if (front->enabled) { + v3dx_pack(&so->stencil_front, STENCIL_CONFIG, config) { + config.front_config = true; + /* If !back->enabled, then the front values should be + * used for both front and back-facing primitives. + */ + config.back_config = !back->enabled; + + config.stencil_write_mask = front->writemask; + config.stencil_test_mask = front->valuemask; + + config.stencil_test_function = front->func; + config.stencil_pass_op = + translate_stencil_op(front->zpass_op); + config.depth_test_fail_op = + translate_stencil_op(front->zfail_op); + config.stencil_test_fail_op = + translate_stencil_op(front->fail_op); + } + } + if (back->enabled) { + v3dx_pack(&so->stencil_back, STENCIL_CONFIG, config) { + config.front_config = false; + config.back_config = true; + + config.stencil_write_mask = back->writemask; + config.stencil_test_mask = back->valuemask; + + config.stencil_test_function = back->func; + config.stencil_pass_op = + translate_stencil_op(back->zpass_op); + config.depth_test_fail_op = + translate_stencil_op(back->zfail_op); + config.stencil_test_fail_op = + translate_stencil_op(back->fail_op); + } + } + + return so; +} + +static void +vc5_set_polygon_stipple(struct pipe_context *pctx, + const struct pipe_poly_stipple *stipple) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->stipple = *stipple; + vc5->dirty |= VC5_DIRTY_STIPPLE; +} + +static void +vc5_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5->scissor = *scissor; + vc5->dirty |= VC5_DIRTY_SCISSOR; +} + +static void +vc5_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *viewport) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->viewport = *viewport; + vc5->dirty |= VC5_DIRTY_VIEWPORT; +} + +static void +vc5_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_vertexbuf_stateobj *so = &vc5->vertexbuf; + + util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, + start_slot, count); + so->count = util_last_bit(so->enabled_mask); + + vc5->dirty |= VC5_DIRTY_VTXBUF; +} + +static void +vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->blend = hwcso; + vc5->dirty |= VC5_DIRTY_BLEND; +} + +static void +vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->rasterizer = hwcso; + vc5->dirty |= VC5_DIRTY_RASTERIZER; +} + +static void +vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->zsa = hwcso; + vc5->dirty |= VC5_DIRTY_ZSA; +} + +static void * +vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj); + + if (!so) + return NULL; + + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + so->num_elements = num_elements; + + for (int i = 0; i < so->num_elements; i++) { + const struct pipe_vertex_element *elem = &elements[i]; + const struct util_format_description *desc = + util_format_description(elem->src_format); + uint32_t r_size = desc->channel[0].size; + + const uint32_t size = + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); + + v3dx_pack(&so->attrs[i * size], + GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { + /* vec_size == 0 means 4 */ + attr.vec_size = desc->nr_channels & 3; + attr.signed_int_type = (desc->channel[0].type == + UTIL_FORMAT_TYPE_SIGNED); + + attr.normalized_int_type = desc->channel[0].normalized; + attr.read_as_int_uint = desc->channel[0].pure_integer; + attr.instance_divisor = MIN2(elem->instance_divisor, + 0xffff); + + switch (desc->channel[0].type) { + case UTIL_FORMAT_TYPE_FLOAT: + if (r_size == 32) { + attr.type = ATTRIBUTE_FLOAT; + } else { + assert(r_size == 16); + attr.type = ATTRIBUTE_HALF_FLOAT; + } + break; + + case UTIL_FORMAT_TYPE_SIGNED: + case UTIL_FORMAT_TYPE_UNSIGNED: + switch (r_size) { + case 32: + attr.type = ATTRIBUTE_INT; + break; + case 16: + attr.type = ATTRIBUTE_SHORT; + break; + case 10: + attr.type = ATTRIBUTE_INT2_10_10_10; + break; + case 8: + attr.type = ATTRIBUTE_BYTE; + break; + default: + fprintf(stderr, + "format %s unsupported\n", + desc->name); + attr.type = ATTRIBUTE_BYTE; + abort(); + } + break; + + default: + fprintf(stderr, + "format %s unsupported\n", + desc->name); + abort(); + } + } + } + + /* Set up the default attribute values in case any of the vertex + * elements use them. + */ + so->default_attribute_values = vc5_bo_alloc(vc5->screen, + VC5_MAX_ATTRIBUTES * + 4 * sizeof(float), + "default attributes"); + uint32_t *attrs = vc5_bo_map(so->default_attribute_values); + for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) { + attrs[i * 4 + 0] = 0; + attrs[i * 4 + 1] = 0; + attrs[i * 4 + 2] = 0; + if (i < so->num_elements && + util_format_is_pure_integer(so->pipe[i].src_format)) { + attrs[i * 4 + 3] = 1; + } else { + attrs[i * 4 + 3] = fui(1.0); + } + } + + return so; +} + +static void +vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->vtx = hwcso; + vc5->dirty |= VC5_DIRTY_VTXSTATE; +} + +static void +vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index, + const struct pipe_constant_buffer *cb) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_constbuf_stateobj *so = &vc5->constbuf[shader]; + + util_copy_constant_buffer(&so->cb[index], cb); + + /* Note that the state tracker can unbind constant buffers by + * passing NULL here. + */ + if (unlikely(!cb)) { + so->enabled_mask &= ~(1 << index); + so->dirty_mask &= ~(1 << index); + return; + } + + so->enabled_mask |= 1 << index; + so->dirty_mask |= 1 << index; + vc5->dirty |= VC5_DIRTY_CONSTBUF; +} + +static void +vc5_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *framebuffer) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct pipe_framebuffer_state *cso = &vc5->framebuffer; + + vc5->job = NULL; + + util_copy_framebuffer_state(cso, framebuffer); + + vc5->swap_color_rb = 0; + vc5->blend_dst_alpha_one = 0; + for (int i = 0; i < vc5->framebuffer.nr_cbufs; i++) { + struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i]; + if (!cbuf) + continue; + + const struct util_format_description *desc = + util_format_description(cbuf->format); + + /* For BGRA8 formats (DRI window system default format), we + * need to swap R and B, since the HW's format is RGBA8. + */ + if (desc->swizzle[0] == PIPE_SWIZZLE_Z && + cbuf->format != PIPE_FORMAT_B5G6R5_UNORM) { + vc5->swap_color_rb |= 1 << i; + } + + if (desc->swizzle[3] == PIPE_SWIZZLE_1) + vc5->blend_dst_alpha_one |= 1 << i; + } + + vc5->dirty |= VC5_DIRTY_FRAMEBUFFER; +} + +static struct vc5_texture_stateobj * +vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader) +{ + switch (shader) { + case PIPE_SHADER_FRAGMENT: + vc5->dirty |= VC5_DIRTY_FRAGTEX; + return &vc5->fragtex; + break; + case PIPE_SHADER_VERTEX: + vc5->dirty |= VC5_DIRTY_VERTTEX; + return &vc5->verttex; + break; + default: + fprintf(stderr, "Unknown shader target %d\n", shader); + abort(); + } +} + +static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest) +{ + switch (pipe_wrap) { + case PIPE_TEX_WRAP_REPEAT: + return 0; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return 1; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return 2; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return 3; + case PIPE_TEX_WRAP_CLAMP: + return (using_nearest ? 1 : 3); + default: + unreachable("Unknown wrap mode"); + } +} + + +static void * +vc5_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + MAYBE_UNUSED struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state); + + if (!so) + return NULL; + + memcpy(so, cso, sizeof(*cso)); + + bool either_nearest = + (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || + cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); + +#if V3D_VERSION >= 40 + so->bo = vc5_bo_alloc(vc5->screen, cl_packet_length(SAMPLER_STATE), + "sampler"); + void *map = vc5_bo_map(so->bo); + + v3dx_pack(map, SAMPLER_STATE, sampler) { + sampler.wrap_i_border = false; + + sampler.wrap_s = translate_wrap(cso->wrap_s, either_nearest); + sampler.wrap_t = translate_wrap(cso->wrap_t, either_nearest); + sampler.wrap_r = translate_wrap(cso->wrap_r, either_nearest); + + sampler.fixed_bias = cso->lod_bias; + sampler.depth_compare_function = cso->compare_func; + + sampler.min_filter_nearest = + cso->min_img_filter == PIPE_TEX_FILTER_NEAREST; + sampler.mag_filter_nearest = + cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST; + sampler.mip_filter_nearest = + cso->min_mip_filter != PIPE_TEX_MIPFILTER_LINEAR; + + sampler.min_level_of_detail = MIN2(MAX2(0, cso->min_lod), + 15); + sampler.max_level_of_detail = MIN2(cso->max_lod, 15); + + if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + sampler.min_level_of_detail = 0; + sampler.max_level_of_detail = 0; + } + + if (cso->max_anisotropy) { + sampler.anisotropy_enable = true; + + if (cso->max_anisotropy > 8) + sampler.maximum_anisotropy = 3; + else if (cso->max_anisotropy > 4) + sampler.maximum_anisotropy = 2; + else if (cso->max_anisotropy > 2) + sampler.maximum_anisotropy = 1; + } + + sampler.border_colour_mode = V3D_BORDER_COLOUR_FOLLOWS; + /* XXX: The border colour field is in the TMU blending format + * (32, f16, or i16), and we need to customize it based on + * that. + * + * XXX: for compat alpha formats, we need the alpha field to + * be in the red channel. + */ + sampler.border_colour_red = + util_float_to_half(cso->border_color.f[0]); + sampler.border_colour_green = + util_float_to_half(cso->border_color.f[1]); + sampler.border_colour_blue = + util_float_to_half(cso->border_color.f[2]); + sampler.border_colour_alpha = + util_float_to_half(cso->border_color.f[3]); + } + +#else /* V3D_VERSION < 40 */ + v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) { + p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest); + p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest); + p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest); + } + + v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { + tex.depth_compare_function = cso->compare_func; + tex.fixed_bias = cso->lod_bias; + } +#endif /* V3D_VERSION < 40 */ + return so; +} + +static void +vc5_sampler_states_bind(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start, + unsigned nr, void **hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); + + assert(start == 0); + unsigned i; + unsigned new_nr = 0; + + for (i = 0; i < nr; i++) { + if (hwcso[i]) + new_nr = i + 1; + stage_tex->samplers[i] = hwcso[i]; + } + + for (; i < stage_tex->num_samplers; i++) { + stage_tex->samplers[i] = NULL; + } + + stage_tex->num_samplers = new_nr; +} + +static void +vc5_sampler_state_delete(struct pipe_context *pctx, + void *hwcso) +{ + struct pipe_sampler_state *psampler = hwcso; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + + vc5_bo_unreference(&sampler->bo); + free(psampler); +} + +#if V3D_VERSION >= 40 +static uint32_t +translate_swizzle(unsigned char pipe_swizzle) +{ + switch (pipe_swizzle) { + case PIPE_SWIZZLE_0: + return 0; + case PIPE_SWIZZLE_1: + return 1; + case PIPE_SWIZZLE_X: + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_W: + return 2 + pipe_swizzle; + default: + unreachable("unknown swizzle"); + } +} +#endif + +static struct pipe_sampler_view * +vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_screen *screen = vc5->screen; + struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view); + struct vc5_resource *rsc = vc5_resource(prsc); + + if (!so) + return NULL; + + so->base = *cso; + + pipe_reference(NULL, &prsc->reference); + + /* Compute the sampler view's swizzle up front. This will be plugged + * into either the sampler (for 16-bit returns) or the shader's + * texture key (for 32) + */ + uint8_t view_swizzle[4] = { + cso->swizzle_r, + cso->swizzle_g, + cso->swizzle_b, + cso->swizzle_a + }; + const uint8_t *fmt_swizzle = + vc5_get_format_swizzle(&screen->devinfo, so->base.format); + util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle); + + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + int msaa_scale = prsc->nr_samples > 1 ? 2 : 1; + +#if V3D_VERSION >= 40 + so->bo = vc5_bo_alloc(vc5->screen, cl_packet_length(SAMPLER_STATE), + "sampler"); + void *map = vc5_bo_map(so->bo); + + v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { +#else /* V3D_VERSION < 40 */ + v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { +#endif + + tex.image_width = prsc->width0 * msaa_scale; + tex.image_height = prsc->height0 * msaa_scale; + +#if V3D_VERSION >= 40 + /* On 4.x, the height of a 1D texture is redefined to be the + * upper 14 bits of the width (which is only usable with txf). + */ + if (prsc->target == PIPE_TEXTURE_1D || + prsc->target == PIPE_TEXTURE_1D_ARRAY) { + tex.image_height = tex.image_width >> 14; + } +#endif + + if (prsc->target == PIPE_TEXTURE_3D) { + tex.image_depth = prsc->depth0; + } else { + tex.image_depth = (cso->u.tex.last_layer - + cso->u.tex.first_layer) + 1; + } + + tex.srgb = util_format_is_srgb(cso->format); + + tex.base_level = cso->u.tex.first_level; +#if V3D_VERSION >= 40 + tex.max_level = cso->u.tex.last_level; + /* Note that we don't have a job to reference the texture's sBO + * at state create time, so any time this sampler view is used + * we need to add the texture to the job. + */ + tex.texture_base_pointer = cl_address(NULL, + rsc->bo->offset + + rsc->slices[0].offset), + + tex.swizzle_r = translate_swizzle(so->swizzle[0]); + tex.swizzle_g = translate_swizzle(so->swizzle[1]); + tex.swizzle_b = translate_swizzle(so->swizzle[2]); + tex.swizzle_a = translate_swizzle(so->swizzle[3]); +#endif + tex.array_stride_64_byte_aligned = rsc->cube_map_stride / 64; + + if (prsc->nr_samples > 1 && V3D_VERSION < 40) { + /* Using texture views to reinterpret formats on our + * MSAA textures won't work, because we don't lay out + * the bits in memory as it's expected -- for example, + * RGBA8 and RGB10_A2 are compatible in the + * ARB_texture_view spec, but in HW we lay them out as + * 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now + * to catch failures. + * + * We explicitly allow remapping S8Z24 to RGBA8888 for + * vc5_blit.c's stencil blits. + */ + assert((util_format_linear(cso->format) == + util_format_linear(prsc->format)) || + (prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM && + cso->format == PIPE_FORMAT_R8G8B8A8_UNORM)); + uint32_t output_image_format = + vc5_get_rt_format(&screen->devinfo, cso->format); + uint32_t internal_type; + uint32_t internal_bpp; + vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, + output_image_format, + &internal_type, + &internal_bpp); + + switch (internal_type) { + case V3D_INTERNAL_TYPE_8: + tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8; + break; + case V3D_INTERNAL_TYPE_16F: + tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F; + break; + default: + unreachable("Bad MSAA texture type"); + } + + /* sRGB was stored in the tile buffer as linear and + * would have been encoded to sRGB on resolved tile + * buffer store. Note that this means we would need + * shader code if we wanted to read an MSAA sRGB + * texture without sRGB decode. + */ + tex.srgb = false; + } else { + tex.texture_type = vc5_get_tex_format(&screen->devinfo, + cso->format); + } + + /* Since other platform devices may produce UIF images even + * when they're not big enough for V3D to assume they're UIF, + * we force images with level 0 as UIF to be always treated + * that way. + */ + tex.level_0_is_strictly_uif = (rsc->slices[0].tiling == + VC5_TILING_UIF_XOR || + rsc->slices[0].tiling == + VC5_TILING_UIF_NO_XOR); + tex.level_0_xor_enable = (rsc->slices[0].tiling == + VC5_TILING_UIF_XOR); + + if (tex.level_0_is_strictly_uif) + tex.level_0_ub_pad = rsc->slices[0].ub_pad; + +#if V3D_VERSION >= 40 + if (tex.uif_xor_disable || + tex.level_0_is_strictly_uif) { + tex.extended = true; + } +#endif /* V3D_VERSION >= 40 */ + }; + + return &so->base; +} + +static void +vc5_sampler_view_destroy(struct pipe_context *pctx, + struct pipe_sampler_view *psview) +{ + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + + vc5_bo_unreference(&sview->bo); + pipe_resource_reference(&psview->texture, NULL); + free(psview); +} + +static void +vc5_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); + unsigned i; + unsigned new_nr = 0; + + assert(start == 0); + + for (i = 0; i < nr; i++) { + if (views[i]) + new_nr = i + 1; + pipe_sampler_view_reference(&stage_tex->textures[i], views[i]); + } + + for (; i < stage_tex->num_textures; i++) { + pipe_sampler_view_reference(&stage_tex->textures[i], NULL); + } + + stage_tex->num_textures = new_nr; +} + +static struct pipe_stream_output_target * +vc5_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = CALLOC_STRUCT(pipe_stream_output_target); + if (!target) + return NULL; + + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); + + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + + return target; +} + +static void +vc5_stream_output_target_destroy(struct pipe_context *pctx, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + free(target); +} + +static void +vc5_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct vc5_context *ctx = vc5_context(pctx); + struct vc5_streamout_stateobj *so = &ctx->streamout; + unsigned i; + + assert(num_targets <= ARRAY_SIZE(so->targets)); + + for (i = 0; i < num_targets; i++) + pipe_so_target_reference(&so->targets[i], targets[i]); + + for (; i < so->num_targets; i++) + pipe_so_target_reference(&so->targets[i], NULL); + + so->num_targets = num_targets; + + ctx->dirty |= VC5_DIRTY_STREAMOUT; +} + +void +v3dX(state_init)(struct pipe_context *pctx) +{ + pctx->set_blend_color = vc5_set_blend_color; + pctx->set_stencil_ref = vc5_set_stencil_ref; + pctx->set_clip_state = vc5_set_clip_state; + pctx->set_sample_mask = vc5_set_sample_mask; + pctx->set_constant_buffer = vc5_set_constant_buffer; + pctx->set_framebuffer_state = vc5_set_framebuffer_state; + pctx->set_polygon_stipple = vc5_set_polygon_stipple; + pctx->set_scissor_states = vc5_set_scissor_states; + pctx->set_viewport_states = vc5_set_viewport_states; + + pctx->set_vertex_buffers = vc5_set_vertex_buffers; + + pctx->create_blend_state = vc5_create_blend_state; + pctx->bind_blend_state = vc5_blend_state_bind; + pctx->delete_blend_state = vc5_generic_cso_state_delete; + + pctx->create_rasterizer_state = vc5_create_rasterizer_state; + pctx->bind_rasterizer_state = vc5_rasterizer_state_bind; + pctx->delete_rasterizer_state = vc5_generic_cso_state_delete; + + pctx->create_depth_stencil_alpha_state = vc5_create_depth_stencil_alpha_state; + pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind; + pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete; + + pctx->create_vertex_elements_state = vc5_vertex_state_create; + pctx->delete_vertex_elements_state = vc5_generic_cso_state_delete; + pctx->bind_vertex_elements_state = vc5_vertex_state_bind; + + pctx->create_sampler_state = vc5_create_sampler_state; + pctx->delete_sampler_state = vc5_sampler_state_delete; + pctx->bind_sampler_states = vc5_sampler_states_bind; + + pctx->create_sampler_view = vc5_create_sampler_view; + pctx->sampler_view_destroy = vc5_sampler_view_destroy; + pctx->set_sampler_views = vc5_set_sampler_views; + + pctx->create_stream_output_target = vc5_create_stream_output_target; + pctx->stream_output_target_destroy = vc5_stream_output_target_destroy; + pctx->set_stream_output_targets = vc5_set_stream_output_targets; +} |