diff options
author | Eric Anholt <[email protected]> | 2014-06-19 08:19:38 +0100 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2014-08-08 18:59:46 -0700 |
commit | 1850d0a1cbf044dc4d29b7a9ede2c634f667d853 (patch) | |
tree | 80af7674f5160d9ae740958aad047786b2646539 /src/gallium/drivers/vc4 | |
parent | f017e32c0add05b588f5f6a4bea16b84b8a051eb (diff) |
vc4: Initial skeleton driver import.
This mostly just takes every draw call and turns it into a sequence of
commands that clear the FBO and draw a single shaded triangle to it,
regardless of the actual input vertices or shaders. I copied the initial
driver skeleton mostly from freedreno, and I've preserved Rob Clark's
copyright for those. I also based my initial hardcoded shaders and
command lists on Scott Mansell (phire)'s "hackdriver" project, though the
bit patterns of the shaders emitted end up being different.
v2: Rebase on gallium megadrivers changes.
v3: Rebase on PIPE_SHADER_CAP_MAX_CONSTS change.
v4: Rely on simpenrose actually being installed when building for
simulation.
v5: Add more header duplicate-include guards.
v6: Apply Emil's review (protection against vc4 sim and ilo at the same
time, and dropping the dricommon drm bits) and fix a copyright header
(thanks, Roland)
Diffstat (limited to 'src/gallium/drivers/vc4')
25 files changed, 4430 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/.dir-locals.el b/src/gallium/drivers/vc4/.dir-locals.el new file mode 100644 index 00000000000..ed10dc2d7fe --- /dev/null +++ b/src/gallium/drivers/vc4/.dir-locals.el @@ -0,0 +1,12 @@ +((nil + (indent-tabs-mode . nil) + (tab-width . 8) + (c-basic-offset . 8) + (c-file-style . "stroustrup") + (fill-column . 78) + (eval . (progn + (c-set-offset 'innamespace '0) + (c-set-offset 'inline-open '0))) + ) + (makefile-mode (indent-tabs-mode . t)) + ) diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am new file mode 100644 index 00000000000..0b8279d1763 --- /dev/null +++ b/src/gallium/drivers/vc4/Makefile.am @@ -0,0 +1,40 @@ +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +if USE_VC4_SIMULATOR +SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1 +SIM_LDFLAGS = -lsimpenrose +endif + +AM_CFLAGS = \ + $(LIBDRM_CFLAGS) \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(SIM_CFLAGS) \ + $() + +noinst_LTLIBRARIES = libvc4.la + +libvc4_la_SOURCES = $(C_SOURCES) +libvc4_la_LIBADD = $(SIM_LIB) +libvc4_la_LDFLAGS = $(SIM_LDFLAGS) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources new file mode 100644 index 00000000000..294869fe99a --- /dev/null +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -0,0 +1,15 @@ +C_SOURCES := \ + vc4_bufmgr.c \ + vc4_cl.c \ + vc4_context.c \ + vc4_draw.c \ + vc4_emit.c \ + vc4_program.c \ + vc4_qpu.c \ + vc4_qpu_disasm.c \ + vc4_qpu_validate.c \ + vc4_resource.c \ + vc4_screen.c \ + vc4_simulator.c \ + vc4_state.c \ + $() diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c new file mode 100644 index 00000000000..6cf7d163dd0 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -0,0 +1,173 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <err.h> +#include <stdio.h> +#include <sys/mman.h> +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "util/u_memory.h" + +#include "vc4_context.h" +#include "vc4_screen.h" + +struct vc4_bo * +vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name) +{ + struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo); + if (!bo) + return NULL; + + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->size = size; + bo->name = name; + +#ifndef USE_VC4_SIMULATOR + struct drm_mode_create_dumb create; + memset(&create, 0, sizeof(create)); + + create.width = 128; + create.bpp = 8; + create.height = (size + 127) / 128; + + int ret = drmIoctl(screen->fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); + if (ret != 0) + errx(1, "create ioctl"); + + bo->handle = create.handle; + assert(create.size >= size); +#else /* USE_VC4_SIMULATOR */ + bo->map = vc4_simulator_alloc(screen, size); +#endif /* USE_VC4_SIMULATOR */ + + return bo; +} + +/** + * Destroys a BO: drops its CPU mapping (if any), closes its GEM handle and + * frees the struct. Only the non-simulator build touches the kernel; the + * simulator build just frees the struct. + */ +void +vc4_bo_free(struct vc4_bo *bo) +{ +#ifndef USE_VC4_SIMULATOR + struct vc4_screen *screen = bo->screen; + + /* vc4_bo_map() caches its mmap() in bo->map; release it here so the + * mapping does not outlive the BO. + */ + if (bo->map) + munmap(bo->map, bo->size); + + struct drm_gem_close c; + /* Zero the whole request so no uninitialized bytes reach the ioctl. */ + memset(&c, 0, sizeof(c)); + c.handle = bo->handle; + int ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c); + if (ret != 0) + fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); +#endif + + free(bo); +} + +struct vc4_bo * +vc4_bo_open_name(struct vc4_screen *screen, uint32_t name, + uint32_t winsys_stride) +{ + struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo); + + struct drm_gem_open o; + o.name = name; + int ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o); + if (ret) { + fprintf(stderr, "Failed to open bo %d: %s\n", + name, strerror(errno)); + free(bo); + return NULL; + } + + 
pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->handle = o.handle; + bo->size = o.size; + +#ifdef USE_VC4_SIMULATOR + vc4_bo_map(bo); + bo->simulator_winsys_map = bo->map; + bo->simulator_winsys_stride = winsys_stride; + bo->map = vc4_simulator_alloc(screen, bo->size); +#endif + + return bo; +} + +/** + * Allocates a BO of the given size and fills it with a copy of data. + * Returns NULL if the allocation fails. + */ +struct vc4_bo * +vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data, uint32_t size, + const char *name) +{ + void *map; + struct vc4_bo *bo; + + bo = vc4_bo_alloc(screen, size, name); + if (!bo) + return NULL; + + map = vc4_bo_map(bo); + memcpy(map, data, size); + return bo; +} + +/** + * Exports a global (flink) name for the BO into *name. + * + * Returns false if the ioctl fails. The BO is only borrowed: it stays valid + * and owned by the caller on failure. In the simulator build no ioctl is + * made, *name is left untouched and true is returned. + */ +bool +vc4_bo_flink(struct vc4_bo *bo, uint32_t *name) +{ +#ifndef USE_VC4_SIMULATOR + struct drm_gem_flink flink = { + .handle = bo->handle, + }; + int ret = drmIoctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink); + if (ret) { + fprintf(stderr, "Failed to flink bo %d: %s\n", + bo->handle, strerror(errno)); + return false; + } + + *name = flink.name; +#endif /* USE_VC4_SIMULATOR */ + + return true; +} + +void * +vc4_bo_map(struct vc4_bo *bo) +{ + int ret; + + if (bo->map) + return bo->map; + + struct drm_mode_map_dumb map; + memset(&map, 0, sizeof(map)); + map.handle = bo->handle; + ret = drmIoctl(bo->screen->fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + if (ret != 0) + errx(1, "map ioctl"); + + bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->screen->fd, map.offset); + if (bo->map == MAP_FAILED) { + errx(1, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + bo->handle, (long long)map.offset, bo->size); + } + + return bo->map; +} diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h new file mode 100644 index 00000000000..14bfa2466df --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_bufmgr.h @@ -0,0 +1,83 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC4_BUFMGR_H +#define VC4_BUFMGR_H + +#include <stdint.h> +#include "util/u_inlines.h" + +struct vc4_context; + +struct vc4_bo { + struct pipe_reference reference; + struct vc4_screen *screen; + void *map; + const char *name; + uint32_t handle; + uint32_t size; + +#ifdef USE_VC4_SIMULATOR + void *simulator_winsys_map; + uint32_t simulator_winsys_stride; +#endif +}; + +struct vc4_bo *vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, + const char *name); +struct vc4_bo *vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data, + uint32_t size, const char *name); +void vc4_bo_free(struct vc4_bo *bo); +struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name, + uint32_t winsys_stride); +bool vc4_bo_flink(struct vc4_bo *bo, uint32_t *name); + +static inline void +vc4_bo_set_reference(struct vc4_bo **old_bo, struct vc4_bo *new_bo) +{ + if (pipe_reference(&(*old_bo)->reference, &new_bo->reference)) + vc4_bo_free(*old_bo); + *old_bo = new_bo; +} + +static inline struct vc4_bo * +vc4_bo_reference(struct vc4_bo 
*bo) +{ + pipe_reference(NULL, &bo->reference); + return bo; +} + +static inline void +vc4_bo_unreference(struct vc4_bo **bo) +{ + if (pipe_reference(&(*bo)->reference, NULL)) + vc4_bo_free(*bo); + *bo = NULL; +} + + +void * +vc4_bo_map(struct vc4_bo *bo); + +#endif /* VC4_BUFMGR_H */ + diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c new file mode 100644 index 00000000000..3bbeadc49ee --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_cl.c @@ -0,0 +1,74 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "util/u_math.h" +#include "vc4_context.h" + +void +vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl) +{ +#ifdef USE_VC4_SIMULATOR + uint32_t size = 256 * 1024; + cl->base = vc4_simulator_alloc(vc4->screen, size); + cl->end = cl->base + size; + cl->next = cl->base; +#endif +} + +void +vc4_grow_cl(struct vc4_cl *cl) +{ + uint32_t size = MAX2((cl->end - cl->base) * 2, 4096); + uint32_t offset = cl->next -cl->base; + +#ifdef USE_VC4_SIMULATOR + assert(!"not reached"); +#endif + cl->base = realloc(cl->base, size); + cl->end = cl->base + size; + cl->next = cl->base + offset; +} + +void +vc4_reset_cl(struct vc4_cl *cl) +{ + assert(cl->reloc_count == 0); + cl->next = cl->base; +} + +uint32_t +vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo) +{ + uint32_t hindex; + uint32_t *current_handles = vc4->bo_handles.base; + + for (hindex = 0; + hindex < (vc4->bo_handles.next - vc4->bo_handles.base) / 4; + hindex++) { + if (current_handles[hindex] == bo->handle) + return hindex; + } + + cl_u32(&vc4->bo_handles, bo->handle); + return hindex; +} diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h new file mode 100644 index 00000000000..a892444d9c7 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_cl.h @@ -0,0 +1,132 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC4_CL_H +#define VC4_CL_H + +#include <stdint.h> + +#include "util/u_math.h" + +#ifdef USE_VC4_SIMULATOR +#include "simpenrose/simpenrose.h" +#endif + +#include "vc4_packet.h" + +struct vc4_bo; + +struct vc4_cl { + void *base; + void *next; + void *end; + uint32_t reloc_next; + uint32_t reloc_count; +}; + +void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl); +void vc4_grow_cl(struct vc4_cl *cl); +void vc4_reset_cl(struct vc4_cl *cl); +uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo); + +static inline void +cl_u8(struct vc4_cl *cl, uint8_t n) +{ + if (cl->next + 1 > cl->end) + vc4_grow_cl(cl); + + *(uint8_t *)cl->next = n; + cl->next++; +} + +static inline void +cl_u16(struct vc4_cl *cl, uint32_t n) +{ + if (cl->next + 2 > cl->end) + vc4_grow_cl(cl); + + *(uint16_t *)cl->next = n; + cl->next += 2; +} + +static inline void +cl_u32(struct vc4_cl *cl, uint32_t n) +{ + if (cl->next + 4 > cl->end) + vc4_grow_cl(cl); + + *(uint32_t *)cl->next = n; + cl->next += 4; +} + +static inline void +cl_f(struct vc4_cl *cl, float f) +{ + cl_u32(cl, fui(f)); +} + +static inline void +cl_start_reloc(struct vc4_cl *cl, uint32_t n) +{ + assert(n == 1 || n == 2); + assert(cl->reloc_count == 0); + cl->reloc_count = n; + +#ifndef USE_VC4_SIMULATOR + cl_u8(cl, GEM_HANDLES); + cl->reloc_next = cl->next - cl->base; + cl_u32(cl, 0); /* Space where hindex will be written. */ + cl_u32(cl, 0); /* Space where hindex will be written. 
*/ +#endif +} + +static inline void +cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n) +{ + assert(cl->reloc_count == 0); + cl->reloc_count = n; + cl->reloc_next = cl->next - cl->base; + +#ifndef USE_VC4_SIMULATOR + for (int i = 0; i < n; i++) + cl_u32(cl, 0); /* Space where hindex will be written. */ +#endif +} + +static inline void +cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, + struct vc4_bo *bo, uint32_t offset) +{ +#ifndef USE_VC4_SIMULATOR + *(uint32_t *)(cl->base + cl->reloc_next) = vc4_gem_hindex(vc4, bo); + cl->reloc_next += 4; +#else + offset += simpenrose_hw_addr(bo->map); +#endif + + cl->reloc_count--; + + cl_u32(cl, offset); +} + +#endif /* VC4_CL_H */ diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c new file mode 100644 index 00000000000..51018d1cc94 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -0,0 +1,192 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <xf86drm.h> +#include <err.h> +#include <stdio.h> + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_blitter.h" +#include "pipe/p_screen.h" + +#define __user +#include "vc4_drm.h" +#include "vc4_screen.h" +#include "vc4_context.h" +#include "vc4_resource.h" + +static void +dump_fbo(struct vc4_context *vc4, struct vc4_bo *fbo) +{ +#ifndef USE_VC4_SIMULATOR + uint32_t *map = vc4_bo_map(fbo); + uint32_t width = vc4->framebuffer.width; + uint32_t height = vc4->framebuffer.height; + uint32_t chunk_w = width / 79; + uint32_t chunk_h = height / 40; + uint32_t found_colors[10]; + uint32_t num_found_colors = 0; + + for (int by = 0; by < height; by += chunk_h) { + for (int bx = 0; bx < width; bx += chunk_w) { + bool on = false, black = false; + + for (int y = by; y < MIN2(height, by + chunk_h); y++) { + for (int x = bx; x < MIN2(width, bx + chunk_w); x++) { + uint32_t pix = map[y * width + x]; + on |= pix != 0; + black |= pix == 0xff000000; + + int i; + for (i = 0; i < num_found_colors; i++) { + if (pix == found_colors[i]) + break; + } + if (i == num_found_colors && + num_found_colors < Elements(found_colors)) + found_colors[num_found_colors++] = pix; + } + } + if (black) + fprintf(stderr, "O"); + else if (on) + fprintf(stderr, "X"); + else + fprintf(stderr, "."); + } + fprintf(stderr, "\n"); + } + + for (int i = 0; i < num_found_colors; i++) { + fprintf(stderr, "color %d: 0x%08x\n", i, found_colors[i]); + } +#endif +} + +void +vc4_flush(struct pipe_context *pctx) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if (!vc4->needs_flush) + return; + + struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); + struct 
vc4_resource *ctex = vc4_resource(csurf->base.texture); + struct drm_vc4_submit_cl submit; + memset(&submit, 0, sizeof(submit)); + + submit.bo_handles = vc4->bo_handles.base; + submit.bo_handle_count = (vc4->bo_handles.next - + vc4->bo_handles.base) / 4; + submit.bin_cl = vc4->bcl.base; + submit.bin_cl_len = vc4->bcl.next - vc4->bcl.base; + submit.render_cl = vc4->rcl.base; + submit.render_cl_len = vc4->rcl.next - vc4->rcl.base; + submit.shader_records = vc4->shader_rec.base; + submit.shader_record_len = vc4->shader_rec.next - vc4->shader_rec.base; + submit.shader_record_count = vc4->shader_rec_count; + +#ifndef USE_VC4_SIMULATOR + int ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); + if (ret) + errx(1, "VC4 submit failed\n"); +#else + vc4_simulator_flush(vc4, csurf); +#endif + vc4_reset_cl(&vc4->bcl); + vc4_reset_cl(&vc4->rcl); + vc4_reset_cl(&vc4->shader_rec); + vc4_reset_cl(&vc4->bo_handles); + vc4->shader_rec_count = 0; + + vc4->needs_flush = false; + vc4->dirty = ~0; + + dump_fbo(vc4, ctex->bo); +} + +static void +vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, + unsigned flags) +{ + vc4_flush(pctx); +} + +static void +vc4_context_destroy(struct pipe_context *pctx) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if (vc4->blitter) + util_blitter_destroy(vc4->blitter); + util_slab_destroy(&vc4->transfer_pool); + + free(vc4); +} + +struct pipe_context * +vc4_context_create(struct pipe_screen *pscreen, void *priv) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + struct vc4_context *vc4; + + vc4 = CALLOC_STRUCT(vc4_context); + if (vc4 == NULL) + return NULL; + struct pipe_context *pctx = &vc4->base; + + vc4->screen = screen; + + pctx->screen = pscreen; + pctx->priv = priv; + pctx->destroy = vc4_context_destroy; + pctx->flush = vc4_pipe_flush; + + vc4_draw_init(pctx); + vc4_state_init(pctx); + vc4_program_init(pctx); + vc4_resource_context_init(pctx); + + vc4_init_cl(vc4, &vc4->bcl); + vc4_init_cl(vc4, &vc4->rcl); 
+ vc4_init_cl(vc4, &vc4->shader_rec); + vc4_init_cl(vc4, &vc4->bo_handles); + + vc4->dirty = ~0; + vc4->fd = screen->fd; + + util_slab_create(&vc4->transfer_pool, sizeof(struct pipe_transfer), + 16, UTIL_SLAB_SINGLETHREADED); + vc4->blitter = util_blitter_create(pctx); + if (!vc4->blitter) + goto fail; + + return &vc4->base; + +fail: + pctx->destroy(pctx); + return NULL; +} diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h new file mode 100644 index 00000000000..73bf05bce19 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -0,0 +1,177 @@ +/* + * Copyright © 2014 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VC4_CONTEXT_H +#define VC4_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_slab.h" + +#include "vc4_bufmgr.h" +#include "vc4_resource.h" +#include "vc4_cl.h" + +#define VC4_DIRTY_BLEND (1 << 0) +#define VC4_DIRTY_RASTERIZER (1 << 1) +#define VC4_DIRTY_ZSA (1 << 2) +#define VC4_DIRTY_FRAGTEX (1 << 3) +#define VC4_DIRTY_VERTTEX (1 << 4) +#define VC4_DIRTY_TEXSTATE (1 << 5) +#define VC4_DIRTY_PROG (1 << 6) +#define VC4_DIRTY_BLEND_COLOR (1 << 7) +#define VC4_DIRTY_STENCIL_REF (1 << 8) +#define VC4_DIRTY_SAMPLE_MASK (1 << 9) +#define VC4_DIRTY_FRAMEBUFFER (1 << 10) +#define VC4_DIRTY_STIPPLE (1 << 11) +#define VC4_DIRTY_VIEWPORT (1 << 12) +#define VC4_DIRTY_CONSTBUF (1 << 13) +#define VC4_DIRTY_VTXSTATE (1 << 14) +#define VC4_DIRTY_VTXBUF (1 << 15) +#define VC4_DIRTY_INDEXBUF (1 << 16) +#define VC4_DIRTY_SCISSOR (1 << 17) + +#define VC4_SHADER_DIRTY_VP (1 << 0) +#define VC4_SHADER_DIRTY_FP (1 << 1) + +struct vc4_texture_stateobj { + struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; + unsigned dirty_samplers; +}; + +struct vc4_shader_state { + struct pipe_shader_state base; + struct vc4_bo *bo; +}; + +struct vc4_program_stateobj { + struct vc4_shader_state *vs, *fs; + uint32_t dirty; + uint8_t num_exports; + /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index + * for TGSI_SEMANTIC_GENERIC. 
Special vs exports (position and point- + * size) are not included in this + */ + uint8_t export_linkage[63]; +}; + +struct vc4_constbuf_stateobj { + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct vc4_vertexbuf_stateobj { + struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + unsigned count; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct vc4_vertex_stateobj { + struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; + unsigned num_elements; +}; + +struct vc4_context { + struct pipe_context base; + + int fd; + struct vc4_screen *screen; + + struct vc4_cl bcl; + struct vc4_cl rcl; + struct vc4_cl shader_rec; + struct vc4_cl bo_handles; + uint32_t shader_rec_count; + + struct util_slab_mempool transfer_pool; + struct blitter_context *blitter; + + /** bitfield of VC4_DIRTY_* */ + uint32_t dirty; + + /** + * Set if some drawing (triangles, blits, or just a glClear()) has + * been done to the FBO, meaning that we need to + * DRM_IOCTL_VC4_SUBMIT_CL. 
+ */ + bool needs_flush; + + /** @{ Current pipeline state objects */ + struct pipe_scissor_state scissor; + struct pipe_blend_state *blend; + struct vc4_rasterizer_state *rasterizer; + struct pipe_depth_stencil_alpha_state *zsa; + + struct vc4_texture_stateobj verttex, fragtex; + + struct vc4_program_stateobj prog; + + struct vc4_vertex_stateobj *vtx; + + struct pipe_blend_color blend_color; + struct pipe_stencil_ref stencil_ref; + unsigned sample_mask; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple stipple; + struct pipe_viewport_state viewport; + struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; + struct vc4_vertexbuf_stateobj vertexbuf; + struct pipe_index_buffer indexbuf; + /** @} */ +}; + +struct vc4_rasterizer_state { + struct pipe_rasterizer_state base; + + /* VC4_CONFIGURATION_BITS */ + uint8_t config_bits[3]; + + float point_size; +}; + +static inline struct vc4_context * +vc4_context(struct pipe_context *pcontext) +{ + return (struct vc4_context *)pcontext; +} + +struct pipe_context *vc4_context_create(struct pipe_screen *pscreen, + void *priv); +void vc4_draw_init(struct pipe_context *pctx); +void vc4_state_init(struct pipe_context *pctx); +void vc4_program_init(struct pipe_context *pctx); +void vc4_simulator_init(struct vc4_screen *screen); +void vc4_simulator_flush(struct vc4_context *vc4, + struct vc4_surface *color_surf); +void *vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size); + +void vc4_flush(struct pipe_context *pctx); +void vc4_emit_state(struct pipe_context *pctx); + +#endif /* VC4_CONTEXT_H */ diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c new file mode 100644 index 00000000000..4c25dabae7d --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2014 Scott Mansell + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <stdio.h> + +#include "vc4_context.h" +#include "vc4_resource.h" + +static struct vc4_bo * +get_vbo(struct vc4_context *vc4, uint32_t width, uint32_t height) +{ + struct { + uint16_t x, y; + float z, rhw, r, g, b; + } verts[] = { + { + // Vertex: Top, red + (-(int)width / 3) << 4, // X in 12.4 fixed point + (-(int)height / 3) << 4, // Y in 12.4 fixed point + 1.0f, // Z + 1.0f, // 1/W + 1.0f, // Varying 0 (Red) + 0.0f, // Varying 1 (Green) + 0.0f, // Varying 2 (Blue) + }, + { + // Vertex: bottom left, Green + (width / 3) << 4, // X in 12.4 fixed point + (-(int)height / 3) << 4, // Y in 12.4 fixed point + 1.0f, // Z + 1.0f, // 1/W + 0.0f, // Varying 0 (Red) + 1.0f, // Varying 1 (Green) + 0.0f, // Varying 2 (Blue) + }, + + { + // Vertex: bottom right, Blue + (width / 3) << 4, // X in 12.4 fixed point + (height / 3) << 4, // Y in 12.4 fixed point + 1.0f, // Z + 1.0f, // 1/W + 0.0f, // Varying 0 (Red) + 0.0f, // Varying 1 (Green) + 1.0f, // Varying 2 (Blue) + }, + }; + + return vc4_bo_alloc_mem(vc4->screen, verts, sizeof(verts), "verts"); +} +static struct vc4_bo * +get_ibo(struct vc4_context *vc4) +{ + static const uint8_t indices[] = { 0, 1, 2 }; + + return vc4_bo_alloc_mem(vc4->screen, indices, sizeof(indices), "indices"); +} + +static void +vc4_rcl_tile_calls(struct vc4_context *vc4, + uint32_t xtiles, uint32_t ytiles, + struct vc4_bo *tile_alloc) +{ + for (int x = 0; x < xtiles; x++) { + for (int y = 0; y < ytiles; y++) { + cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); + cl_u8(&vc4->rcl, x); + cl_u8(&vc4->rcl, y); + + cl_start_reloc(&vc4->rcl, 1); + cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); + cl_reloc(vc4, &vc4->rcl, tile_alloc, + (y * xtiles + x) * 32); + + if (x == xtiles - 1 && y == ytiles - 1) { + cl_u8(&vc4->rcl, + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); + } else { + cl_u8(&vc4->rcl, + VC4_PACKET_STORE_MS_TILE_BUFFER); + } + } + } +} + +static void +vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) +{ + 
struct vc4_context *vc4 = vc4_context(pctx); + uint32_t width = vc4->framebuffer.width; + uint32_t height = vc4->framebuffer.height; + uint32_t tilew = align(width, 64) / 64; + uint32_t tileh = align(height, 64) / 64; + struct vc4_bo *tile_alloc = vc4_bo_alloc(vc4->screen, + 32 * tilew * tileh, "tilea"); + struct vc4_bo *tile_state = vc4_bo_alloc(vc4->screen, + 48 * tilew * tileh, "tilestate"); + struct vc4_bo *ibo = get_ibo(vc4); + + struct vc4_bo *fs_uniform = vc4_bo_alloc(vc4->screen, 0x1000, "fsu"); + struct vc4_bo *vbo = get_vbo(vc4, width, height); + + vc4->needs_flush = true; + + // Tile state data is 48 bytes per tile, I think it can be thrown away + // as soon as binning is finished. + cl_start_reloc(&vc4->bcl, 2); + cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG); + cl_reloc(vc4, &vc4->bcl, tile_alloc, 0); + cl_u32(&vc4->bcl, 0x8000); /* tile allocation memory size */ + cl_reloc(vc4, &vc4->bcl, tile_state, 0); + cl_u8(&vc4->bcl, tilew); + cl_u8(&vc4->bcl, tileh); + cl_u8(&vc4->bcl, VC4_BIN_CONFIG_AUTO_INIT_TSDA); + + cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING); + + cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT); + cl_u8(&vc4->bcl, 0x32); // 16 bit triangle + + vc4_emit_state(pctx); + + /* the actual draw call. 
+ */ + cl_u8(&vc4->bcl, VC4_PACKET_NV_SHADER_STATE); +#ifndef USE_VC4_SIMULATOR + cl_u32(&vc4->bcl, 0); /* offset into shader_rec */ +#else + cl_u32(&vc4->bcl, simpenrose_hw_addr(vc4->shader_rec.next)); +#endif + + cl_start_reloc(&vc4->bcl, 1); + cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); + cl_u8(&vc4->bcl, 0x04); // 8bit index, triangles + cl_u32(&vc4->bcl, 3); // Length + cl_reloc(vc4, &vc4->bcl, ibo, 0); + cl_u32(&vc4->bcl, 2); // Maximum index + + cl_u8(&vc4->bcl, VC4_PACKET_FLUSH_ALL); + cl_u8(&vc4->bcl, VC4_PACKET_NOP); + cl_u8(&vc4->bcl, VC4_PACKET_HALT); + +// Shader Record + cl_start_shader_reloc(&vc4->shader_rec, 3); + + cl_u8(&vc4->shader_rec, 0); + cl_u8(&vc4->shader_rec, 6*4); // stride + cl_u8(&vc4->shader_rec, 0xcc); // num uniforms (not used) + cl_u8(&vc4->shader_rec, 3); // num varyings + cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0); + cl_reloc(vc4, &vc4->shader_rec, fs_uniform, 0); + cl_reloc(vc4, &vc4->shader_rec, vbo, 0); + + vc4->shader_rec_count++; + + cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); + cl_u32(&vc4->rcl, 0xff000000); // Opaque Black + cl_u32(&vc4->rcl, 0xff000000); // 32 bit clear colours need to be repeated twice + cl_u32(&vc4->rcl, 0); + cl_u8(&vc4->rcl, 0); + + struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); + struct vc4_resource *ctex = vc4_resource(csurf->base.texture); + + cl_start_reloc(&vc4->rcl, 1); + cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); + cl_reloc(vc4, &vc4->rcl, ctex->bo, csurf->offset); + cl_u16(&vc4->rcl, width); + cl_u16(&vc4->rcl, height); + cl_u8(&vc4->rcl, (VC4_RENDER_CONFIG_MEMORY_FORMAT_LINEAR | + VC4_RENDER_CONFIG_FORMAT_RGBA8888)); + cl_u8(&vc4->rcl, 0); + + // Do a store of the first tile to force the tile buffer to be cleared + /* XXX: I think these two packets may be unnecessary. 
*/ + cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); + cl_u8(&vc4->rcl, 0); + cl_u8(&vc4->rcl, 0); + + cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + cl_u16(&vc4->rcl, 0); // Store nothing (just clear) + cl_u32(&vc4->rcl, 0); // no address is needed + + vc4_rcl_tile_calls(vc4, tilew, tileh, tile_alloc); + + vc4_flush(pctx); +} + +static void +vc4_clear(struct pipe_context *pctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + vc4->needs_flush = true; +} + +static void +vc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + fprintf(stderr, "unimpl: clear RT\n"); +} + +static void +vc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + fprintf(stderr, "unimpl: clear DS\n"); +} + +void +vc4_draw_init(struct pipe_context *pctx) +{ + pctx->draw_vbo = vc4_draw_vbo; + pctx->clear = vc4_clear; + pctx->clear_render_target = vc4_clear_render_target; + pctx->clear_depth_stencil = vc4_clear_depth_stencil; +} diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h new file mode 100644 index 00000000000..430d7cc1314 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_drm.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _UAPI_VC4_DRM_H_ +#define _UAPI_VC4_DRM_H_ + +#include <drm.h> + +#define DRM_VC4_SUBMIT_CL 0x00 + +#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) + +struct drm_vc4_submit_cl { + void __user *bin_cl; + void __user *render_cl; + void __user *shader_records; + void __user *bo_handles; + uint32_t bin_cl_len; + uint32_t render_cl_len; + uint32_t shader_record_len; + uint32_t shader_record_count; + uint32_t bo_handle_count; +}; + +#endif /* _UAPI_VC4_DRM_H_ */ diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c new file mode 100644 index 00000000000..9c41505288c --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_emit.c @@ -0,0 +1,59 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vc4_context.h" + +void +vc4_emit_state(struct pipe_context *pctx) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if (vc4->dirty & VC4_DIRTY_SCISSOR) { + cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW); + cl_u16(&vc4->bcl, vc4->scissor.minx); + cl_u16(&vc4->bcl, vc4->scissor.miny); + cl_u16(&vc4->bcl, vc4->scissor.maxx - vc4->scissor.minx); + cl_u16(&vc4->bcl, vc4->scissor.maxy - vc4->scissor.miny); + } + + if (vc4->dirty & VC4_DIRTY_RASTERIZER) { + cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS); + cl_u8(&vc4->bcl, vc4->rasterizer->config_bits[0]); + cl_u8(&vc4->bcl, vc4->rasterizer->config_bits[1]); + cl_u8(&vc4->bcl, vc4->rasterizer->config_bits[2]); + } + + if (vc4->dirty & VC4_DIRTY_VIEWPORT) { + cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_XY_SCALING); + cl_f(&vc4->bcl, vc4->viewport.scale[0] * 16.0f); + cl_f(&vc4->bcl, vc4->viewport.scale[1] * 16.0f); + + cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_Z_SCALING); + cl_f(&vc4->bcl, vc4->viewport.translate[2]); + cl_f(&vc4->bcl, vc4->viewport.scale[2]); + + cl_u8(&vc4->bcl, VC4_PACKET_VIEWPORT_OFFSET); + cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[0]); + cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[1]); + } +} diff --git a/src/gallium/drivers/vc4/vc4_packet.h b/src/gallium/drivers/vc4/vc4_packet.h new file mode 100644 index 00000000000..e7c334c5556 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_packet.h @@ -0,0 +1,194 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VC4_PACKET_H +#define VC4_PACKET_H + +enum vc4_packet { + VC4_PACKET_HALT = 0, + VC4_PACKET_NOP = 1, + + VC4_PACKET_FLUSH = 4, + VC4_PACKET_FLUSH_ALL = 5, + VC4_PACKET_START_TILE_BINNING = 6, + VC4_PACKET_INCREMENT_SEMAPHORE = 7, + VC4_PACKET_WAIT_ON_SEMAPHORE = 8, + + VC4_PACKET_BRANCH = 16, + VC4_PACKET_BRANCH_TO_SUB_LIST = 17, + + VC4_PACKET_STORE_MS_TILE_BUFFER = 24, + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25, + VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26, + VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27, + VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28, + VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29, + + VC4_PACKET_GL_INDEXED_PRIMITIVE = 32, + VC4_PACKET_GL_ARRAY_PRIMITIVE = 33, + + VC4_PACKET_COMPRESSED_PRIMITIVE = 48, + VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49, + + VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56, + + VC4_PACKET_GL_SHADER_STATE = 64, + VC4_PACKET_NV_SHADER_STATE = 65, + VC4_PACKET_VG_SHADER_STATE = 66, + + VC4_PACKET_CONFIGURATION_BITS = 96, + VC4_PACKET_FLAT_SHADE_FLAGS = 97, + VC4_PACKET_POINT_SIZE = 98, + VC4_PACKET_LINE_WIDTH = 99, + VC4_PACKET_RHT_X_BOUNDARY = 100, + VC4_PACKET_DEPTH_OFFSET = 101, + VC4_PACKET_CLIP_WINDOW = 102, + VC4_PACKET_VIEWPORT_OFFSET = 103, + VC4_PACKET_Z_CLIPPING = 104, + VC4_PACKET_CLIPPER_XY_SCALING = 105, + VC4_PACKET_CLIPPER_Z_SCALING = 106, + + VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112, + VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113, + VC4_PACKET_CLEAR_COLORS = 114, + VC4_PACKET_TILE_COORDINATES = 115, + GEM_HANDLES = 254, +} __attribute__ ((__packed__)); + +/** @{ + * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL (low bits of the + * address) + */ + +#define VC4_STORE_TILE_BUFFER_DISABLE_FULL_VG_MASK_DUMP (1 << 2) +#define VC4_STORE_TILE_BUFFER_DISABLE_FULL_ZS_DUMP (1 << 1) +#define VC4_STORE_TILE_BUFFER_DISABLE_FULL_COLOR_DUMP (1 << 0) + +/** @} */ + +/** @{ byte 1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL */ +#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 7) +#define 
VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 6) +#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 5) +#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 4) + +#define VC4_STORE_TILE_BUFFER_RGBA8888 (0 << 0) +#define VC4_STORE_TILE_BUFFER_BGR565_DITHER (1 << 0) +#define VC4_STORE_TILE_BUFFER_BGR565 (2 << 0) +/** @} */ + +/** @{ byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL */ +#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6) +#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6) +#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6) + +#define VC4_STORE_TILE_BUFFER_FORMAT_RASTER (0 << 4) +#define VC4_STORE_TILE_BUFFER_FORMAT_T (1 << 4) +#define VC4_STORE_TILE_BUFFER_FORMAT_LT (2 << 4) + +#define VC4_STORE_TILE_BUFFER_NONE (0 << 0) +#define VC4_STORE_TILE_BUFFER_COLOR (1 << 0) +#define VC4_STORE_TILE_BUFFER_ZS (2 << 0) +#define VC4_STORE_TILE_BUFFER_Z (3 << 0) +#define VC4_STORE_TILE_BUFFER_VG_MASK (4 << 0) +#define VC4_STORE_TILE_BUFFER_FULL (5 << 0) +/** @} */ + +/* This flag is only present in NV shader state. */ +#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS (1 << 3) +#define VC4_SHADER_FLAG_ENABLE_CLIPPING (1 << 2) +#define VC4_SHADER_FLAG_VS_POINT_SIZE (1 << 1) +#define VC4_SHADER_FLAG_FS_SINGLE_THREAD (1 << 0) + +/** @{ byte 2 of config bits. */ +#define VC4_CONFIG_BITS_EARLY_Z_UPDATE (1 << 1) +#define VC4_CONFIG_BITS_EARLY_Z (1 << 0) +/** @} */ + +/** @{ byte 1 of config bits. */ +#define VC4_CONFIG_BITS_Z_UPDATE (1 << 7) +/** same values in this 3-bit field as PIPE_FUNC_* */ +#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4 +#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE (1 << 3) + +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1) + +#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT (1 << 0) +/** @} */ + +/** @{ byte 0 of config bits. 
*/ +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6) +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6) +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6) + +#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES (1 << 4) +#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET (1 << 3) +#define VC4_CONFIG_BITS_CW_PRIMITIVES (1 << 2) +#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK (1 << 1) +#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT (1 << 0) +/** @} */ + +/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */ +#define VC4_BIN_CONFIG_DB_NON_MS (1 << 7) + +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 (0 << 5) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 (1 << 5) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 (2 << 5) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 (3 << 5) + +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 (0 << 3) +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 (1 << 3) +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 (2 << 3) +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 (3 << 3) + +#define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2) +#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1) +#define VC4_BIN_CONFIG_MS_MODE_4X (1 << 0) +/** @} */ + +/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */ +#define VC4_RENDER_CONFIG_DB_NON_MS (1 << 12) +#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11) +#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G (1 << 10) +#define VC4_RENDER_CONFIG_COVERAGE_MODE (1 << 9) +#define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8) + +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_LINEAR (0 << 6) +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_T (1 << 6) +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_LT (2 << 6) + +#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4) +#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4) +#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4) + +#define VC4_RENDER_CONFIG_FORMAT_BGR565 (0 << 2) +#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 (1 << 2) +#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 
(2 << 2) + +#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1) +#define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0) + +#endif /* VC4_PACKET_H */ diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c new file mode 100644 index 00000000000..2fe7c216838 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2014 Scott Mansell + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <stdio.h> +#include <inttypes.h> +#include "pipe/p_state.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "vc4_context.h" +#include "vc4_qpu.h" + +static void +vc4_dump_program(const uint64_t *insts, uint count) +{ + for (int i = 0; i < count; i++) { + fprintf(stderr, "0x%016"PRIx64" ", insts[i]); + vc4_qpu_disasm(&insts[i], 1); + fprintf(stderr, "\n"); + } +} + +static struct vc4_shader_state * +vc4_shader_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct vc4_shader_state *so = CALLOC_STRUCT(vc4_shader_state); + if (!so) + return NULL; + + so->base.tokens = tgsi_dup_tokens(cso->tokens); + + return so; +} + +static void * +vc4_fs_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso); + if (!so) + return NULL; + + uint64_t gen_fsc[100]; + uint64_t cur_inst; + int gen_fsc_len = 0; +#if 1 + cur_inst = qpu_load_imm_f(qpu_r5(), 0.0f); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_MOV(qpu_r0(), qpu_vary()), + qpu_m_MOV(qpu_r3(), qpu_r5())); + cur_inst |= QPU_PM; + cur_inst |= QPU_SET_FIELD(QPU_PACK_MUL_8D, QPU_PACK); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_FADD(qpu_r0(), qpu_r0(), qpu_r5()), + qpu_m_MOV(qpu_r1(), qpu_vary())); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_FADD(qpu_r1(), qpu_r1(), qpu_r5()), + qpu_m_MOV(qpu_r2(), qpu_vary())); + cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_WAIT_FOR_SCOREBOARD, QPU_SIG); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_FADD(qpu_r2(), qpu_r2(), qpu_r5()), + qpu_m_MOV(qpu_r3(), qpu_r0())); + cur_inst |= QPU_PM; + cur_inst |= QPU_SET_FIELD(QPU_PACK_MUL_8A, QPU_PACK); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), + qpu_m_MOV(qpu_r3(), qpu_r1())); + cur_inst |= QPU_PM; + 
cur_inst |= QPU_SET_FIELD(QPU_PACK_MUL_8B, QPU_PACK); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), + qpu_m_MOV(qpu_r3(), qpu_r2())); + cur_inst |= QPU_PM; + cur_inst |= QPU_SET_FIELD(QPU_PACK_MUL_8C, QPU_PACK); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_MOV(qpu_tlbc(), qpu_r3()), + qpu_m_NOP()); + cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_PROG_END, QPU_SIG); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_SCOREBOARD_UNLOCK, QPU_SIG); + gen_fsc[gen_fsc_len++] = cur_inst; + +#else + + /* drain the varyings. */ + for (int i = 0; i < 3; i++) { + cur_inst = qpu_inst(qpu_a_MOV(qpu_ra(QPU_W_NOP), qpu_rb(QPU_R_NOP)), + qpu_m_NOP()); + if (i == 1) + cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_WAIT_FOR_SCOREBOARD, QPU_SIG); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + gen_fsc[gen_fsc_len++] = cur_inst; + } + + /* some colors */ +#if 1 + for (int i = 0; i < 4; i++) { + cur_inst = qpu_load_imm_f(qpu_rn(i), .2 + i / 4.0); + gen_fsc[gen_fsc_len++] = cur_inst; + } + + for (int i = 0; i < 4; i++) { + cur_inst = qpu_inst(qpu_a_NOP(), + qpu_m_FMUL(qpu_ra(1), + qpu_rn(i), qpu_rn(i))); + cur_inst |= QPU_PM; + cur_inst |= QPU_SET_FIELD(QPU_PACK_A_8A + i, QPU_PACK); + gen_fsc[gen_fsc_len++] = cur_inst; + } +#else + cur_inst = qpu_load_imm_ui(qpu_ra(1), 0x22446688); + gen_fsc[gen_fsc_len++] = cur_inst; +#endif + + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_MOV(qpu_tlbc(), qpu_ra(1)), + qpu_m_NOP()); + cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_PROG_END, QPU_SIG); + gen_fsc[gen_fsc_len++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + gen_fsc[gen_fsc_len++] = 
cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_SCOREBOARD_UNLOCK, QPU_SIG); + gen_fsc[gen_fsc_len++] = cur_inst; +#endif + + + if (1) + vc4_dump_program(gen_fsc, gen_fsc_len); + vc4_qpu_validate(gen_fsc, gen_fsc_len); + + so->bo = vc4_bo_alloc_mem(vc4->screen, gen_fsc, + gen_fsc_len * sizeof(uint64_t), "fs_code"); + + return so; +} + +static void * +vc4_vs_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso); + if (!so) + return NULL; + + + return so; +} + +static void +vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct pipe_shader_state *so = hwcso; + + free((void *)so->tokens); + free(so); +} + +static void +vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc4_context *vc4 = vc4_context(pctx); + vc4->prog.fs = hwcso; + vc4->prog.dirty |= VC4_SHADER_DIRTY_FP; + vc4->dirty |= VC4_DIRTY_PROG; +} + +static void +vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc4_context *vc4 = vc4_context(pctx); + vc4->prog.vs = hwcso; + vc4->prog.dirty |= VC4_SHADER_DIRTY_VP; + vc4->dirty |= VC4_DIRTY_PROG; +} + +void +vc4_program_init(struct pipe_context *pctx) +{ + pctx->create_vs_state = vc4_vs_state_create; + pctx->delete_vs_state = vc4_shader_state_delete; + + pctx->create_fs_state = vc4_fs_state_create; + pctx->delete_fs_state = vc4_shader_state_delete; + + pctx->bind_fs_state = vc4_fp_state_bind; + pctx->bind_vs_state = vc4_vp_state_bind; +} diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c new file mode 100644 index 00000000000..18863f7eac1 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -0,0 +1,210 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * 
to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdbool.h> +#include "vc4_qpu.h" + +static uint64_t +set_src_raddr(uint64_t inst, struct qpu_reg src) +{ + if (src.mux == QPU_MUX_A) { + /* These asserts could be better, checking to be sure we're + * not overwriting an actual use of a raddr of 0. 
+ */ + assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == 0 || + QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr); + return inst | QPU_SET_FIELD(src.addr, QPU_RADDR_A); + } + + if (src.mux == QPU_MUX_B) { + assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == 0 || + QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr); + return inst | QPU_SET_FIELD(src.addr, QPU_RADDR_B); + } + + return inst; +} + +uint64_t +qpu_a_NOP() +{ + uint64_t inst = 0; + + inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD); + inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); + + return inst; +} + +uint64_t +qpu_m_NOP() +{ + uint64_t inst = 0; + + inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL); + inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); + + return inst; +} + +static uint64_t +qpu_a_dst(struct qpu_reg dst) +{ + uint64_t inst = 0; + + if (dst.mux <= QPU_MUX_R5) { + /* Translate the mux to the ACCn values. */ + inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD); + } else { + inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD); + if (dst.mux == QPU_MUX_B) + inst |= QPU_WS; + } + + return inst; +} + +static uint64_t +qpu_m_dst(struct qpu_reg dst) +{ + uint64_t inst = 0; + + if (dst.mux <= QPU_MUX_R5) { + /* Translate the mux to the ACCn values. 
*/ + inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL); + } else { + inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL); + if (dst.mux == QPU_MUX_A) + inst |= QPU_WS; + } + + return inst; +} + +uint64_t +qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) +{ + uint64_t inst = 0; + + inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD); + inst |= qpu_a_dst(dst); + inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); + inst |= QPU_SET_FIELD(src.mux, QPU_ADD_A); + inst |= QPU_SET_FIELD(src.mux, QPU_ADD_B); + inst |= set_src_raddr(inst, src); + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); + + return inst; +} + +uint64_t +qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) +{ + uint64_t inst = 0; + + inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL); + inst |= qpu_m_dst(dst); + inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); + inst |= QPU_SET_FIELD(src.mux, QPU_MUL_A); + inst |= QPU_SET_FIELD(src.mux, QPU_MUL_B); + inst |= set_src_raddr(inst, src); + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); + + return inst; +} + +uint64_t +qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) +{ + uint64_t inst = 0; + + inst |= qpu_a_dst(dst); + inst |= qpu_m_dst(qpu_rb(QPU_W_NOP)); + inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); + inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); + inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG); + inst |= val; + + return inst; +} + +uint64_t +qpu_a_alu2(enum qpu_op_add op, + struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) +{ + uint64_t inst = 0; + + inst |= QPU_SET_FIELD(op, QPU_OP_ADD); + inst |= qpu_a_dst(dst); + inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); + inst |= QPU_SET_FIELD(src0.mux, QPU_ADD_A); + inst |= set_src_raddr(inst, src0); + inst |= QPU_SET_FIELD(src1.mux, QPU_ADD_B); + inst |= set_src_raddr(inst, src1); + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); + + return inst; +} + +uint64_t +qpu_m_alu2(enum qpu_op_mul op, + struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) +{ + uint64_t inst = 0; + + 
set_src_raddr(inst, src0); + set_src_raddr(inst, src1); + + inst |= QPU_SET_FIELD(op, QPU_OP_MUL); + inst |= qpu_m_dst(dst); + inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); + inst |= QPU_SET_FIELD(src0.mux, QPU_MUL_A); + inst |= set_src_raddr(inst, src0); + inst |= QPU_SET_FIELD(src1.mux, QPU_MUL_B); + inst |= set_src_raddr(inst, src1); + inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); + + return inst; +} + +uint64_t +qpu_inst(uint64_t add, uint64_t mul) +{ + uint64_t merge = add | mul; + + /* If either one has no signal field, then use the other's signal field. + * (since QPU_SIG_NONE != 0). + */ + if (QPU_GET_FIELD(add, QPU_SIG) == QPU_SIG_NONE) + merge = (merge & ~QPU_SIG_MASK) | (mul & QPU_SIG_MASK); + else if (QPU_GET_FIELD(mul, QPU_SIG) == QPU_SIG_NONE) + merge = (merge & ~QPU_SIG_MASK) | (add & QPU_SIG_MASK); + else { + assert(QPU_GET_FIELD(add, QPU_SIG) == + QPU_GET_FIELD(mul, QPU_SIG)); + } + + return merge; +} diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h new file mode 100644 index 00000000000..00aebf0a706 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_qpu.h @@ -0,0 +1,201 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC4_QPU_H +#define VC4_QPU_H + +#include <stdint.h> + +#include "util/u_math.h" + +#include "vc4_qpu_defines.h" + +struct qpu_reg { + enum qpu_mux mux; + uint8_t addr; +}; + +static inline struct qpu_reg +qpu_rn(int n) +{ + struct qpu_reg r = { + QPU_MUX_R0 + n, + 0, + }; + + return r; +} + +static inline struct qpu_reg +qpu_ra(int addr) +{ + struct qpu_reg r = { + QPU_MUX_A, + addr, + }; + + return r; +} + +static inline struct qpu_reg +qpu_rb(int addr) +{ + struct qpu_reg r = { + QPU_MUX_B, + addr, + }; + + return r; +} + +static inline struct qpu_reg +qpu_vary() +{ + struct qpu_reg r = { + QPU_MUX_A, + QPU_R_VARY, + }; + + return r; +} + +static inline struct qpu_reg +qpu_unif() +{ + struct qpu_reg r = { + QPU_MUX_A, + QPU_R_UNIF, + }; + + return r; +} + +static inline struct qpu_reg +qpu_vrsetup() +{ + return qpu_ra(QPU_W_VPMVCD_SETUP); +} + +static inline struct qpu_reg +qpu_vwsetup() +{ + return qpu_rb(QPU_W_VPMVCD_SETUP); +} + +static inline struct qpu_reg +qpu_tlbc() +{ + struct qpu_reg r = { + QPU_MUX_A, + QPU_W_TLB_COLOR_ALL, + }; + + return r; +} + +static inline struct qpu_reg qpu_r0(void) { return qpu_rn(0); } +static inline struct qpu_reg qpu_r1(void) { return qpu_rn(1); } +static inline struct qpu_reg qpu_r2(void) { return qpu_rn(2); } +static inline struct qpu_reg qpu_r3(void) { return qpu_rn(3); } +static inline struct qpu_reg qpu_r4(void) { return qpu_rn(4); } +static inline struct qpu_reg qpu_r5(void) { return qpu_rn(5); } + +uint64_t 
qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src); +uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src); +uint64_t qpu_a_NOP(void); +uint64_t qpu_m_NOP(void); +uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst, + struct qpu_reg src0, struct qpu_reg src1); +uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst, + struct qpu_reg src0, struct qpu_reg src1); +uint64_t qpu_inst(uint64_t add, uint64_t mul); +uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val); + +static inline uint64_t +qpu_load_imm_f(struct qpu_reg dst, float val) +{ + return qpu_load_imm_ui(dst, fui(val)); +} + +#define A_ALU2(op) \ +static inline uint64_t \ +qpu_a_##op(struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) \ +{ \ + return qpu_a_alu2(QPU_A_##op, dst, src0, src1); \ +} + +#define M_ALU2(op) \ +static inline uint64_t \ +qpu_m_##op(struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) \ +{ \ + return qpu_m_alu2(QPU_M_##op, dst, src0, src1); \ +} + +#define A_ALU1(op) \ +static inline uint64_t \ +qpu_a_##op(struct qpu_reg dst, struct qpu_reg src0) \ +{ \ + return qpu_a_alu2(QPU_A_##op, dst, src0, src0); \ +} + +/*A_ALU2(NOP) */ +A_ALU2(FADD) +A_ALU2(FSUB) +A_ALU2(FMIN) +A_ALU2(FMAX) +A_ALU2(MINABS) +A_ALU2(MAXABS) +A_ALU1(FTOI) +A_ALU1(ITOF) +A_ALU2(ADD) +A_ALU2(SUB) +A_ALU2(SHR) +A_ALU2(ASR) +A_ALU2(ROR) +A_ALU2(SHL) +A_ALU2(MIN) +A_ALU2(MAX) +A_ALU2(AND) +A_ALU2(OR) +A_ALU2(XOR) +A_ALU1(NOT) +A_ALU1(CLZ) +A_ALU2(V8ADDS) +A_ALU2(V8SUBS) + +/* M_ALU2(NOP) */ +M_ALU2(FMUL) +M_ALU2(MUL24) +M_ALU2(V8MULD) +M_ALU2(V8MIN) +M_ALU2(V8MAX) +M_ALU2(V8ADDS) +M_ALU2(V8SUBS) + +void +vc4_qpu_disasm(const uint64_t *instructions, int num_instructions); + +void +vc4_qpu_validate(uint64_t *insts, uint32_t num_inst); + +#endif /* VC4_QPU_H */ diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h new file mode 100644 index 00000000000..d066f278ab3 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h @@ -0,0 
+1,255 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#ifndef VC4_QPU_DEFINES_H
+#define VC4_QPU_DEFINES_H
+
+#include <assert.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+enum qpu_op_add {
+        QPU_A_NOP,
+        QPU_A_FADD,
+        QPU_A_FSUB,
+        QPU_A_FMIN,
+        QPU_A_FMAX,
+        QPU_A_MINABS,
+        QPU_A_MAXABS,
+        QPU_A_FTOI,
+        QPU_A_ITOF,
+        QPU_A_ADD = 12,
+        QPU_A_SUB,
+        QPU_A_SHR,
+        QPU_A_ASR,
+        QPU_A_ROR,
+        QPU_A_SHL,
+        QPU_A_MIN,
+        QPU_A_MAX,
+        QPU_A_AND,
+        QPU_A_OR,
+        QPU_A_XOR,
+        QPU_A_NOT,
+        QPU_A_CLZ,
+        QPU_A_V8ADDS = 30,
+        QPU_A_V8SUBS = 31,
+};
+
+enum qpu_op_mul {
+        QPU_M_NOP,
+        QPU_M_FMUL,
+        QPU_M_MUL24,
+        QPU_M_V8MULD,
+        QPU_M_V8MIN,
+        QPU_M_V8MAX,
+        QPU_M_V8ADDS,
+        QPU_M_V8SUBS,
+};
+
+enum qpu_raddr {
+        /* 0-31 are the plain regfile a or b fields */
+        QPU_R_UNIF = 32,
+        QPU_R_VARY = 35,
+        QPU_R_ELEM_QPU = 38,
+        QPU_R_NOP,
+        QPU_R_XY_PIXEL_COORD = 41, /* X for regfile a, Y for regfile b */
+        /* Must not share an address with QPU_R_XY_PIXEL_COORD: the
+         * multisample/reverse flags sit one address above it, mirroring
+         * QPU_W_MS_FLAGS / QPU_W_REV_FLAG on the write side.
+         */
+        QPU_R_MS_REV_FLAGS = 42, /* MS flags for regfile a, rev flag for b */
+        QPU_R_VPM = 48,
+        QPU_R_VPM_LD_BUSY,
+        QPU_R_VPM_LD_WAIT,
+        QPU_R_MUTEX_ACQUIRE,
+};
+
+enum qpu_waddr {
+        /* 0-31 are the plain regfile a or b fields */
+        QPU_W_ACC0 = 32, /* aka r0 */
+        QPU_W_ACC1,
+        QPU_W_ACC2,
+        QPU_W_ACC3,
+        QPU_W_TMU_NOSWAP,
+        QPU_W_ACC5,
+        QPU_W_HOST_INT,
+        QPU_W_NOP,
+        QPU_W_UNIFORMS_ADDRESS,
+        QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
+        QPU_W_MS_FLAGS = 42,
+        QPU_W_REV_FLAG = 42,
+        QPU_W_TLB_STENCIL_SETUP = 43,
+        QPU_W_TLB_Z,
+        QPU_W_TLB_COLOR_MS,
+        QPU_W_TLB_COLOR_ALL,
+        QPU_W_TLB_ALPHA_MASK,
+        QPU_W_VPM,
+        QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
+        QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
+        QPU_W_MUTEX_RELEASE,
+        QPU_W_SFU_RECIP,
+        QPU_W_SFU_RECIPSQRT,
+        QPU_W_SFU_EXP,
+        QPU_W_SFU_LOG,
+        QPU_W_TMU0_S,
+        QPU_W_TMU0_T,
+        QPU_W_TMU0_R,
+        QPU_W_TMU0_B,
+        QPU_W_TMU1_S,
+        QPU_W_TMU1_T,
+        QPU_W_TMU1_R,
+        QPU_W_TMU1_B,
+};
+
+enum qpu_sig_bits {
+        QPU_SIG_SW_BREAKPOINT,
+        QPU_SIG_NONE,
+        QPU_SIG_THREAD_SWITCH,
+        QPU_SIG_PROG_END,
+        QPU_SIG_WAIT_FOR_SCOREBOARD,
+        QPU_SIG_SCOREBOARD_UNLOCK,
+        QPU_SIG_LAST_THREAD_SWITCH,
+        QPU_SIG_COVERAGE_LOAD, +
QPU_SIG_COLOR_LOAD, + QPU_SIG_COLOR_LOAD_END, + QPU_SIG_LOAD_TMU0, + QPU_SIG_LOAD_TMU1, + QPU_SIG_ALPHA_MASK_LOAD, + QPU_SIG_SMALL_IMM, + QPU_SIG_LOAD_IMM, + QPU_SIG_BRANCH +}; + +enum qpu_mux { + /* hardware mux values */ + QPU_MUX_R0, + QPU_MUX_R1, + QPU_MUX_R2, + QPU_MUX_R3, + QPU_MUX_R4, + QPU_MUX_R5, + QPU_MUX_A, + QPU_MUX_B, + + /* non-hardware mux values */ + QPU_MUX_IMM, +}; + +enum qpu_cond { + QPU_COND_NEVER, + QPU_COND_ALWAYS, + QPU_COND_ZS, + QPU_COND_ZC, + QPU_COND_NS, + QPU_COND_NC, + QPU_COND_CS, + QPU_COND_CC, +}; + +enum qpu_pack_mul { + QPU_PACK_MUL_NOP, + QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */ + QPU_PACK_MUL_8A, + QPU_PACK_MUL_8B, + QPU_PACK_MUL_8C, + QPU_PACK_MUL_8D, +}; + +enum qpu_pack_a { + QPU_PACK_A_NOP, + /* convert to 16 bit float if float input, or to int16. */ + QPU_PACK_A_16A, + QPU_PACK_A_16B, + /* replicated to each 8 bits of the 32-bit dst. */ + QPU_PACK_A_8888, + /* Convert to 8-bit unsigned int. */ + QPU_PACK_A_8A, + QPU_PACK_A_8B, + QPU_PACK_A_8C, + QPU_PACK_A_8D, + + /* Saturating variants of the previous instructions. */ + QPU_PACK_A_32_SAT, /* int-only */ + QPU_PACK_A_16A_SAT, /* int or float */ + QPU_PACK_A_16B_SAT, + QPU_PACK_A_8888_SAT, + QPU_PACK_A_8A_SAT, + QPU_PACK_A_8B_SAT, + QPU_PACK_A_8C_SAT, + QPU_PACK_A_8D_SAT, +}; + +#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) +/* Using the GNU statement expression extension */ +#define QPU_SET_FIELD(value, field) \ + ({ \ + uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ + assert((fieldval & ~ field ## _MASK) == 0); \ + fieldval & field ## _MASK; \ + }) + +#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) + +#define QPU_SIG_SHIFT 60 +#define QPU_SIG_MASK QPU_MASK(63, 60) + +/** + * If set, the pack field means PACK_MUL or R4 packing, instead of normal + * regfile a packing. 
+ */ +#define QPU_PM ((uint64_t)1 << 56) + +#define QPU_PACK_SHIFT 52 +#define QPU_PACK_MASK QPU_MASK(55, 52) + +#define QPU_COND_ADD_SHIFT 49 +#define QPU_COND_ADD_MASK QPU_MASK(51, 49) +#define QPU_COND_MUL_SHIFT 46 +#define QPU_COND_MUL_MASK QPU_MASK(48, 46) + +#define QPU_WADDR_ADD_SHIFT 38 +#define QPU_WADDR_ADD_MASK QPU_MASK(43, 38) +#define QPU_WADDR_MUL_SHIFT 32 +#define QPU_WADDR_MUL_MASK QPU_MASK(37, 32) + +#define QPU_OP_MUL_SHIFT 29 +#define QPU_OP_MUL_MASK QPU_MASK(31, 29) + +#define QPU_RADDR_A_SHIFT 18 +#define QPU_RADDR_A_MASK QPU_MASK(23, 18) +#define QPU_RADDR_B_SHIFT 12 +#define QPU_RADDR_B_MASK QPU_MASK(17, 12) +#define QPU_SMALL_IMM_SHIFT 12 +#define QPU_SMALL_IMM_MASK QPU_MASK(17, 12) + +#define QPU_ADD_A_SHIFT 9 +#define QPU_ADD_A_MASK QPU_MASK(11, 9) +#define QPU_ADD_B_SHIFT 6 +#define QPU_ADD_B_MASK QPU_MASK(8, 6) +#define QPU_MUL_A_SHIFT 3 +#define QPU_MUL_A_MASK QPU_MASK(5, 3) +#define QPU_MUL_B_SHIFT 0 +#define QPU_MUL_B_MASK QPU_MASK(2, 0) + +#define QPU_WS ((uint64_t)1 << 44) + +#define QPU_OP_ADD_SHIFT 24 +#define QPU_OP_ADD_MASK QPU_MASK(28, 24) + +#endif /* VC4_QPU_DEFINES_H */ diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c new file mode 100644 index 00000000000..cf90cb2e768 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c @@ -0,0 +1,363 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial 
portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdbool.h> +#include <stdio.h> + +#include "vc4_qpu.h" +#include "vc4_qpu_defines.h" + +static const char *qpu_add_opcodes[] = { + [QPU_A_NOP] = "nop", + [QPU_A_FADD] = "fadd", + [QPU_A_FSUB] = "fsub", + [QPU_A_FMIN] = "fmin", + [QPU_A_FMAX] = "fmax", + [QPU_A_MINABS] = "minabs", + [QPU_A_MAXABS] = "maxabs", + [QPU_A_FTOI] = "ftoi", + [QPU_A_ITOF] = "itof", + [QPU_A_ADD] = "add", + [QPU_A_SUB] = "sub", + [QPU_A_SHR] = "shr", + [QPU_A_ASR] = "asr", + [QPU_A_ROR] = "ror", + [QPU_A_SHL] = "shl", + [QPU_A_MIN] = "min", + [QPU_A_MAX] = "max", + [QPU_A_AND] = "and", + [QPU_A_OR] = "or", + [QPU_A_XOR] = "xor", + [QPU_A_NOT] = "not", + [QPU_A_CLZ] = "clz", + [QPU_A_V8ADDS] = "v8adds", + [QPU_A_V8SUBS] = "v8subs", +}; + +static const char *qpu_mul_opcodes[] = { + [QPU_M_NOP] = "nop", + [QPU_M_FMUL] = "fmul", + [QPU_M_MUL24] = "mul24", + [QPU_M_V8MULD] = "v8muld", + [QPU_M_V8MIN] = "v8min", + [QPU_M_V8MAX] = "v8max", + [QPU_M_V8ADDS] = "v8adds", + [QPU_M_V8SUBS] = "v8subs", +}; + +static const char *qpu_sig[] = { + [QPU_SIG_SW_BREAKPOINT] = "sig_brk", + [QPU_SIG_NONE] = "", + [QPU_SIG_THREAD_SWITCH] = "sig_switch", + [QPU_SIG_PROG_END] = "sig_end", + [QPU_SIG_WAIT_FOR_SCOREBOARD] = "sig_wait_score", + [QPU_SIG_SCOREBOARD_UNLOCK] = "sig_unlock_score", + [QPU_SIG_LAST_THREAD_SWITCH] = "sig_thread_switch", + [QPU_SIG_COVERAGE_LOAD] = "sig_coverage_load", + [QPU_SIG_COLOR_LOAD] = "sig_color_load", + [QPU_SIG_COLOR_LOAD_END] = "sig_color_load_end", 
+ [QPU_SIG_LOAD_TMU0] = "load_tmu0", + [QPU_SIG_LOAD_TMU1] = "load_tmu1", + [QPU_SIG_ALPHA_MASK_LOAD] = "sig_alpha_mask_load", + [QPU_SIG_SMALL_IMM] = "sig_small_imm", + [QPU_SIG_LOAD_IMM] = "sig_load_imm", + [QPU_SIG_BRANCH] = "sig_branch", +}; + +static const char *qpu_pack_mul[] = { + [QPU_PACK_MUL_NOP] = "", + [QPU_PACK_MUL_8888] = "8888", + [QPU_PACK_MUL_8A] = "8a", + [QPU_PACK_MUL_8B] = "8b", + [QPU_PACK_MUL_8C] = "8c", + [QPU_PACK_MUL_8D] = "8d", +}; + +static const char *special_read_a[] = { + "uni", + NULL, + NULL, + "vary", + NULL, + NULL, + "elem", + "nop", + NULL, + "x_pix", + "ms_flags", + NULL, + NULL, + NULL, + NULL, + NULL, + "vpm_read", + "vpm_ld_busy", + "vpm_ld_wait", + "mutex_acq" +}; + +static const char *special_read_b[] = { + "uni", + NULL, + NULL, + "vary", + NULL, + NULL, + "qpu", + "nop", + NULL, + "y_pix", + "rev_flag", + NULL, + NULL, + NULL, + NULL, + NULL, + "vpm_read", + "vpm_st_busy", + "vpm_st_wait", + "mutex_acq" +}; + +/** + * This has the B-file descriptions for register writes. + * + * Since only a couple of regs are different between A and B, the A overrides + * are in get_special_write_desc(). 
+ */
+static const char *special_write[] = {
+        [QPU_W_ACC0] = "r0",
+        [QPU_W_ACC1] = "r1",
+        [QPU_W_ACC2] = "r2",
+        [QPU_W_ACC3] = "r3",
+        [QPU_W_TMU_NOSWAP] = "tmu_noswap",
+        [QPU_W_ACC5] = "r5",
+        [QPU_W_HOST_INT] = "host_int",
+        [QPU_W_NOP] = "nop",
+        [QPU_W_UNIFORMS_ADDRESS] = "uniforms_addr",
+        [QPU_W_QUAD_XY] = "quad_y",
+        [QPU_W_MS_FLAGS] = "ms_flags",
+        [QPU_W_TLB_STENCIL_SETUP] = "tlb_stencil_setup",
+        [QPU_W_TLB_Z] = "tlb_z",
+        [QPU_W_TLB_COLOR_MS] = "tlb_color_ms",
+        [QPU_W_TLB_COLOR_ALL] = "tlb_color_all",
+        [QPU_W_TLB_ALPHA_MASK] = "tlb_alpha_mask",
+        [QPU_W_VPM] = "vpm",
+        [QPU_W_VPMVCD_SETUP] = "vw_setup",
+        [QPU_W_VPM_ADDR] = "vw_addr",
+        [QPU_W_MUTEX_RELEASE] = "mutex_release",
+        [QPU_W_SFU_RECIP] = "sfu_recip",
+        [QPU_W_SFU_RECIPSQRT] = "sfu_recipsqrt",
+        [QPU_W_SFU_EXP] = "sfu_exp",
+        [QPU_W_SFU_LOG] = "sfu_log",
+        [QPU_W_TMU0_S] = "tmu0_s",
+        [QPU_W_TMU0_T] = "tmu0_t",
+        [QPU_W_TMU0_R] = "tmu0_r",
+        [QPU_W_TMU0_B] = "tmu0_b",
+        [QPU_W_TMU1_S] = "tmu1_s",
+        [QPU_W_TMU1_T] = "tmu1_t",
+        [QPU_W_TMU1_R] = "tmu1_r",
+        [QPU_W_TMU1_B] = "tmu1_b",
+};
+
+static const char *qpu_pack_a[] = {
+        [QPU_PACK_A_NOP] = "",
+        [QPU_PACK_A_16A] = ".16a",
+        [QPU_PACK_A_16B] = ".16b",
+        [QPU_PACK_A_8888] = ".8888",
+        [QPU_PACK_A_8A] = ".8a",
+        [QPU_PACK_A_8B] = ".8b",
+        [QPU_PACK_A_8C] = ".8c",
+        [QPU_PACK_A_8D] = ".8d",
+
+        [QPU_PACK_A_32_SAT] = ".sat",
+        [QPU_PACK_A_16A_SAT] = ".16a.sat",
+        [QPU_PACK_A_16B_SAT] = ".16b.sat",
+        [QPU_PACK_A_8888_SAT] = ".8888.sat",
+        [QPU_PACK_A_8A_SAT] = ".8a.sat",
+        [QPU_PACK_A_8B_SAT] = ".8b.sat",
+        [QPU_PACK_A_8C_SAT] = ".8c.sat",
+        [QPU_PACK_A_8D_SAT] = ".8d.sat",
+};
+
+/* Looks up a description string in one of the tables in this file, yielding
+ * "???" for an index at or past the end of the table and for entries the
+ * table leaves NULL.  The bound must be ">=": several instruction fields are
+ * wider than their tables (e.g. the 4-bit pack field vs. qpu_pack_mul), so an
+ * index equal to the table size is reachable.  Note that index is evaluated
+ * more than once.
+ */
+#define DESC(array, index) \
+        (((index) >= ARRAY_SIZE(array) || !(array)[index]) ? \
+         "???"
: (array)[index]) + +static const char * +get_special_write_desc(int reg, bool is_a) +{ + if (is_a) { + switch (reg) { + case QPU_W_QUAD_XY: + return "quad_x"; + case QPU_W_VPMVCD_SETUP: + return "vr_setup"; + case QPU_W_VPM_ADDR: + return "vr_addr"; + } + } + + return special_write[reg]; +} + +static void +print_alu_dst(uint64_t inst, bool is_mul) +{ + bool is_a = is_mul == ((inst & QPU_WS) != 0); + uint32_t waddr = (is_mul ? + QPU_GET_FIELD(inst, QPU_WADDR_MUL) : + QPU_GET_FIELD(inst, QPU_WADDR_ADD)); + const char *file = is_a ? "a" : "b"; + uint32_t pack = QPU_GET_FIELD(inst, QPU_PACK); + + if (waddr <= 31) + fprintf(stderr, "r%s%d", file, waddr); + else if (get_special_write_desc(waddr, is_a)) + fprintf(stderr, "%s", get_special_write_desc(waddr, is_a)); + else + fprintf(stderr, "%s%d?", file, waddr); + + if (is_mul && (inst & QPU_PM)) { + fprintf(stderr, ".%s", DESC(qpu_pack_mul, pack)); + } else if (is_a && !(inst & QPU_PM)) { + fprintf(stderr, "%s", DESC(qpu_pack_a, pack)); + } +} + +static void +print_alu_src(uint64_t inst, uint32_t mux) +{ + bool is_a = mux != QPU_MUX_B; + const char *file = is_a ? "a" : "b"; + uint32_t raddr = (is_a ? 
+                          QPU_GET_FIELD(inst, QPU_RADDR_A) :
+                          QPU_GET_FIELD(inst, QPU_RADDR_B));
+
+        if (mux <= QPU_MUX_R5)
+                fprintf(stderr, "r%d", mux);
+        else if (!is_a &&
+                 QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
+                /* With the small-immediate signal, the raddr_b field holds
+                 * an encoded constant instead of a regfile B address.
+                 */
+                uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM);
+                if (si <= 15)
+                        fprintf(stderr, "%d", si);
+                else if (si <= 31)
+                        /* 16..31 encode the integers -16..-1. */
+                        fprintf(stderr, "%d", (int)si - 32);
+                else if (si <= 39)
+                        /* 32..39 encode 1.0, 2.0, ... 128.0. */
+                        fprintf(stderr, "%.1f", (float)(1 << (si - 32)));
+                else if (si <= 47)
+                        /* 40..47 encode 1/256, 1/128, ... 1/2.  Use a shift
+                         * so every step is an exact power of two.
+                         */
+                        fprintf(stderr, "%f", 1.0f / (1 << (48 - si)));
+                else
+                        fprintf(stderr, "???");
+        } else if (raddr <= 31)
+                fprintf(stderr, "r%s%d", file, raddr);
+        else {
+                if (is_a)
+                        fprintf(stderr, "%s", DESC(special_read_a, raddr - 32));
+                else
+                        fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
+        }
+}
+
+/* Prints the add-ALU half of an instruction to stderr.  An "or" whose two
+ * source muxes match is shown as a single-source "mov".
+ */
+static void
+print_add_op(uint64_t inst)
+{
+        uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
+        bool is_mov = (op_add == QPU_A_OR &&
+                       QPU_GET_FIELD(inst, QPU_ADD_A) ==
+                       QPU_GET_FIELD(inst, QPU_ADD_B));
+
+        fprintf(stderr, "%s ", is_mov ? "mov" : DESC(qpu_add_opcodes, op_add));
+
+        print_alu_dst(inst, false);
+        fprintf(stderr, ", ");
+
+        print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_A));
+
+        if (!is_mov) {
+                fprintf(stderr, ", ");
+
+                print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_B));
+        }
+}
+
+/* Prints the mul-ALU half of an instruction to stderr.  A "v8min" whose two
+ * source muxes match is shown as a single-source "mov".
+ */
+static void
+print_mul_op(uint64_t inst)
+{
+        uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
+        bool is_mov = (op_mul == QPU_M_V8MIN &&
+                       QPU_GET_FIELD(inst, QPU_MUL_A) ==
+                       QPU_GET_FIELD(inst, QPU_MUL_B));
+
+        fprintf(stderr, "%s ", is_mov ?
"mov" : DESC(qpu_mul_opcodes, op_mul)); + + print_alu_dst(inst, true); + fprintf(stderr, ", "); + + print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_A)); + + if (!is_mov) { + fprintf(stderr, ", "); + print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_B)); + } +} + +static void +print_load_imm(uint64_t inst) +{ + uint32_t imm = inst; + + fprintf(stderr, "load_imm "); + print_alu_dst(inst, false); + fprintf(stderr, ", "); + print_alu_dst(inst, true); + fprintf(stderr, ", "); + fprintf(stderr, "0x%08x (%f)", imm, uif(imm)); +} + +void +vc4_qpu_disasm(const uint64_t *instructions, int num_instructions) +{ + for (int i = 0; i < num_instructions; i++) { + uint64_t inst = instructions[i]; + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + + switch (sig) { + case QPU_SIG_BRANCH: + fprintf(stderr, "branch\n"); + break; + case QPU_SIG_LOAD_IMM: + print_load_imm(inst); + break; + default: + if (sig != QPU_SIG_NONE) + fprintf(stderr, "%s ", DESC(qpu_sig, sig)); + print_add_op(inst); + fprintf(stderr, " ; "); + print_mul_op(inst); + + if (num_instructions != 1) + fprintf(stderr, "\n"); + break; + } + } +} diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c new file mode 100644 index 00000000000..b9f28528c39 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c @@ -0,0 +1,275 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vc4_qpu.h" + +static bool +writes_reg(uint64_t inst, uint32_t w) +{ + return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w || + QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w); +} + +static bool +_reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b) +{ + struct { + uint32_t mux, addr; + } src_regs[] = { + { QPU_GET_FIELD(inst, QPU_ADD_A) }, + { QPU_GET_FIELD(inst, QPU_ADD_B) }, + { QPU_GET_FIELD(inst, QPU_MUL_A) }, + { QPU_GET_FIELD(inst, QPU_MUL_B) }, + }; + + for (int i = 0; i < ARRAY_SIZE(src_regs); i++) { + if (!ignore_a && + src_regs[i].mux == QPU_MUX_A && + (QPU_GET_FIELD(inst, QPU_RADDR_A) == r)) + return true; + + if (!ignore_b && + src_regs[i].mux == QPU_MUX_B && + (QPU_GET_FIELD(inst, QPU_RADDR_B) == r)) + return true; + } + + return false; +} + +static bool +reads_reg(uint64_t inst, uint32_t r) +{ + return _reads_reg(inst, r, false, false); +} + +static bool +reads_a_reg(uint64_t inst, uint32_t r) +{ + return _reads_reg(inst, r, false, true); +} + +static bool +writes_sfu(uint64_t inst) +{ + return (writes_reg(inst, QPU_W_SFU_RECIP) || + writes_reg(inst, QPU_W_SFU_RECIPSQRT) || + writes_reg(inst, QPU_W_SFU_EXP) || + writes_reg(inst, QPU_W_SFU_LOG)); +} + +/** + * Checks for the instruction restrictions from page 37 ("Summary of + * Instruction Restrictions"). 
+ */
+void
+vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
+{
+        /* An empty program has nothing to check.  Returning here also keeps
+         * the "num_inst - 1" loop bounds below from wrapping around, since
+         * num_inst is unsigned.
+         */
+        if (num_inst == 0)
+                return;
+
+        for (int i = 0; i < num_inst; i++) {
+                uint64_t inst = insts[i];
+
+                if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END)
+                        continue;
+
+                /* "The Thread End instruction must not write to either physical
+                 * regfile A or B."
+                 */
+                assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) >= 32);
+                assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) >= 32);
+
+                /* Two delay slots will be executed, so both insts[i + 1] and
+                 * insts[i + 2] must exist.  Stop rather than read past the
+                 * end of the program when assertions are compiled out.
+                 */
+                assert(i + 2 < num_inst);
+                if (i + 2 >= num_inst)
+                        break;
+
+                /* Covers the Thread End instruction and both delay slots. */
+                for (int j = i; j <= i + 2; j++) {
+                        /* "The last three instructions of any program
+                         * (Thread End plus the following two delay-slot
+                         * instructions) must not do varyings read, uniforms
+                         * read or any kind of VPM, VDR, or VDW read or
+                         * write."
+                         */
+                        assert(!writes_reg(insts[j], QPU_W_VPM));
+                        assert(!reads_reg(insts[j], QPU_R_VARY));
+                        assert(!reads_reg(insts[j], QPU_R_UNIF));
+                        assert(!reads_reg(insts[j], QPU_R_VPM));
+
+                        /* "The Thread End instruction and the following two
+                         * delay slot instructions must not write or read
+                         * address 14 in either regfile A or B."
+                         */
+                        assert(!writes_reg(insts[j], 14));
+                        assert(!reads_reg(insts[j], 14));
+
+                }
+
+                /* "The final program instruction (the second delay slot
+                 * instruction) must not do a TLB Z write."
+                 */
+                assert(!writes_reg(insts[i + 2], QPU_W_TLB_Z));
+        }
+
+        /* "A scoreboard wait must not occur in the first two instructions of
+         * a fragment shader. This is either the explicit Wait for Scoreboard
+         * signal or an implicit wait with the first tile-buffer read or
+         * write instruction."
+         */
+        for (int i = 0; i < 2 && i < num_inst; i++) {
+                uint64_t inst = insts[i];
+
+                assert(QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD);
+                assert(QPU_GET_FIELD(inst, QPU_SIG) !=
+                       QPU_SIG_WAIT_FOR_SCOREBOARD);
+                assert(!writes_reg(inst, QPU_W_TLB_COLOR_MS));
+                assert(!writes_reg(inst, QPU_W_TLB_COLOR_ALL));
+                assert(!writes_reg(inst, QPU_W_TLB_Z));
+
+        }
+
+        /* "If TMU_NOSWAP is written, the write must be three instructions
+         * before the first TMU write instruction. For example, if
+         * TMU_NOSWAP is written in the first shader instruction, the first
+         * TMU write cannot occur before the 4th shader instruction."
+         */
+        int last_tmu_noswap = -10;
+        for (int i = 0; i < num_inst; i++) {
+                uint64_t inst = insts[i];
+
+                assert((i - last_tmu_noswap) > 3 ||
+                       (!writes_reg(inst, QPU_W_TMU0_S) &&
+                        !writes_reg(inst, QPU_W_TMU1_S)));
+
+                if (writes_reg(inst, QPU_W_TMU_NOSWAP))
+                        last_tmu_noswap = i;
+        }
+
+        /* "An instruction must not read from a location in physical regfile A
+         * or B that was written to by the previous instruction."
+         */
+        for (int i = 0; i < num_inst - 1; i++) {
+                uint64_t inst = insts[i];
+                /* Each ALU has its own write address field. */
+                uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+                uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+
+                assert(add_waddr >= 32 || !reads_reg(insts[i + 1], add_waddr));
+                assert(mul_waddr >= 32 || !reads_reg(insts[i + 1], mul_waddr));
+        }
+
+        /* "After an SFU lookup instruction, accumulator r4 must not be read
+         * in the following two instructions. Any other instruction that
+         * results in r4 being written (that is, TMU read, TLB read, SFU
+         * lookup) cannot occur in the two instructions following an SFU
+         * lookup."
+         */
+        int last_sfu_inst = -10;
+        for (int i = 0; i < num_inst - 1; i++) {
+                uint64_t inst = insts[i];
+
+                assert(i - last_sfu_inst > 2 ||
+                       (!writes_sfu(inst) &&
+                        !writes_reg(inst, QPU_W_TMU0_S) &&
+                        !writes_reg(inst, QPU_W_TMU1_S) &&
+                        QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD));
+
+                if (writes_sfu(inst))
+                        last_sfu_inst = i;
+        }
+
+        int last_r5_write = -10;
+        for (int i = 0; i < num_inst - 1; i++) {
+                uint64_t inst = insts[i];
+
+                /* "An instruction that does a vector rotate by r5 must not
+                 * immediately follow an instruction that writes to r5."
+                 */
+                assert(last_r5_write != i - 1 ||
+                       QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM ||
+                       QPU_GET_FIELD(inst, QPU_SMALL_IMM) != 48);
+
+                /* Record r5 writes so the check above can fire on the next
+                 * instruction.
+                 */
+                if (writes_reg(inst, QPU_W_ACC5))
+                        last_r5_write = i;
+        }
+
+        /* "An instruction that does a vector rotate must not immediately
+         * follow an instruction that writes to the accumulator that is being
+         * rotated.
+         *
+         * XXX: TODO.
+         */
+
+        /* "After an instruction that does a TLB Z write, the multisample mask
+         * must not be read as an instruction input argument in the following
+         * two instruction. The TLB Z write instruction can, however, be
+         * followed immediately by a TLB color write."
+         */
+        for (int i = 0; i < num_inst - 1; i++) {
+                uint64_t inst = insts[i];
+                if (writes_reg(inst, QPU_W_TLB_Z)) {
+                        assert(!reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS));
+                        /* The second following instruction may not exist. */
+                        assert(i + 2 >= num_inst ||
+                               !reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS));
+                }
+        }
+
+        /*
+         * "A single instruction can only perform a maximum of one of the
+         * following closely coupled peripheral accesses in a single
+         * instruction: TMU write, TMU read, TLB write, TLB read, TLB
+         * combined color read and write, SFU write, Mutex read or Semaphore
+         * access."
+         */
+        for (int i = 0; i < num_inst - 1; i++) {
+                uint64_t inst = insts[i];
+                int accesses = 0;
+                static const uint32_t specials[] = {
+                        QPU_W_TLB_COLOR_MS,
+                        QPU_W_TLB_COLOR_ALL,
+                        QPU_W_TLB_Z,
+                        QPU_W_TMU0_S,
+                        QPU_W_TMU0_T,
+                        QPU_W_TMU0_R,
+                        QPU_W_TMU0_B,
+                        QPU_W_TMU1_S,
+                        QPU_W_TMU1_T,
+                        QPU_W_TMU1_R,
+                        QPU_W_TMU1_B,
+                        QPU_W_SFU_RECIP,
+                        QPU_W_SFU_RECIPSQRT,
+                        QPU_W_SFU_EXP,
+                        QPU_W_SFU_LOG,
+                };
+
+                for (int j = 0; j < ARRAY_SIZE(specials); j++) {
+                        if (writes_reg(inst, specials[j]))
+                                accesses++;
+                }
+
+                if (reads_reg(inst, QPU_R_MUTEX_ACQUIRE))
+                        accesses++;
+
+                /* XXX: semaphore, combined color read/write? */
+                switch (QPU_GET_FIELD(inst, QPU_SIG)) {
+                case QPU_SIG_COLOR_LOAD:
+                case QPU_SIG_COLOR_LOAD_END:
+                case QPU_SIG_LOAD_TMU0:
+                case QPU_SIG_LOAD_TMU1:
+                        accesses++;
+                }
+
+                assert(accesses <= 1);
+        }
+}
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
new file mode 100644
index 00000000000..67ddfebf513
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright © 2014 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdio.h> + +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" +#include "util/u_blitter.h" + +#include "vc4_screen.h" +#include "vc4_context.h" +#include "vc4_resource.h" + +static void +vc4_resource_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + pipe_resource_reference(&ptrans->resource, NULL); + util_slab_free(&vc4->transfer_pool, ptrans); +} + +static void * +vc4_resource_transfer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level, unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **pptrans) +{ + struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_resource *rsc = vc4_resource(prsc); + struct pipe_transfer *ptrans; + enum pipe_format format = prsc->format; + char *buf; + + ptrans = util_slab_alloc(&vc4->transfer_pool); + if (!ptrans) + return NULL; + + /* util_slab_alloc() doesn't zero: */ + memset(ptrans, 0, sizeof(*ptrans)); + + pipe_resource_reference(&ptrans->resource, prsc); + ptrans->level = level; + ptrans->usage = usage; + ptrans->box = *box; + ptrans->stride = rsc->slices[level].stride; + ptrans->layer_stride = ptrans->stride; + + /* Note that the current kernel implementation is synchronous, so no + * need to do syncing stuff here yet. 
+ */ + + buf = vc4_bo_map(rsc->bo); + if (!buf) { + fprintf(stderr, "Failed to map bo\n"); + goto fail; + } + + *pptrans = ptrans; + + return buf + rsc->slices[level].offset + + box->y / util_format_get_blockheight(format) * ptrans->stride + + box->x / util_format_get_blockwidth(format) * rsc->cpp + + box->z * rsc->slices[level].size0; + +fail: + vc4_resource_transfer_unmap(pctx, ptrans); + return NULL; +} + +static void +vc4_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *prsc) +{ + struct vc4_resource *rsc = vc4_resource(prsc); + vc4_bo_unreference(&rsc->bo); + free(rsc); +} + +static boolean +vc4_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *prsc, + struct winsys_handle *handle) +{ + struct vc4_resource *rsc = vc4_resource(prsc); + + return vc4_screen_bo_get_handle(pscreen, rsc->bo, rsc->slices[0].stride, + handle); +} + +static const struct u_resource_vtbl vc4_resource_vtbl = { + .resource_get_handle = vc4_resource_get_handle, + .resource_destroy = vc4_resource_destroy, + .transfer_map = vc4_resource_transfer_map, + .transfer_flush_region = u_default_transfer_flush_region, + .transfer_unmap = vc4_resource_transfer_unmap, + .transfer_inline_write = u_default_transfer_inline_write, +}; + +static void +vc4_setup_slices(struct vc4_resource *rsc) +{ + struct pipe_resource *prsc = &rsc->base.b; + uint32_t width = prsc->width0; + uint32_t height = prsc->height0; + uint32_t depth = prsc->depth0; + uint32_t offset = 0; + + for (int i = prsc->last_level; i >= 0; i--) { + struct vc4_resource_slice *slice = &rsc->slices[i]; + uint32_t level_width = u_minify(width, i); + uint32_t level_height = u_minify(height, i); + + slice->offset = offset; + slice->stride = align(level_width * rsc->cpp, 16); + slice->size0 = level_height * slice->stride; + + /* Note, since we have cubes but no 3D, depth is invariant + * with miplevel. + */ + offset += slice->size0 * depth; + } + /* XXX: align level 0 offset? 
*/
+}
+
+/* Allocates a zeroed vc4_resource, copies the template into its embedded
+ * pipe_resource, and fills in the reference count, screen, vtbl and cpp.
+ * Returns NULL if the allocation fails; callers must check for that.
+ */
+static struct vc4_resource *
+vc4_resource_setup(struct pipe_screen *pscreen,
+                   const struct pipe_resource *tmpl)
+{
+        struct vc4_resource *rsc = CALLOC_STRUCT(vc4_resource);
+        if (!rsc)
+                return NULL;
+        struct pipe_resource *prsc = &rsc->base.b;
+
+        *prsc = *tmpl;
+
+        pipe_reference_init(&prsc->reference, 1);
+        prsc->screen = pscreen;
+
+        rsc->base.vtbl = &vc4_resource_vtbl;
+        rsc->cpp = util_format_get_blocksize(tmpl->format);
+
+        assert(rsc->cpp);
+
+        return rsc;
+}
+
+/* Creates a resource from the template and allocates a BO sized to hold all
+ * of its slices.  Returns NULL if either allocation fails.
+ */
+static struct pipe_resource *
+vc4_resource_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *tmpl)
+{
+        struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl);
+        struct pipe_resource *prsc;
+
+        /* vc4_resource_setup() returns NULL when its allocation fails. */
+        if (!rsc)
+                return NULL;
+        prsc = &rsc->base.b;
+
+        vc4_setup_slices(rsc);
+
+        rsc->bo = vc4_bo_alloc(vc4_screen(pscreen),
+                               rsc->slices[0].offset +
+                               rsc->slices[0].size0 * prsc->depth0,
+                               "resource");
+        if (!rsc->bo)
+                goto fail;
+
+        return prsc;
+fail:
+        vc4_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+/* Wraps the BO named by a winsys handle in a new resource, taking the level 0
+ * stride from the handle.  Returns NULL if allocation or the handle lookup
+ * fails.
+ */
+static struct pipe_resource *
+vc4_resource_from_handle(struct pipe_screen *pscreen,
+                         const struct pipe_resource *tmpl,
+                         struct winsys_handle *handle)
+{
+        struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl);
+        struct pipe_resource *prsc;
+        struct vc4_resource_slice *slice;
+
+        if (!rsc)
+                return NULL;
+
+        /* Only form member addresses once rsc is known to be valid. */
+        prsc = &rsc->base.b;
+        slice = &rsc->slices[0];
+
+        rsc->bo = vc4_screen_bo_from_handle(pscreen, handle, &slice->stride);
+        if (!rsc->bo)
+                goto fail;
+
+#ifdef USE_VC4_SIMULATOR
+        slice->stride = align(prsc->width0 * rsc->cpp, 16);
+#endif
+
+        return prsc;
+
+fail:
+        vc4_resource_destroy(pscreen, prsc);
+        return NULL;
+}
+
+static struct pipe_surface *
+vc4_create_surface(struct pipe_context *pctx,
+                   struct pipe_resource *ptex,
+                   const struct pipe_surface *surf_tmpl)
+{
+        struct vc4_surface *surface = CALLOC_STRUCT(vc4_surface);
+
+        if (!surface)
+                return NULL;
+
+        assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
+        struct pipe_surface *psurf = &surface->base;
+        unsigned level =
surf_tmpl->u.tex.level;
+
+        pipe_reference_init(&psurf->reference, 1);
+        pipe_resource_reference(&psurf->texture, ptex);
+
+        psurf->context = pctx;
+        psurf->format = surf_tmpl->format;
+        psurf->width = u_minify(ptex->width0, level);
+        psurf->height = u_minify(ptex->height0, level);
+        psurf->u.tex.level = level;
+        psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+        psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+
+        return &surface->base;
+}
+
+/**
+ * Drops the surface's reference on its texture and frees the surface.
+ */
+static void
+vc4_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
+{
+        pipe_resource_reference(&psurf->texture, NULL);
+        FREE(psurf);
+}
+
+/**
+ * pipe_context::flush_resource hook: flushes the whole context, regardless
+ * of whether anything queued actually touches \p resource.
+ */
+static void
+vc4_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+
+        /* XXX: Skip this if we don't have any queued drawing to it. */
+        vc4->base.flush(pctx, NULL, 0);
+}
+
+/**
+ * Performs a blit through the u_blitter helper.
+ *
+ * All of the context state that is handed to the util_blitter_save_*()
+ * calls below is saved before util_blitter_blit() is invoked.
+ *
+ * \return false (after logging to stderr) if u_blitter reports that it
+ *         cannot handle \p info, true once the blit has been issued.
+ */
+static bool
+render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
+{
+        struct vc4_context *vc4 = vc4_context(ctx);
+
+        if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
+                /* Terminate the message so that consecutive diagnostics
+                 * don't run together on stderr.
+                 */
+                fprintf(stderr, "blit unsupported %s -> %s\n",
+                        util_format_short_name(info->src.resource->format),
+                        util_format_short_name(info->dst.resource->format));
+                return false;
+        }
+
+        util_blitter_save_vertex_buffer_slot(vc4->blitter, vc4->vertexbuf.vb);
+        util_blitter_save_vertex_elements(vc4->blitter, vc4->vtx);
+        util_blitter_save_vertex_shader(vc4->blitter, vc4->prog.vs);
+        util_blitter_save_rasterizer(vc4->blitter, vc4->rasterizer);
+        util_blitter_save_viewport(vc4->blitter, &vc4->viewport);
+        util_blitter_save_scissor(vc4->blitter, &vc4->scissor);
+        util_blitter_save_fragment_shader(vc4->blitter, vc4->prog.fs);
+        util_blitter_save_blend(vc4->blitter, vc4->blend);
+        util_blitter_save_depth_stencil_alpha(vc4->blitter, vc4->zsa);
+        util_blitter_save_stencil_ref(vc4->blitter, &vc4->stencil_ref);
+        util_blitter_save_sample_mask(vc4->blitter, vc4->sample_mask);
+        util_blitter_save_framebuffer(vc4->blitter, &vc4->framebuffer);
+        util_blitter_save_fragment_sampler_states(vc4->blitter,
+                        vc4->fragtex.num_samplers,
+                        (void **)vc4->fragtex.samplers);
+        util_blitter_save_fragment_sampler_views(vc4->blitter,
+                        vc4->fragtex.num_textures, vc4->fragtex.textures);
+
+        util_blitter_blit(vc4->blitter, info);
+
+        return true;
+}
+
+/* Optimal hardware path for blitting pixels.
+ * Scaling, format conversion, up- and downsampling (resolve) are allowed.
+ *
+ * The blit is attempted in this order: color MSAA resolves are rejected as
+ * unimplemented, then a plain region copy is tried, and finally the blit is
+ * rendered through u_blitter with the stencil bit masked off.
+ */
+static void
+vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
+{
+        /* Work on a copy: the mask may be modified below. */
+        struct pipe_blit_info info = *blit_info;
+
+        if (info.src.resource->nr_samples > 1 &&
+            info.dst.resource->nr_samples <= 1 &&
+            !util_format_is_depth_or_stencil(info.src.resource->format) &&
+            !util_format_is_pure_integer(info.src.resource->format)) {
+                fprintf(stderr, "color resolve unimplemented\n");
+                return;
+        }
+
+        if (util_try_blit_via_copy_region(pctx, &info)) {
+                return; /* done */
+        }
+
+        if (info.mask & PIPE_MASK_S) {
+                fprintf(stderr, "cannot blit stencil, skipping\n");
+                info.mask &= ~PIPE_MASK_S;
+        }
+
+        render_blit(pctx, &info);
+}
+
+/**
+ * Installs the resource-related hooks on the screen.
+ */
+void
+vc4_resource_screen_init(struct pipe_screen *pscreen)
+{
+        pscreen->resource_create = vc4_resource_create;
+        pscreen->resource_from_handle = vc4_resource_from_handle;
+        pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+        pscreen->resource_destroy = u_resource_destroy_vtbl;
+}
+
+/**
+ * Installs the transfer, surface, copy and blit hooks on the context.
+ */
+void
+vc4_resource_context_init(struct pipe_context *pctx)
+{
+        pctx->transfer_map = u_transfer_map_vtbl;
+        pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
+        pctx->transfer_unmap = u_transfer_unmap_vtbl;
+        pctx->transfer_inline_write = u_transfer_inline_write_vtbl;
+        pctx->create_surface = vc4_create_surface;
+        pctx->surface_destroy = vc4_surface_destroy;
+        pctx->resource_copy_region = util_resource_copy_region;
+        pctx->blit = vc4_blit;
+        pctx->flush_resource = vc4_flush_resource;
+}
diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h
new file mode 100644
index
00000000000..664e8bf5148 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_resource.h @@ -0,0 +1,68 @@ +/* + * Copyright © 2014 Broadcom + * Copyright (C) 2012 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#ifndef VC4_RESOURCE_H
+#define VC4_RESOURCE_H
+
+#include "vc4_screen.h"
+#include "util/u_transfer.h"
+
+/* Per-miplevel layout record; vc4_resource keeps one per level.
+ *
+ * NOTE(review): presumably offset/size0 are byte quantities within the BO
+ * and stride is the row pitch in bytes (the simulator copies rows using
+ * slices[0].stride as a byte count) -- confirm against vc4_resource.c.
+ */
+struct vc4_resource_slice {
+        uint32_t offset;
+        uint32_t stride;
+        uint32_t size0;
+};
+
+/* Driver surface object.  "base" must stay the first member because
+ * vc4_surface() converts a pipe_surface pointer with a plain cast.
+ */
+struct vc4_surface {
+        struct pipe_surface base;
+        uint32_t offset;
+        uint32_t stride;
+        uint32_t width;
+        uint16_t height;
+        uint16_t depth;
+};
+
+/* Driver resource object.  "base" must stay the first member because
+ * vc4_resource() converts a pipe_resource pointer with a plain cast
+ * (this also assumes the pipe_resource is the first member of u_resource --
+ * TODO confirm).
+ */
+struct vc4_resource {
+        struct u_resource base;
+        /* Buffer object holding the resource's storage. */
+        struct vc4_bo *bo;
+        /* Layout of each miplevel, indexed by level. */
+        struct vc4_resource_slice slices[VC4_MAX_MIP_LEVELS];
+        /* NOTE(review): presumably bytes ("chars") per pixel -- confirm. */
+        int cpp;
+};
+
+/* Downcast from the generic Gallium resource to the driver resource. */
+static INLINE struct vc4_resource *
+vc4_resource(struct pipe_resource *prsc)
+{
+        return (struct vc4_resource *)prsc;
+}
+
+/* Downcast from the generic Gallium surface to the driver surface. */
+static INLINE struct vc4_surface *
+vc4_surface(struct pipe_surface *psurf)
+{
+        return (struct vc4_surface *)psurf;
+}
+
+/* Install the resource hooks on a screen / context respectively. */
+void vc4_resource_screen_init(struct pipe_screen *pscreen);
+void vc4_resource_context_init(struct pipe_context *pctx);
+
+#endif /* VC4_RESOURCE_H */
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
new file mode 100644
index 00000000000..7c7ed4ca6eb
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright © 2014 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+
+#include "vc4_screen.h"
+#include "vc4_context.h"
+#include "vc4_resource.h"
+
+/* Value vc4_get_texture_format() returns for formats it has no hardware
+ * encoding for.  The function returns uint8_t, so the sentinel has to be
+ * spelled as a uint8_t too: comparing the result against a bare ~0 compares
+ * 255 with -1 after integer promotion and never matches.
+ */
+#define VC4_TEXTURE_FORMAT_INVALID ((uint8_t)~0)
+
+static const struct debug_named_value debug_options[] = {
+        {"cl", VC4_DBG_CL, "Dump command list during creation"},
+        /* The u_debug option parsers walk the table until this sentinel. */
+        DEBUG_NAMED_VALUE_END
+};
+
+static const char *
+vc4_screen_get_name(struct pipe_screen *pscreen)
+{
+        return "VC4";
+}
+
+static const char *
+vc4_screen_get_vendor(struct pipe_screen *pscreen)
+{
+        return "Broadcom";
+}
+
+static void
+vc4_screen_destroy(struct pipe_screen *pscreen)
+{
+        /* Allocated with CALLOC_STRUCT(), so release with the matching
+         * u_memory wrapper rather than libc free().
+         */
+        FREE(pscreen);
+}
+
+/**
+ * pipe_screen::get_param hook: reports integer/boolean capabilities.
+ *
+ * Unknown caps are logged to stderr and reported as 0.
+ */
+static int
+vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+        switch (param) {
+                /* Supported features (boolean caps). */
+        case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+        case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+        case PIPE_CAP_NPOT_TEXTURES:
+        case PIPE_CAP_USER_CONSTANT_BUFFERS:
+        case PIPE_CAP_TEXTURE_SHADOW_MAP:
+        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+        case PIPE_CAP_TWO_SIDED_STENCIL:
+                return 1;
+
+                /* lying for GL 2.0 */
+        case PIPE_CAP_OCCLUSION_QUERY:
+        case PIPE_CAP_POINT_SPRITE:
+                return 1;
+
+        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+                return 256;
+
+        case PIPE_CAP_GLSL_FEATURE_LEVEL:
+                return 120;
+
+        case PIPE_CAP_MAX_VIEWPORTS:
+                return 1;
+
+                /* Unsupported features. */
+        case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+        case PIPE_CAP_ANISOTROPIC_FILTER:
+        case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+        case PIPE_CAP_CUBE_MAP_ARRAY:
+        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+        case PIPE_CAP_TEXTURE_SWIZZLE:
+        case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+        case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+        case PIPE_CAP_SEAMLESS_CUBE_MAP:
+        case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+        case PIPE_CAP_TGSI_INSTANCEID:
+        case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_COMPUTE:
+        case PIPE_CAP_START_INSTANCE:
+        case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+        case PIPE_CAP_SHADER_STENCIL_EXPORT:
+        case PIPE_CAP_TGSI_TEXCOORD:
+        case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+        case PIPE_CAP_CONDITIONAL_RENDER:
+        case PIPE_CAP_PRIMITIVE_RESTART:
+        case PIPE_CAP_TEXTURE_MULTISAMPLE:
+        case PIPE_CAP_TEXTURE_BARRIER:
+        case PIPE_CAP_SM3:
+        case PIPE_CAP_INDEP_BLEND_ENABLE:
+        case PIPE_CAP_INDEP_BLEND_FUNC:
+        case PIPE_CAP_DEPTH_CLIP_DISABLE:
+        case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+        case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+        case PIPE_CAP_USER_VERTEX_BUFFERS:
+        case PIPE_CAP_USER_INDEX_BUFFERS:
+        case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+        case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+        case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+        case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+        case PIPE_CAP_TEXTURE_GATHER_SM5:
+        case PIPE_CAP_FAKE_SW_MSAA:
+        case PIPE_CAP_TEXTURE_QUERY_LOD:
+        case PIPE_CAP_SAMPLE_SHADING:
+        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+        case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+        case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+        case PIPE_CAP_MAX_TEXEL_OFFSET:
+        case PIPE_CAP_MAX_VERTEX_STREAMS:
+        case PIPE_CAP_DRAW_INDIRECT:
+                return 0;
+
+                /* Stream output. */
+        case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+        case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+        case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+        case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+                return 0;
+
+                /* Geometry shader output, unsupported. */
+        case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+        case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+                return 0;
+
+                /* Texturing. */
+        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+                return VC4_MAX_MIP_LEVELS;
+        case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+                return 1;
+        case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+                return 0;
+
+                /* Render targets. */
+        case PIPE_CAP_MAX_RENDER_TARGETS:
+                return 1;
+
+                /* Queries. */
+        case PIPE_CAP_QUERY_TIME_ELAPSED:
+        case PIPE_CAP_QUERY_TIMESTAMP:
+                return 0;
+
+        case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+        case PIPE_CAP_MIN_TEXEL_OFFSET:
+                return 0;
+
+        case PIPE_CAP_ENDIANNESS:
+                return PIPE_ENDIAN_LITTLE;
+
+        case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+                return 64;
+
+        default:
+                fprintf(stderr, "unknown param %d\n", param);
+                return 0;
+        }
+}
+
+/**
+ * pipe_screen::get_paramf hook: reports floating-point capabilities.
+ *
+ * Unknown caps are logged to stderr and reported as 0.
+ */
+static float
+vc4_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+        switch (param) {
+        case PIPE_CAPF_MAX_LINE_WIDTH:
+        case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+        case PIPE_CAPF_MAX_POINT_WIDTH:
+        case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+                return 8192.0f;
+        case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+                return 0.0f;
+        case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+                return 0.0f;
+        case PIPE_CAPF_GUARD_BAND_LEFT:
+        case PIPE_CAPF_GUARD_BAND_TOP:
+        case PIPE_CAPF_GUARD_BAND_RIGHT:
+        case PIPE_CAPF_GUARD_BAND_BOTTOM:
+                return 0.0f;
+        default:
+                fprintf(stderr, "unknown paramf %d\n", param);
+                return 0;
+        }
+}
+
+/**
+ * pipe_screen::get_shader_param hook.
+ *
+ * Only vertex and fragment shaders are exposed; every cap of any other
+ * stage is reported as 0.  Unknown caps are logged to stderr and reported
+ * as 0.
+ */
+static int
+vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+                            enum pipe_shader_cap param)
+{
+        if (shader != PIPE_SHADER_VERTEX &&
+            shader != PIPE_SHADER_FRAGMENT) {
+                return 0;
+        }
+
+        /* this is probably not totally correct.. but it's a start: */
+        switch (param) {
+        case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+                return 16384;
+        case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+                return 0;
+        case PIPE_SHADER_CAP_MAX_INPUTS:
+                return 16;
+        case PIPE_SHADER_CAP_MAX_TEMPS:
+                return 64; /* Max native temporaries. */
+        case PIPE_SHADER_CAP_MAX_ADDRS:
+                return 1; /* Max native address registers */
+        case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+                return 64 * sizeof(float[4]);
+        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+                return 1;
+        case PIPE_SHADER_CAP_MAX_PREDS:
+                return 0; /* nothing uses this */
+        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+                return 0;
+        case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+                return 0;
+        case PIPE_SHADER_CAP_SUBROUTINES:
+                return 0;
+        case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+                return 0;
+        case PIPE_SHADER_CAP_INTEGERS:
+        case PIPE_SHADER_CAP_DOUBLES:
+                return 0;
+        case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+        case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+                return 16;
+        case PIPE_SHADER_CAP_PREFERRED_IR:
+                return PIPE_SHADER_IR_TGSI;
+        default:
+                fprintf(stderr, "unknown shader param %d\n", param);
+                return 0;
+        }
+}
+
+/**
+ * Maps a Gallium format to the texture format number used by this driver.
+ *
+ * \return the format number, or VC4_TEXTURE_FORMAT_INVALID (0xff, i.e. ~0
+ *         truncated to uint8_t) when the format has no mapping.
+ */
+uint8_t
+vc4_get_texture_format(enum pipe_format format)
+{
+        switch (format) {
+        case PIPE_FORMAT_B8G8R8A8_UNORM:
+                return 0;
+        case PIPE_FORMAT_B8G8R8X8_UNORM:
+                return 1;
+        case PIPE_FORMAT_R8G8B8A8_UNORM:
+                return 0;
+        case PIPE_FORMAT_R8G8B8X8_UNORM:
+                return 1;
+        case PIPE_FORMAT_A8R8G8B8_UNORM:
+                return 0;
+        case PIPE_FORMAT_X8R8G8B8_UNORM:
+                return 1;
+        case PIPE_FORMAT_A8B8G8R8_UNORM:
+                return 0;
+        case PIPE_FORMAT_X8B8G8R8_UNORM:
+                return 1;
+/*
+        case PIPE_FORMAT_R4G4B4A4_UNORM:
+                return 2;
+        case PIPE_FORMAT_R5G5B5A1_UNORM:
+                return 3;
+        case PIPE_FORMAT_R5G6B5_UNORM:
+                return 4;
+*/
+        case PIPE_FORMAT_L8_UNORM:
+                return 5;
+        case PIPE_FORMAT_A8_UNORM:
+                return 6;
+        case PIPE_FORMAT_L8A8_UNORM:
+                return 7;
+        /* XXX: ETC1 and more*/
+        default:
+                return VC4_TEXTURE_FORMAT_INVALID;
+        }
+}
+
+/**
+ * pipe_screen::is_format_supported hook.
+ *
+ * Accumulates in \c retval every requested bind flag that this format can
+ * satisfy; the format is supported only if that equals the full \p usage
+ * mask.  Multisampled resources are never supported.
+ */
+static boolean
+vc4_screen_is_format_supported(struct pipe_screen *pscreen,
+                               enum pipe_format format,
+                               enum pipe_texture_target target,
+                               unsigned sample_count,
+                               unsigned usage)
+{
+        unsigned retval = 0;
+
+        if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+            (sample_count > 1) ||
+            !util_format_is_supported(format, usage)) {
+                return FALSE;
+        }
+
+        if (usage & PIPE_BIND_VERTEX_BUFFER)
+                retval |= PIPE_BIND_VERTEX_BUFFER; /* XXX */
+
+        if ((usage & PIPE_BIND_RENDER_TARGET) &&
+            (format == PIPE_FORMAT_B8G8R8A8_UNORM ||
+             format == PIPE_FORMAT_B8G8R8X8_UNORM || /* XXX: really? */
+             format == PIPE_FORMAT_R8G8B8A8_UNORM ||
+             format == PIPE_FORMAT_R8G8B8X8_UNORM || /* XXX: really? */
+             format == PIPE_FORMAT_A8B8G8R8_UNORM ||
+             format == PIPE_FORMAT_X8B8G8R8_UNORM || /* XXX: really? */
+             format == PIPE_FORMAT_A8R8G8B8_UNORM ||
+             format == PIPE_FORMAT_X8R8G8B8_UNORM || /* XXX: really? */
+             format == PIPE_FORMAT_R16G16B16A16_FLOAT)) {
+                retval |= PIPE_BIND_RENDER_TARGET;
+        }
+
+        /* Compare against the uint8_t-typed sentinel: the former "!= ~0"
+         * test was always true, so every format was reported as sampleable.
+         */
+        if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+            (vc4_get_texture_format(format) != VC4_TEXTURE_FORMAT_INVALID)) {
+                retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
+                                   PIPE_BIND_VERTEX_BUFFER);
+        }
+
+        if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+            (format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+             format == PIPE_FORMAT_Z24X8_UNORM)) {
+                retval |= PIPE_BIND_DEPTH_STENCIL;
+        }
+
+        if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+            (format == PIPE_FORMAT_I8_UINT ||
+             format == PIPE_FORMAT_I16_UINT)) {
+                retval |= PIPE_BIND_INDEX_BUFFER;
+        }
+
+        if (usage & PIPE_BIND_TRANSFER_READ)
+                retval |= PIPE_BIND_TRANSFER_READ;
+        if (usage & PIPE_BIND_TRANSFER_WRITE)
+                retval |= PIPE_BIND_TRANSFER_WRITE;
+
+        return retval == usage;
+}
+
+/**
+ * Creates the vc4 screen for the given DRM file descriptor.
+ *
+ * \return the new screen, or NULL if it could not be allocated.
+ */
+struct pipe_screen *
+vc4_screen_create(int fd)
+{
+        struct vc4_screen *screen = CALLOC_STRUCT(vc4_screen);
+        struct pipe_screen *pscreen;
+
+        if (!screen)
+                return NULL;
+
+        pscreen = &screen->base;
+
+        pscreen->destroy = vc4_screen_destroy;
+        pscreen->get_param = vc4_screen_get_param;
+        pscreen->get_paramf = vc4_screen_get_paramf;
+        pscreen->get_shader_param = vc4_screen_get_shader_param;
+        pscreen->context_create = vc4_context_create;
+        pscreen->is_format_supported = vc4_screen_is_format_supported;
+
+        screen->fd = fd;
+
+#if USE_VC4_SIMULATOR
+        vc4_simulator_init(screen);
+#endif
+
+        vc4_resource_screen_init(pscreen);
+
+        pscreen->get_name = vc4_screen_get_name;
+        pscreen->get_vendor = vc4_screen_get_vendor;
+
+        return pscreen;
+}
+
+/**
+ * Exports a BO through a winsys handle.
+ *
+ * Stores \p stride in the handle and fills in the handle value according to
+ * the requested type: a flink name for SHARED, the raw BO handle for KMS.
+ *
+ * \return FALSE for any other handle type, otherwise the result of the
+ *         export.
+ */
+boolean
+vc4_screen_bo_get_handle(struct pipe_screen *pscreen,
+                         struct vc4_bo *bo,
+                         unsigned stride,
+                         struct winsys_handle *whandle)
+{
+        whandle->stride = stride;
+
+        switch (whandle->type) {
+        case DRM_API_HANDLE_TYPE_SHARED:
+                return vc4_bo_flink(bo, &whandle->handle);
+        case DRM_API_HANDLE_TYPE_KMS:
+                whandle->handle = bo->handle;
+                return TRUE;
+        }
+
+        return FALSE;
+}
+
+/**
+ * Imports a BO from a winsys handle.
+ *
+ * Only DRM_API_HANDLE_TYPE_SHARED handles are accepted.  On success the
+ * handle's stride is written to \p out_stride.
+ *
+ * \return the opened BO, or NULL (after logging to stderr) on an
+ *         unsupported handle type or if opening the name failed.
+ */
+struct vc4_bo *
+vc4_screen_bo_from_handle(struct pipe_screen *pscreen,
+                          struct winsys_handle *whandle,
+                          unsigned *out_stride)
+{
+        struct vc4_screen *screen = vc4_screen(pscreen);
+        struct vc4_bo *bo;
+
+        if (whandle->type != DRM_API_HANDLE_TYPE_SHARED) {
+                fprintf(stderr,
+                        "Attempt to import unsupported handle type %d\n",
+                        whandle->type);
+                return NULL;
+        }
+
+        bo = vc4_bo_open_name(screen, whandle->handle, whandle->stride);
+        if (!bo) {
+                fprintf(stderr, "Open name %d failed\n", whandle->handle);
+                return NULL;
+        }
+
+        *out_stride = whandle->stride;
+
+        return bo;
+}
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
new file mode 100644
index 00000000000..64bd2cf6510
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_SCREEN_H
+#define VC4_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "state_tracker/drm_driver.h"
+
+struct vc4_bo;
+
+/* Debug flag bit; described in vc4_screen.c's option table as
+ * "Dump command list during creation".
+ */
+#define VC4_DBG_CL 0x0001
+
+/* Size of vc4_resource::slices[], and the value reported for the 2D and
+ * cube texture level caps.
+ */
+#define VC4_MAX_MIP_LEVELS 11
+
+/* Driver screen object.  "base" must stay the first member because
+ * vc4_screen() converts a pipe_screen pointer with a plain cast.
+ */
+struct vc4_screen {
+        struct pipe_screen base;
+        /* File descriptor handed to vc4_screen_create(). */
+        int fd;
+
+        /* Simulator memory pool: base and size are filled in by
+         * vc4_simulator_init(); "next" is the offset of the next free byte,
+         * advanced by vc4_simulator_alloc().
+         */
+        void *simulator_mem_base;
+        uint32_t simulator_mem_next;
+        uint32_t simulator_mem_size;
+};
+
+/* Downcast from the generic Gallium screen to the driver screen. */
+static inline struct vc4_screen *
+vc4_screen(struct pipe_screen *screen)
+{
+        return (struct vc4_screen *)screen;
+}
+
+/* Creates the screen for the given fd. */
+struct pipe_screen *vc4_screen_create(int fd);
+/* Exports \p bo through \p whandle (flink name for SHARED handles, raw BO
+ * handle for KMS handles) and records \p stride in it; FALSE for any other
+ * handle type.
+ */
+boolean vc4_screen_bo_get_handle(struct pipe_screen *pscreen,
+                                 struct vc4_bo *bo,
+                                 unsigned stride,
+                                 struct winsys_handle *whandle);
+/* Imports a BO from a SHARED winsys handle, returning its stride through
+ * \p out_stride; NULL on an unsupported handle type or open failure.
+ */
+struct vc4_bo *
+vc4_screen_bo_from_handle(struct pipe_screen *pscreen,
+                          struct winsys_handle *whandle,
+                          unsigned *out_stride);
+
+/* Maps a Gallium format to the driver's texture format number; formats
+ * without a mapping yield ~0 truncated to uint8_t (0xff).
+ */
+uint8_t vc4_get_texture_format(enum pipe_format format);
+
+#endif /* VC4_SCREEN_H */
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
new file mode 100644
index 00000000000..bd938b2644c
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifdef USE_VC4_SIMULATOR
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "vc4_screen.h"
+#include "vc4_context.h"
+#include "simpenrose/simpenrose.h"
+
+/**
+ * Copies \p height rows of \p row_len bytes between two buffers that may
+ * use different row strides.
+ */
+static void
+vc4_simulator_copy_rows(void *dst, uint32_t dst_stride,
+                        const void *src, uint32_t src_stride,
+                        uint32_t row_len, uint32_t height)
+{
+        uint8_t *dst_row = dst;
+        const uint8_t *src_row = src;
+
+        for (uint32_t y = 0; y < height; y++) {
+                memcpy(dst_row, src_row, row_len);
+                dst_row += dst_stride;
+                src_row += src_stride;
+        }
+}
+
+/**
+ * Runs the context's binner and render command lists in the simulator.
+ *
+ * If the color buffer's BO has a winsys mapping, its contents are copied
+ * row by row into the BO's own mapping before simulating, and copied back
+ * afterwards.  Only the bytes common to both strides are copied per row.
+ */
+void
+vc4_simulator_flush(struct vc4_context *vc4, struct vc4_surface *csurf)
+{
+        struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
+        uint32_t winsys_stride = ctex->bo->simulator_winsys_stride;
+        uint32_t sim_stride = ctex->slices[0].stride;
+        uint32_t row_len = MIN2(sim_stride, winsys_stride);
+
+        if (ctex->bo->simulator_winsys_map) {
+#if 0
+                fprintf(stderr, "%dx%d %d %d %d\n",
+                        ctex->base.b.width0, ctex->base.b.height0,
+                        winsys_stride,
+                        sim_stride,
+                        ctex->bo->size);
+#endif
+
+                vc4_simulator_copy_rows(ctex->bo->map, sim_stride,
+                                        ctex->bo->simulator_winsys_map,
+                                        winsys_stride,
+                                        row_len, ctex->base.b.height0);
+        }
+
+        simpenrose_do_binning(simpenrose_hw_addr(vc4->bcl.base),
+                              simpenrose_hw_addr(vc4->bcl.next));
+        simpenrose_do_rendering(simpenrose_hw_addr(vc4->rcl.base),
+                                simpenrose_hw_addr(vc4->rcl.next));
+
+        if (ctex->bo->simulator_winsys_map) {
+                vc4_simulator_copy_rows(ctex->bo->simulator_winsys_map,
+                                        winsys_stride,
+                                        ctex->bo->map, sim_stride,
+                                        row_len, ctex->base.b.height0);
+        }
+}
+
+/**
+ * Initializes the simulator and records its memory pool in the screen.
+ */
+void
+vc4_simulator_init(struct vc4_screen *screen)
+{
+        simpenrose_init_hardware();
+        screen->simulator_mem_base = simpenrose_get_mem_start();
+        screen->simulator_mem_size = simpenrose_get_mem_size();
+}
+
+/**
+ * Allocates GPU memory in the simulator's address space.
+ *
+ * We just allocate for the lifetime of the context now, but some day we'll
+ * want an actual memory allocator at runtime.
+ *
+ * This is a bump allocator: the returned block starts at the current
+ * offset and the offset is then advanced past it, rounded up to 4096.
+ */
+void *
+vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size)
+{
+        uint32_t offset = screen->simulator_mem_next;
+
+        /* Validate against the remaining space *before* bumping, so that a
+         * huge size cannot wrap the 32-bit offset and slip past the check.
+         */
+        assert(offset < screen->simulator_mem_size);
+        assert(size < screen->simulator_mem_size - offset);
+
+        screen->simulator_mem_next = align(offset + size, 4096);
+
+        /* Byte arithmetic on void * is a GNU extension; go through
+         * uint8_t * instead.
+         */
+        return (uint8_t *)screen->simulator_mem_base + offset;
+}
+
+#endif /* USE_VC4_SIMULATOR */
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
new file mode 100644
index 00000000000..6ad7eea8f24
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright © 2014 Broadcom
+ * Copyright (C) 2012 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+
+#include "vc4_context.h"
+
+/**
+ * Creates a CSO that is simply a private copy of the Gallium state struct.
+ *
+ * \return the copy, or NULL on allocation failure.  Released by
+ *         vc4_generic_cso_state_delete().
+ */
+static void *
+vc4_generic_cso_state_create(const void *src, uint32_t size)
+{
+        /* The whole object is overwritten by the memcpy, so there is no
+         * need for a zeroing allocation.
+         */
+        void *dst = MALLOC(size);
+        if (!dst)
+                return NULL;
+        memcpy(dst, src, size);
+        return dst;
+}
+
+/**
+ * Shared delete hook for every CSO type in this file.
+ *
+ * The objects come from MALLOC()/CALLOC_STRUCT(), so they are released with
+ * the matching u_memory wrapper rather than libc free().
+ */
+static void
+vc4_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+        FREE(hwcso);
+}
+
+static void
+vc4_set_blend_color(struct pipe_context *pctx,
+                    const struct pipe_blend_color *blend_color)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->blend_color = *blend_color;
+        vc4->dirty |= VC4_DIRTY_BLEND_COLOR;
+}
+
+static void
+vc4_set_stencil_ref(struct pipe_context *pctx,
+                    const struct pipe_stencil_ref *stencil_ref)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->stencil_ref = *stencil_ref;
+        vc4->dirty |= VC4_DIRTY_STENCIL_REF;
+}
+
+static void
+vc4_set_clip_state(struct pipe_context *pctx,
+                   const struct pipe_clip_state *clip)
+{
+        fprintf(stderr, "clip todo\n");
+}
+
+static void
+vc4_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->sample_mask = (uint16_t)sample_mask;
+        vc4->dirty |= VC4_DIRTY_SAMPLE_MASK;
+}
+
+/**
+ * Creates the rasterizer CSO: a copy of the Gallium state plus the
+ * config bits derived from it.
+ */
+static void *
+vc4_create_rasterizer_state(struct pipe_context *pctx,
+                            const struct pipe_rasterizer_state *cso)
+{
+        struct vc4_rasterizer_state *so;
+
+        so = CALLOC_STRUCT(vc4_rasterizer_state);
+        if (!so)
+                return NULL;
+
+        so->base = *cso;
+
+        /* A face is enabled in the config bits unless it is culled. */
+        if (!(cso->cull_face & PIPE_FACE_FRONT))
+                so->config_bits[0] |= VC4_CONFIG_BITS_ENABLE_PRIM_FRONT;
+        if (!(cso->cull_face & PIPE_FACE_BACK))
+                so->config_bits[0] |= VC4_CONFIG_BITS_ENABLE_PRIM_BACK;
+
+        /* XXX: per_vertex */
+        so->point_size = cso->point_size;
+
+        if (!cso->front_ccw)
+                so->config_bits[0] |= VC4_CONFIG_BITS_CW_PRIMITIVES;
+
+        if (cso->offset_tri)
+                so->config_bits[0] |= VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET;
+
+        so->config_bits[2] |= VC4_CONFIG_BITS_EARLY_Z_UPDATE;
+
+        return so;
+}
+
+/* Blend state is baked into shaders. */
+static void *
+vc4_create_blend_state(struct pipe_context *pctx,
+                       const struct pipe_blend_state *cso)
+{
+        return vc4_generic_cso_state_create(cso, sizeof(*cso));
+}
+
+static void *
+vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx,
+                                     const struct pipe_depth_stencil_alpha_state *cso)
+{
+        return vc4_generic_cso_state_create(cso, sizeof(*cso));
+}
+
+static void
+vc4_set_polygon_stipple(struct pipe_context *pctx,
+                        const struct pipe_poly_stipple *stipple)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->stipple = *stipple;
+        vc4->dirty |= VC4_DIRTY_STIPPLE;
+}
+
+/* Only a single scissor is tracked: start_slot and num_scissors are
+ * ignored and the first entry of the array is stored.
+ */
+static void
+vc4_set_scissor_states(struct pipe_context *pctx,
+                       unsigned start_slot,
+                       unsigned num_scissors,
+                       const struct pipe_scissor_state *scissor)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+
+        vc4->scissor = *scissor;
+        vc4->dirty |= VC4_DIRTY_SCISSOR;
+}
+
+/* Only a single viewport is tracked: start_slot and num_viewports are
+ * ignored and the first entry of the array is stored.
+ */
+static void
+vc4_set_viewport_states(struct pipe_context *pctx,
+                        unsigned start_slot,
+                        unsigned num_viewports,
+                        const struct pipe_viewport_state *viewport)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->viewport = *viewport;
+        vc4->dirty |= VC4_DIRTY_VIEWPORT;
+}
+
+static void
+vc4_set_vertex_buffers(struct pipe_context *pctx,
+                       unsigned start_slot, unsigned count,
+                       const struct pipe_vertex_buffer *vb)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        struct vc4_vertexbuf_stateobj *so = &vc4->vertexbuf;
+
+        util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb,
+                                     start_slot, count);
+        so->count = util_last_bit(so->enabled_mask);
+
+        vc4->dirty |= VC4_DIRTY_VTXBUF;
+}
+
+/**
+ * Binds (or, with a NULL \p ib, unbinds) the index buffer.
+ */
+static void
+vc4_set_index_buffer(struct pipe_context *pctx,
+                     const struct pipe_index_buffer *ib)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+
+        if (ib) {
+                pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
+                vc4->indexbuf.index_size = ib->index_size;
+                vc4->indexbuf.offset = ib->offset;
+                vc4->indexbuf.user_buffer = ib->user_buffer;
+        } else {
+                pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
+                /* Don't keep a stale pointer into memory owned by the
+                 * caller around after the unbind.
+                 */
+                vc4->indexbuf.user_buffer = NULL;
+        }
+
+        vc4->dirty |= VC4_DIRTY_INDEXBUF;
+}
+
+static void
+vc4_blend_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->blend = hwcso;
+        vc4->dirty |= VC4_DIRTY_BLEND;
+}
+
+static void
+vc4_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->rasterizer = hwcso;
+        vc4->dirty |= VC4_DIRTY_RASTERIZER;
+}
+
+static void
+vc4_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->zsa = hwcso;
+        vc4->dirty |= VC4_DIRTY_ZSA;
+}
+
+/**
+ * Creates the vertex-elements CSO by copying the element descriptions.
+ */
+static void *
+vc4_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
+                        const struct pipe_vertex_element *elements)
+{
+        struct vc4_vertex_stateobj *so = CALLOC_STRUCT(vc4_vertex_stateobj);
+
+        if (!so)
+                return NULL;
+
+        /* The copy below is otherwise unbounded; make sure it fits in the
+         * embedded array.
+         */
+        assert(num_elements <= sizeof(so->pipe) / sizeof(so->pipe[0]));
+
+        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
+        so->num_elements = num_elements;
+
+        return so;
+}
+
+static void
+vc4_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->vtx = hwcso;
+        vc4->dirty |= VC4_DIRTY_VTXSTATE;
+}
+
+/**
+ * Binds or unbinds constant buffer slot \p index (only slot 0 is
+ * supported) of the given shader stage.
+ */
+static void
+vc4_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
+                        struct pipe_constant_buffer *cb)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        struct vc4_constbuf_stateobj *so = &vc4->constbuf[shader];
+
+        assert(index == 0);
+
+        /* Note that the state tracker can unbind constant buffers by
+         * passing NULL here.
+         */
+        if (unlikely(!cb)) {
+                so->enabled_mask &= ~(1 << index);
+                so->dirty_mask &= ~(1 << index);
+                pipe_resource_reference(&so->cb[index].buffer, NULL);
+                return;
+        }
+
+        pipe_resource_reference(&so->cb[index].buffer, cb->buffer);
+        so->cb[index].buffer_offset = cb->buffer_offset;
+        so->cb[index].buffer_size = cb->buffer_size;
+        so->cb[index].user_buffer = cb->user_buffer;
+
+        so->enabled_mask |= 1 << index;
+        so->dirty_mask |= 1 << index;
+        vc4->dirty |= VC4_DIRTY_CONSTBUF;
+}
+
+/**
+ * Replaces the bound framebuffer state.
+ *
+ * The context is flushed first, then references are taken on the new
+ * color/depth surfaces and dropped on any color slots no longer in use.
+ */
+static void
+vc4_set_framebuffer_state(struct pipe_context *pctx,
+                          const struct pipe_framebuffer_state *framebuffer)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        struct pipe_framebuffer_state *cso = &vc4->framebuffer;
+        unsigned i;
+
+        vc4_flush(pctx);
+
+        for (i = 0; i < framebuffer->nr_cbufs; i++)
+                pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
+        for (; i < vc4->framebuffer.nr_cbufs; i++)
+                pipe_surface_reference(&cso->cbufs[i], NULL);
+
+        cso->nr_cbufs = framebuffer->nr_cbufs;
+
+        cso->width = framebuffer->width;
+        cso->height = framebuffer->height;
+
+        pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);
+
+        vc4->dirty |= VC4_DIRTY_FRAMEBUFFER;
+}
+
+/**
+ * Returns the texture state object of a shader stage and flags it dirty.
+ *
+ * Sets VC4_DIRTY_TEXSTATE plus the per-stage dirty bit.  Aborts on any
+ * stage other than fragment or vertex.
+ */
+static struct vc4_texture_stateobj *
+vc4_get_stage_tex(struct vc4_context *vc4, unsigned shader)
+{
+        vc4->dirty |= VC4_DIRTY_TEXSTATE;
+
+        switch (shader) {
+        case PIPE_SHADER_FRAGMENT:
+                vc4->dirty |= VC4_DIRTY_FRAGTEX;
+                return &vc4->fragtex;
+        case PIPE_SHADER_VERTEX:
+                vc4->dirty |= VC4_DIRTY_VERTTEX;
+                return &vc4->verttex;
+        default:
+                fprintf(stderr, "Unknown shader target %d\n", shader);
+                abort();
+        }
+}
+
+static void *
+vc4_create_sampler_state(struct pipe_context *pctx,
+                         const struct pipe_sampler_state *cso)
+{
+        return vc4_generic_cso_state_create(cso, sizeof(*cso));
+}
+
+/**
+ * Binds \p nr sampler states starting at slot 0 (\p start must be 0).
+ *
+ * Previously bound slots beyond \p nr are cleared, and the sampler count
+ * becomes one past the last non-NULL entry.
+ */
+static void
+vc4_sampler_states_bind(struct pipe_context *pctx,
+                        unsigned shader, unsigned start,
+                        unsigned nr, void **hwcso)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        struct vc4_texture_stateobj *stage_tex = vc4_get_stage_tex(vc4, shader);
+        unsigned i;
+        unsigned new_nr = 0;
+
+        assert(start == 0);
+
+        for (i = 0; i < nr; i++) {
+                if (hwcso[i])
+                        new_nr = i + 1;
+                stage_tex->samplers[i] = hwcso[i];
+                stage_tex->dirty_samplers |= (1 << i);
+        }
+
+        for (; i < stage_tex->num_samplers; i++) {
+                stage_tex->samplers[i] = NULL;
+                stage_tex->dirty_samplers |= (1 << i);
+        }
+
+        stage_tex->num_samplers = new_nr;
+}
+
+/**
+ * Creates a sampler view: a copy of the template that holds its own
+ * reference on \p prsc.
+ */
+static struct pipe_sampler_view *
+vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
+                        const struct pipe_sampler_view *cso)
+{
+        struct pipe_sampler_view *so = MALLOC_STRUCT(pipe_sampler_view);
+
+        if (!so)
+                return NULL;
+
+        *so = *cso;
+        /* Take a reference on the resource for the view's texture pointer. */
+        pipe_reference(NULL, &prsc->reference);
+        so->texture = prsc;
+        so->reference.count = 1;
+        so->context = pctx;
+
+        return so;
+}
+
+static void
+vc4_sampler_view_destroy(struct pipe_context *pctx,
+                         struct pipe_sampler_view *view)
+{
+        pipe_resource_reference(&view->texture, NULL);
+        FREE(view);
+}
+
+/**
+ * Binds \p nr sampler views starting at slot 0 (\p start must be 0).
+ *
+ * Previously bound slots beyond \p nr are unreferenced, and the texture
+ * count becomes one past the last non-NULL entry.
+ */
+static void
+vc4_set_sampler_views(struct pipe_context *pctx, unsigned shader,
+                      unsigned start, unsigned nr,
+                      struct pipe_sampler_view **views)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        /* vc4_get_stage_tex() already flags VC4_DIRTY_TEXSTATE. */
+        struct vc4_texture_stateobj *stage_tex = vc4_get_stage_tex(vc4, shader);
+        unsigned i;
+        unsigned new_nr = 0;
+
+        assert(start == 0);
+
+        for (i = 0; i < nr; i++) {
+                if (views[i])
+                        new_nr = i + 1;
+                pipe_sampler_view_reference(&stage_tex->textures[i], views[i]);
+                stage_tex->dirty_samplers |= (1 << i);
+        }
+
+        for (; i < stage_tex->num_textures; i++) {
+                pipe_sampler_view_reference(&stage_tex->textures[i], NULL);
+                stage_tex->dirty_samplers |= (1 << i);
+        }
+
+        stage_tex->num_textures = new_nr;
+}
+
+/**
+ * Installs all state-setting and CSO hooks on the context.
+ */
+void
+vc4_state_init(struct pipe_context *pctx)
+{
+        pctx->set_blend_color = vc4_set_blend_color;
+        pctx->set_stencil_ref = vc4_set_stencil_ref;
+        pctx->set_clip_state = vc4_set_clip_state;
+        pctx->set_sample_mask = vc4_set_sample_mask;
+        pctx->set_constant_buffer = vc4_set_constant_buffer;
+        pctx->set_framebuffer_state = vc4_set_framebuffer_state;
+        pctx->set_polygon_stipple = vc4_set_polygon_stipple;
+        pctx->set_scissor_states = vc4_set_scissor_states;
+        pctx->set_viewport_states = vc4_set_viewport_states;
+
+        pctx->set_vertex_buffers = vc4_set_vertex_buffers;
+        pctx->set_index_buffer = vc4_set_index_buffer;
+
+        pctx->create_blend_state = vc4_create_blend_state;
+        pctx->bind_blend_state = vc4_blend_state_bind;
+        pctx->delete_blend_state = vc4_generic_cso_state_delete;
+
+        pctx->create_rasterizer_state = vc4_create_rasterizer_state;
+        pctx->bind_rasterizer_state = vc4_rasterizer_state_bind;
+        pctx->delete_rasterizer_state = vc4_generic_cso_state_delete;
+
+        pctx->create_depth_stencil_alpha_state = vc4_create_depth_stencil_alpha_state;
+        pctx->bind_depth_stencil_alpha_state = vc4_zsa_state_bind;
+        pctx->delete_depth_stencil_alpha_state = vc4_generic_cso_state_delete;
+
+        pctx->create_vertex_elements_state = vc4_vertex_state_create;
+        pctx->delete_vertex_elements_state = vc4_generic_cso_state_delete;
+        pctx->bind_vertex_elements_state = vc4_vertex_state_bind;
+
+        pctx->create_sampler_state = vc4_create_sampler_state;
+        pctx->delete_sampler_state = vc4_generic_cso_state_delete;
+        pctx->bind_sampler_states = vc4_sampler_states_bind;
+
+        pctx->create_sampler_view = vc4_create_sampler_view;
+        pctx->sampler_view_destroy = vc4_sampler_view_destroy;
+        pctx->set_sampler_views = vc4_set_sampler_views;
+}
|