panfrost: Initial stub for Panfrost driver

This patch adds an initial stub for the Gallium driver, containing simple screen functions and the majority of the driver headers but no actual functionality. It further adds the winsys glue for linking in this stub driver via kmsro on Rockchip/Amlogic boards. Signed-off-by: Alyssa Rosenzweig <[email protected]> Acked-by: Jason Ekstrand <[email protected]> Acked-by: Rob Clark <[email protected]> Acked-by: Eric Anholt <[email protected]> Acked-by: Emil Velikov <[email protected]>
author: Alyssa Rosenzweig <[email protected]> 2019-01-29 05:46:07 +0000
committer: Alyssa Rosenzweig <[email protected]> 2019-02-05 01:19:30 +0000
commit: 61d3ae6e0bde93c5601278d1a60c44be655a7cb5 (patch)
tree: c0c23980f21110a1459db40e18b06edb92d87ded /src
parent: 742d6cdb42e5570a3a74005f18bb89208069d01f (diff)
25 files changed, 3239 insertions, 1 deletions
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index fd901fe64ec..2ddbb2d125b 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -122,6 +122,11 @@ static const struct drm_driver_descriptor driver_descriptors[] = {
         .configuration = pipe_default_configuration_query,
     },
     {
+        .driver_name = "panfrost",
+        .create_screen = pipe_panfrost_create_screen,
+        .configuration = pipe_default_configuration_query,
+    },
+    {
         .driver_name = "etnaviv",
         .create_screen = pipe_etna_create_screen,
         .configuration = pipe_default_configuration_query,
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h
index 5e43011b266..1a87c4494d4 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -333,6 +333,29 @@ pipe_v3d_create_screen(int fd, const struct pipe_screen_config *config)
 
 #endif
 
+#ifdef GALLIUM_PANFROST
+#include "panfrost/drm/panfrost_drm_public.h"
+
+struct pipe_screen *
+pipe_panfrost_create_screen(int fd, const struct pipe_screen_config *config)
+{
+   struct pipe_screen *screen = panfrost_drm_screen_create(fd);
+
+   /* Pass the screen through debug_screen_wrap() only if creation succeeded. */
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_panfrost_create_screen(int fd, const struct pipe_screen_config *config)
+{
+   fputs("panfrost: driver missing\n", stderr);
+   return NULL;
+}
+
+#endif
+
 #ifdef GALLIUM_ETNAVIV
 #include "etnaviv/drm/etnaviv_drm_public.h"
 
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper_public.h b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
index 750ed09335c..0108a7f0bbb 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper_public.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
@@ -43,6 +43,9 @@ struct pipe_screen *
 pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config);
 
 struct pipe_screen *
+pipe_panfrost_create_screen(int fd, const struct pipe_screen_config *config);
+
+struct pipe_screen *
 pipe_kmsro_create_screen(int fd, const struct pipe_screen_config *config);
 
 struct pipe_screen *
diff --git a/src/gallium/drivers/panfrost/.editorconfig b/src/gallium/drivers/panfrost/.editorconfig
new file mode 100644
index 00000000000..40cd1044ada
--- /dev/null
+++ b/src/gallium/drivers/panfrost/.editorconfig
@@ -0,0 +1,3 @@
+[*.{c,h,cpp,hpp,cc,hh}]
+indent_style = space
+indent_size = 8
diff --git a/src/gallium/drivers/panfrost/include/meson.build b/src/gallium/drivers/panfrost/include/meson.build
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/src/gallium/drivers/panfrost/include/meson.build
diff --git a/src/gallium/drivers/panfrost/include/panfrost-job.h b/src/gallium/drivers/panfrost/include/panfrost-job.h
new file mode 100644
index 00000000000..dbb5486bfa4
--- /dev/null
+++ b/src/gallium/drivers/panfrost/include/panfrost-job.h
@@ -0,0 +1,1481 @@
+/*
+ * © Copyright 2017-2018 Alyssa Rosenzweig
+ * © Copyright 2017-2018 Connor Abbott
+ * © Copyright 2017-2018 Lyude Paul
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __PANFROST_JOB_H__
+#define __PANFROST_JOB_H__
+
+#include <stdint.h>
+#include <panfrost-misc.h>
+
+#define T8XX
+
+#define MALI_SHORT_PTR_BITS (sizeof(uintptr_t)*8)
+
+#define MALI_FBD_HIERARCHY_WEIGHTS 8
+
+#define MALI_PAYLOAD_SIZE 256
+
+typedef u32 mali_jd_core_req;
+
+enum mali_job_type {
+        JOB_NOT_STARTED	= 0,
+        JOB_TYPE_NULL = 1,
+        JOB_TYPE_SET_VALUE = 2,
+        JOB_TYPE_CACHE_FLUSH = 3,
+        JOB_TYPE_COMPUTE = 4,
+        JOB_TYPE_VERTEX = 5,
+        JOB_TYPE_GEOMETRY = 6,
+        JOB_TYPE_TILER = 7,
+        JOB_TYPE_FUSED = 8,
+        JOB_TYPE_FRAGMENT = 9,
+};
+
+enum mali_draw_mode {
+        MALI_DRAW_NONE      = 0x0,
+        MALI_POINTS         = 0x1,
+        MALI_LINES          = 0x2,
+        MALI_LINE_STRIP     = 0x4,
+        MALI_LINE_LOOP      = 0x6,
+        MALI_TRIANGLES      = 0x8,
+        MALI_TRIANGLE_STRIP = 0xA,
+        MALI_TRIANGLE_FAN   = 0xC,
+        MALI_POLYGON        = 0xD,
+        MALI_QUADS          = 0xE,
+        MALI_QUAD_STRIP     = 0xF,
+
+        /* All other modes invalid */
+};
+
+/* Applies to tiler_gl_enables */
+
+#define MALI_CULL_FACE_BACK  0x80
+#define MALI_CULL_FACE_FRONT 0x40
+
+#define MALI_FRONT_FACE(v) ((v) << 5) /* v: winding value, e.g. MALI_CCW / MALI_CW */
+#define MALI_CCW (0)
+#define MALI_CW  (1)
+
+#define MALI_OCCLUSION_BOOLEAN 0x8
+
+/* TODO: Might this actually be a finer bitfield? */
+#define MALI_DEPTH_STENCIL_ENABLE 0x6400
+/* Yields source text naming a field value; unrecognised values are flagged. */
+#define DS_ENABLE(field) \
+	(((field) == MALI_DEPTH_STENCIL_ENABLE) \
+	? "MALI_DEPTH_STENCIL_ENABLE" \
+	: ((field) == 0) ? "0" \
+	: "0 /* XXX: Unknown, check hexdump */")
+
+/* Used in stencil and depth tests */
+
+enum mali_func {
+        MALI_FUNC_NEVER    = 0,
+        MALI_FUNC_LESS     = 1,
+        MALI_FUNC_EQUAL    = 2,
+        MALI_FUNC_LEQUAL   = 3,
+        MALI_FUNC_GREATER  = 4,
+        MALI_FUNC_NOTEQUAL = 5,
+        MALI_FUNC_GEQUAL   = 6,
+        MALI_FUNC_ALWAYS   = 7
+};
+
+/* Same as OpenGL, but mixed up. Why? Because forget me, that's why! */
+
+enum mali_alt_func {
+        MALI_ALT_FUNC_NEVER    = 0,
+        MALI_ALT_FUNC_GREATER  = 1,
+        MALI_ALT_FUNC_EQUAL    = 2,
+        MALI_ALT_FUNC_GEQUAL   = 3,
+        MALI_ALT_FUNC_LESS     = 4,
+        MALI_ALT_FUNC_NOTEQUAL = 5,
+        MALI_ALT_FUNC_LEQUAL   = 6,
+        MALI_ALT_FUNC_ALWAYS   = 7
+};
+
+/* Flags apply to unknown2_3? */
+
+#define MALI_HAS_MSAA		(1 << 0)
+#define MALI_CAN_DISCARD 	(1 << 5)
+
+/* Applies on T6XX, specifying that programmable blending is in use */
+#define MALI_HAS_BLEND_SHADER 	(1 << 6)
+
+/* func is mali_func; it occupies the 3 bits starting at bit 8 */
+#define MALI_DEPTH_FUNC(func)	   ((func) << 8)
+#define MALI_GET_DEPTH_FUNC(flags) (((flags) >> 8) & 0x7)
+#define MALI_DEPTH_FUNC_MASK	   MALI_DEPTH_FUNC(0x7)
+
+#define MALI_DEPTH_TEST		(1 << 11)
+
+/* Next flags to unknown2_4 */
+#define MALI_STENCIL_TEST      	(1 << 0)
+
+/* What?! */
+#define MALI_SAMPLE_ALPHA_TO_COVERAGE_NO_BLEND_SHADER (1 << 1)
+
+#define MALI_NO_DITHER		(1 << 9)
+#define MALI_DEPTH_RANGE_A	(1 << 12)
+#define MALI_DEPTH_RANGE_B	(1 << 13)
+#define MALI_NO_MSAA		(1 << 14)
+
+/* Stencil test state is all encoded in a single u32, just with a lot of
+ * enums... */
+
+enum mali_stencil_op {
+        MALI_STENCIL_KEEP 	= 0,
+        MALI_STENCIL_REPLACE 	= 1,
+        MALI_STENCIL_ZERO 	= 2,
+        MALI_STENCIL_INVERT 	= 3,
+        MALI_STENCIL_INCR_WRAP 	= 4,
+        MALI_STENCIL_DECR_WRAP 	= 5,
+        MALI_STENCIL_INCR 	= 6,
+        MALI_STENCIL_DECR 	= 7
+};
+
+struct mali_stencil_test {
+        unsigned ref  			: 8;
+        unsigned mask 			: 8;
+        enum mali_func func 		: 3;
+        enum mali_stencil_op sfail 	: 3;
+        enum mali_stencil_op dpfail 	: 3;
+        enum mali_stencil_op dppass 	: 3;
+        unsigned zero			: 4;
+} __attribute__((packed));
+
+/* Blending is a mess, since anything fancy triggers a blend shader, and
+ * -those- are not understood whatsoever yet */
+
+#define MALI_MASK_R (1 << 0)
+#define MALI_MASK_G (1 << 1)
+#define MALI_MASK_B (1 << 2)
+#define MALI_MASK_A (1 << 3)
+
+enum mali_nondominant_mode {
+        MALI_BLEND_NON_MIRROR = 0,
+        MALI_BLEND_NON_ZERO = 1
+};
+
+enum mali_dominant_blend {
+        MALI_BLEND_DOM_SOURCE = 0,
+        MALI_BLEND_DOM_DESTINATION  = 1
+};
+
+enum mali_dominant_factor {
+        MALI_DOMINANT_UNK0 = 0,
+        MALI_DOMINANT_ZERO = 1,
+        MALI_DOMINANT_SRC_COLOR = 2,
+        MALI_DOMINANT_DST_COLOR = 3,
+        MALI_DOMINANT_UNK4 = 4,
+        MALI_DOMINANT_SRC_ALPHA = 5,
+        MALI_DOMINANT_DST_ALPHA = 6,
+        MALI_DOMINANT_CONSTANT = 7,
+};
+
+enum mali_blend_modifier {
+        MALI_BLEND_MOD_UNK0 = 0,
+        MALI_BLEND_MOD_NORMAL = 1,
+        MALI_BLEND_MOD_SOURCE_ONE = 2,
+        MALI_BLEND_MOD_DEST_ONE = 3,
+};
+
+struct mali_blend_mode {
+        enum mali_blend_modifier clip_modifier : 2;
+        unsigned unused_0 : 1;
+        unsigned negate_source : 1;
+
+        enum mali_dominant_blend dominant : 1;
+
+        enum mali_nondominant_mode nondominant_mode : 1;
+
+        unsigned unused_1 : 1;
+
+        unsigned negate_dest : 1;
+
+        enum mali_dominant_factor dominant_factor : 3;
+        unsigned complement_dominant : 1;
+} __attribute__((packed));
+
+struct mali_blend_equation {
+        /* Of type mali_blend_mode */
+        unsigned rgb_mode : 12;
+        unsigned alpha_mode : 12;
+
+        unsigned zero1 : 4;
+
+        /* Corresponds to MALI_MASK_* above and glColorMask arguments */
+
+        unsigned color_mask : 4;
+
+        /* Attached constant for CONSTANT_ALPHA, etc */
+
+#ifndef BIFROST
+        float constant;
+#endif
+} __attribute__((packed));
+
+/* Used with channel swizzling */
+enum mali_channel {
+	MALI_CHANNEL_RED = 0,
+	MALI_CHANNEL_GREEN = 1,
+	MALI_CHANNEL_BLUE = 2,
+	MALI_CHANNEL_ALPHA = 3,
+	MALI_CHANNEL_ZERO = 4,
+	MALI_CHANNEL_ONE = 5,
+	MALI_CHANNEL_RESERVED_0 = 6,
+	MALI_CHANNEL_RESERVED_1 = 7,
+};
+
+struct mali_channel_swizzle {
+	enum mali_channel r : 3;
+	enum mali_channel g : 3;
+	enum mali_channel b : 3;
+	enum mali_channel a : 3;
+} __attribute__((packed));
+
+/* Compressed per-pixel formats. Each of these formats expands to one to four
+ * floating-point or integer numbers, as defined by the OpenGL specification.
+ * There are various places in OpenGL where the user can specify a compressed
+ * format in memory, which all use the same 8-bit enum in the various
+ * descriptors, although different hardware units support different formats.
+ */
+
+/* The top 3 bits specify how the bits of each component are interpreted. */
+
+/* e.g. R11F_G11F_B10F */
+#define MALI_FORMAT_SPECIAL (2 << 5)
+
+/* signed normalized, e.g. RGBA8_SNORM */
+#define MALI_FORMAT_SNORM (3 << 5)
+
+/* e.g. RGBA8UI */
+#define MALI_FORMAT_UINT (4 << 5)
+
+/* e.g. RGBA8 and RGBA32F */
+#define MALI_FORMAT_UNORM (5 << 5)
+
+/* e.g. RGBA8I and RGBA16F */
+#define MALI_FORMAT_SINT (6 << 5)
+
+/* These formats seem to largely duplicate the others. They're used at least
+ * for Bifrost framebuffer output.
+ */
+#define MALI_FORMAT_SPECIAL2 (7 << 5)
+
+/* If the high 3 bits are 3 to 6 these two bits say how many components
+ * there are. The count n is stored biased by one (n - 1) at bit 3.
+ */
+#define MALI_NR_CHANNELS(n) (((n) - 1) << 3)
+
+/* If the high 3 bits are 3 to 6, then the low 3 bits say how big each
+ * component is, except the special MALI_CHANNEL_FLOAT which overrides what the
+ * bits mean.
+ */
+
+#define MALI_CHANNEL_8 3
+
+#define MALI_CHANNEL_16 4
+
+#define MALI_CHANNEL_32 5
+
+/* For MALI_FORMAT_SINT it means a half-float (e.g. RG16F). For
+ * MALI_FORMAT_UNORM, it means a 32-bit float.
+ */
+#define MALI_CHANNEL_FLOAT 7
+
+enum mali_format {
+	MALI_RGB10_A2_UNORM = MALI_FORMAT_SPECIAL | 0x3,
+	MALI_RGB10_A2_SNORM = MALI_FORMAT_SPECIAL | 0x5,
+	MALI_RGB10_A2UI     = MALI_FORMAT_SPECIAL | 0x7,
+	MALI_RGB10_A2I      = MALI_FORMAT_SPECIAL | 0x9,
+
+	/* YUV formats */
+	MALI_NV12           = MALI_FORMAT_SPECIAL | 0xc,
+
+	MALI_Z32_UNORM      = MALI_FORMAT_SPECIAL | 0xD,
+	MALI_R32_FIXED      = MALI_FORMAT_SPECIAL | 0x11,
+	MALI_RG32_FIXED     = MALI_FORMAT_SPECIAL | 0x12,
+	MALI_RGB32_FIXED    = MALI_FORMAT_SPECIAL | 0x13,
+	MALI_RGBA32_FIXED   = MALI_FORMAT_SPECIAL | 0x14,
+	MALI_R11F_G11F_B10F = MALI_FORMAT_SPECIAL | 0x19,
+	/* Only used for varyings, to indicate the transformed gl_Position */
+	MALI_VARYING_POS    = MALI_FORMAT_SPECIAL | 0x1e,
+	/* Only used for varyings, to indicate that the write should be
+	 * discarded.
+	 */
+	MALI_VARYING_DISCARD = MALI_FORMAT_SPECIAL | 0x1f,
+
+	MALI_R8_SNORM     = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(1) | MALI_CHANNEL_8,
+	MALI_R16_SNORM    = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(1) | MALI_CHANNEL_16,
+	MALI_R32_SNORM    = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(1) | MALI_CHANNEL_32,
+	MALI_RG8_SNORM    = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(2) | MALI_CHANNEL_8,
+	MALI_RG16_SNORM   = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(2) | MALI_CHANNEL_16,
+	MALI_RG32_SNORM   = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(2) | MALI_CHANNEL_32,
+	MALI_RGB8_SNORM   = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(3) | MALI_CHANNEL_8,
+	MALI_RGB16_SNORM  = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(3) | MALI_CHANNEL_16,
+	MALI_RGB32_SNORM  = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(3) | MALI_CHANNEL_32,
+	MALI_RGBA8_SNORM  = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(4) | MALI_CHANNEL_8,
+	MALI_RGBA16_SNORM = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(4) | MALI_CHANNEL_16,
+	MALI_RGBA32_SNORM = MALI_FORMAT_SNORM | MALI_NR_CHANNELS(4) | MALI_CHANNEL_32,
+
+	MALI_R8UI     = MALI_FORMAT_UINT | MALI_NR_CHANNELS(1) | MALI_CHANNEL_8,
+	MALI_R16UI    = MALI_FORMAT_UINT | MALI_NR_CHANNELS(1) | MALI_CHANNEL_16,
+	MALI_R32UI    = MALI_FORMAT_UINT | MALI_NR_CHANNELS(1) | MALI_CHANNEL_32,
+	MALI_RG8UI    = MALI_FORMAT_UINT | MALI_NR_CHANNELS(2) | MALI_CHANNEL_8,
+	MALI_RG16UI   = MALI_FORMAT_UINT | MALI_NR_CHANNELS(2) | MALI_CHANNEL_16,
+	MALI_RG32UI   = MALI_FORMAT_UINT | MALI_NR_CHANNELS(2) | MALI_CHANNEL_32,
+	MALI_RGB8UI   = MALI_FORMAT_UINT | MALI_NR_CHANNELS(3) | MALI_CHANNEL_8,
+	MALI_RGB16UI  = MALI_FORMAT_UINT | MALI_NR_CHANNELS(3) | MALI_CHANNEL_16,
+	MALI_RGB32UI  = MALI_FORMAT_UINT | MALI_NR_CHANNELS(3) | MALI_CHANNEL_32,
+	MALI_RGBA8UI  = MALI_FORMAT_UINT | MALI_NR_CHANNELS(4) | MALI_CHANNEL_8,
+	MALI_RGBA16UI = MALI_FORMAT_UINT | MALI_NR_CHANNELS(4) | MALI_CHANNEL_16,
+	MALI_RGBA32UI = MALI_FORMAT_UINT | MALI_NR_CHANNELS(4) | MALI_CHANNEL_32,
+
+	MALI_R8_UNORM = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(1) | MALI_CHANNEL_8,
+	MALI_R16_UNORM = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(1) | MALI_CHANNEL_16,
+	MALI_R32_UNORM = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(1) | MALI_CHANNEL_32,
+	MALI_R32F = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(1) | MALI_CHANNEL_FLOAT,
+	MALI_RG8_UNORM    = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(2) | MALI_CHANNEL_8,
+	MALI_RG16_UNORM   = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(2) | MALI_CHANNEL_16,
+	MALI_RG32_UNORM   = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(2) | MALI_CHANNEL_32,
+	MALI_RG32F = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(2) | MALI_CHANNEL_FLOAT,
+	MALI_RGB8_UNORM   = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(3) | MALI_CHANNEL_8,
+	MALI_RGB16_UNORM  = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(3) | MALI_CHANNEL_16,
+	MALI_RGB32_UNORM  = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(3) | MALI_CHANNEL_32,
+	MALI_RGB32F = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(3) | MALI_CHANNEL_FLOAT,
+	MALI_RGBA8_UNORM  = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(4) | MALI_CHANNEL_8,
+	MALI_RGBA16_UNORM = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(4) | MALI_CHANNEL_16,
+	MALI_RGBA32_UNORM = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(4) | MALI_CHANNEL_32,
+	MALI_RGBA32F = MALI_FORMAT_UNORM | MALI_NR_CHANNELS(4) | MALI_CHANNEL_FLOAT,
+
+	MALI_R8I     = MALI_FORMAT_SINT | MALI_NR_CHANNELS(1) | MALI_CHANNEL_8,
+	MALI_R16I    = MALI_FORMAT_SINT | MALI_NR_CHANNELS(1) | MALI_CHANNEL_16,
+	MALI_R32I    = MALI_FORMAT_SINT | MALI_NR_CHANNELS(1) | MALI_CHANNEL_32,
+	MALI_R16F    = MALI_FORMAT_SINT | MALI_NR_CHANNELS(1) | MALI_CHANNEL_FLOAT,
+	MALI_RG8I    = MALI_FORMAT_SINT | MALI_NR_CHANNELS(2) | MALI_CHANNEL_8,
+	MALI_RG16I   = MALI_FORMAT_SINT | MALI_NR_CHANNELS(2) | MALI_CHANNEL_16,
+	MALI_RG32I   = MALI_FORMAT_SINT | MALI_NR_CHANNELS(2) | MALI_CHANNEL_32,
+	MALI_RG16F   = MALI_FORMAT_SINT | MALI_NR_CHANNELS(2) | MALI_CHANNEL_FLOAT,
+	MALI_RGB8I   = MALI_FORMAT_SINT | MALI_NR_CHANNELS(3) | MALI_CHANNEL_8,
+	MALI_RGB16I  = MALI_FORMAT_SINT | MALI_NR_CHANNELS(3) | MALI_CHANNEL_16,
+	MALI_RGB32I  = MALI_FORMAT_SINT | MALI_NR_CHANNELS(3) | MALI_CHANNEL_32,
+	MALI_RGB16F  = MALI_FORMAT_SINT | MALI_NR_CHANNELS(3) | MALI_CHANNEL_FLOAT,
+	MALI_RGBA8I  = MALI_FORMAT_SINT | MALI_NR_CHANNELS(4) | MALI_CHANNEL_8,
+	MALI_RGBA16I = MALI_FORMAT_SINT | MALI_NR_CHANNELS(4) | MALI_CHANNEL_16,
+	MALI_RGBA32I = MALI_FORMAT_SINT | MALI_NR_CHANNELS(4) | MALI_CHANNEL_32,
+	MALI_RGBA16F = MALI_FORMAT_SINT | MALI_NR_CHANNELS(4) | MALI_CHANNEL_FLOAT,
+
+	MALI_RGBA4      = MALI_FORMAT_SPECIAL2 | 0x8,
+	MALI_RGBA8_2    = MALI_FORMAT_SPECIAL2 | 0xd,
+	MALI_RGB10_A2_2 = MALI_FORMAT_SPECIAL2 | 0xe,
+};
+
+
+/* Alpha coverage is encoded as 4-bits (from a clampf), with inversion
+ * literally performing a bitwise invert. The encode truncates toward zero
+ * (no rounding), so results can be slightly off from the ideal value. */
+
+#define MALI_ALPHA_COVERAGE(clampf) ((uint16_t) (int) ((clampf) * 15.0f))
+#define MALI_GET_ALPHA_COVERAGE(nibble) ((float) (nibble) / 15.0f)
+
+/* Applies to unknown1 */
+#define MALI_NO_ALPHA_TO_COVERAGE (1 << 10)
+
+struct mali_blend_meta {
+#ifdef T8XX
+        /* Base value of 0x200.
+         * OR with 0x1 for blending (anything other than REPLACE).
+         * OR with 0x2 for programmable blending
+         */
+
+        u64 unk1;
+
+        /* For programmable blending, these turn into the blend_shader address */
+        struct mali_blend_equation blend_equation_1;
+
+        u64 zero2;
+        struct mali_blend_equation blend_equation_2;
+#else
+        u32 unk1; // = 0x200
+        struct mali_blend_equation blend_equation;
+        /*
+         * - 0x19 normally
+         * - 0x3 when this slot is unused (everything else is 0 except the index)
+         * - 0x11 when this is the fourth slot (and it's used)
+         * - 0 when there is a blend shader
+         */
+        u16 unk2;
+        /* increments from 0 to 3 */
+        u16 index;
+
+	union {
+		struct {
+			/* So far, I've only seen:
+			 * - R001 for 1-component formats
+			 * - RG01 for 2-component formats
+			 * - RGB1 for 3-component formats
+			 * - RGBA for 4-component formats
+			 */
+			u32 swizzle : 12;
+			enum mali_format format : 8;
+
+			/* Type of the shader output variable. Note, this can
+			 * be different from the format.
+			 *
+			 * 0: f16 (mediump float)
+			 * 1: f32 (highp float)
+			 * 2: i32 (highp int)
+			 * 3: u32 (highp uint)
+			 * 4: i16 (mediump int)
+			 * 5: u16 (mediump uint)
+			 */
+			u32 shader_type : 3;
+			u32 zero : 9;
+		};
+
+		/* Only the low 32 bits of the blend shader are stored, the
+		 * high 32 bits are implicitly the same as the original shader.
+		 * According to the kernel driver, the program counter for
+		 * shaders is actually only 24 bits, so shaders cannot cross
+		 * the 2^24-byte boundary, and neither can the blend shader.
+		 * The blob handles this by allocating a 2^24 byte pool for
+		 * shaders, and making sure that any blend shaders are stored
+		 * in the same pool as the original shader. The kernel will
+		 * make sure this allocation is aligned to 2^24 bytes.
+		 */
+		u32 blend_shader;
+	};
+#endif
+} __attribute__((packed));
+
+struct mali_shader_meta {
+        mali_ptr shader;
+        u16 texture_count;
+        u16 sampler_count;
+        u16 attribute_count;
+        u16 varying_count;
+
+        union {
+                struct {
+                        u32 uniform_buffer_count : 4;
+                        u32 unk1 : 28; // = 0x800000 for vertex, 0x958020 for tiler
+                } bifrost1;
+                struct {
+                        /* 0x200 except MALI_NO_ALPHA_TO_COVERAGE. Mysterious 1
+                         * other times. Who knows really? */
+                        u16 unknown1;
+
+                        /* Whole number of uniform registers used, times two;
+                         * whole number of work registers used (no scale).
+                         */
+                        unsigned work_count : 5;
+                        unsigned uniform_count : 5;
+                        unsigned unknown2 : 6;
+                } midgard1;
+        };
+
+        /* On bifrost: Exactly the same as glPolygonOffset() for both.
+         * On midgard: Depth factor is exactly as passed to glPolygonOffset.
+         * Depth units is equal to the value passed to glPolygonOffset + 1.0f
+         * (use MALI_NEGATIVE)
+         */
+        float depth_units;
+        float depth_factor;
+
+        u32 unknown2_2;
+
+        u16 alpha_coverage;
+        u16 unknown2_3;
+
+        u8 stencil_mask_front;
+        u8 stencil_mask_back;
+        u16 unknown2_4;
+
+        struct mali_stencil_test stencil_front;
+        struct mali_stencil_test stencil_back;
+
+        union {
+                struct {
+                        u32 unk3 : 7;
+                        /* On Bifrost, some system values are preloaded in
+                         * registers R55-R62 by the thread dispatcher prior to
+                         * the start of shader execution. This is a bitfield
+                         * with one entry for each register saying which
+                         * registers need to be preloaded. Right now, the known
+                         * values are:
+                         *
+                         * Vertex/compute:
+                         * - R55 : gl_LocalInvocationID.xy
+                         * - R56 : gl_LocalInvocationID.z + unknown in high 16 bits
+                         * - R57 : gl_WorkGroupID.x
+                         * - R58 : gl_WorkGroupID.y
+                         * - R59 : gl_WorkGroupID.z
+                         * - R60 : gl_GlobalInvocationID.x
+                         * - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base)
+                         * - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base)
+                         *
+                         * Fragment:
+                         * - R55 : unknown, never seen (but the bit for this is
+                         *   always set?)
+                         * - R56 : unknown (bit always unset)
+                         * - R57 : gl_PrimitiveID
+                         * - R58 : gl_FrontFacing in low bit, potentially other stuff
+                         * - R59 : u16 fragment coordinates (used to compute
+                         *   gl_FragCoord.xy, together with sample positions)
+                         * - R60 : gl_SampleMask (used in epilog, so pretty
+                         *   much always used, but the bit is always 0 -- is
+                         *   this just always pushed?)
+                         * - R61 : gl_SampleMaskIn and gl_SampleID, used by
+                         *   varying interpolation.
+                         * - R62 : unknown (bit always unset).
+                         */
+                        u32 preload_regs : 8;
+                        /* In units of 8 bytes or 64 bits, since the
+                         * uniform/const port loads 64 bits at a time.
+                         */
+                        u32 uniform_count : 7;
+                        u32 unk4 : 10; // = 2
+                } bifrost2;
+                struct {
+                        u32 unknown2_7;
+                } midgard2;
+        };
+
+        /* zero on bifrost */
+        u32 unknown2_8;
+
+        /* Blending information for the older non-MRT Midgard HW. Check for
+         * MALI_HAS_BLEND_SHADER to decide how to interpret.
+         */
+
+        union {
+                mali_ptr blend_shader;
+                struct mali_blend_equation blend_equation;
+        };
+
+        /* There can be up to 4 blend_meta's. None of them are required for
+         * vertex shaders or the non-MRT case for Midgard (so the blob doesn't
+         * allocate any space).
+         */
+        struct mali_blend_meta blend_meta[];
+
+} __attribute__((packed));
+
+/* This only concerns hardware jobs */
+
+/* Possible values for job_descriptor_size */
+
+#define MALI_JOB_32 0
+#define MALI_JOB_64 1
+
+struct mali_job_descriptor_header {
+        u32 exception_status;
+        u32 first_incomplete_task;
+        u64 fault_pointer;
+        u8 job_descriptor_size : 1;
+        enum mali_job_type job_type : 7;
+        u8 job_barrier : 1;
+        u8 unknown_flags : 7;
+        u16 job_index;
+        u16 job_dependency_index_1;
+        u16 job_dependency_index_2;
+
+        union {
+                u64 next_job_64;
+                u32 next_job_32;
+        };
+} __attribute__((packed));
+
+struct mali_payload_set_value {
+        u64 out;
+        u64 unknown;
+} __attribute__((packed));
+
+/* Special attributes have a fixed index */
+#define MALI_SPECIAL_ATTRIBUTE_BASE 16
+#define MALI_VERTEX_ID   (MALI_SPECIAL_ATTRIBUTE_BASE + 0)
+#define MALI_INSTANCE_ID (MALI_SPECIAL_ATTRIBUTE_BASE + 1)
+
+/*
+ * Mali Attributes
+ *
+ * This structure lets the attribute unit compute the address of an attribute
+ * given the vertex and instance ID. Unfortunately, the way this works is
+ * rather complicated when instancing is enabled.
+ *
+ * To explain this, first we need to explain how compute and vertex threads are
+ * dispatched. This is a guess (although a pretty firm guess!) since the
+ * details are mostly hidden from the driver, except for attribute instancing.
+ * When a quad is dispatched, it receives a single, linear index. However, we
+ * need to translate that index into a (vertex id, instance id) pair, or a
+ * (local id x, local id y, local id z) triple for compute shaders (although
+ * vertex shaders and compute shaders are handled almost identically).
+ * Focusing on vertex shaders, one option would be to do:
+ *
+ * vertex_id = linear_id % num_vertices
+ * instance_id = linear_id / num_vertices
+ *
+ * but this involves a costly division and modulus by an arbitrary number.
+ * Instead, we could pad num_vertices. We dispatch padded_num_vertices *
+ * num_instances threads instead of num_vertices * num_instances, which results
+ * in some "extra" threads with vertex_id >= num_vertices, which we have to
+ * discard.  The more we pad num_vertices, the more "wasted" threads we
+ * dispatch, but the division is potentially easier.
+ *
+ * One straightforward choice is to pad num_vertices to the next power of two,
+ * which means that the division and modulus are just simple bit shifts and
+ * masking. But the actual algorithm is a bit more complicated. The thread
+ * dispatcher has special support for dividing by 3, 5, 7, and 9, in addition
+ * to dividing by a power of two. This is possibly using the technique
+ * described in patent US20170010862A1. As a result, padded_num_vertices can be
+ * 1, 3, 5, 7, or 9 times a power of two. This results in less wasted threads,
+ * since we need less padding.
+ *
+ * padded_num_vertices is picked by the hardware. The driver just specifies the
+ * actual number of vertices. At least for Mali G71, the first few cases are
+ * given by:
+ *
+ * num_vertices	| padded_num_vertices
+ * 3		| 4
+ * 4-7		| 8
+ * 8-11		| 12 (3 * 4)
+ * 12-15	| 16
+ * 16-19	| 20 (5 * 4)
+ *
+ * Note that padded_num_vertices is a multiple of four (presumably because
+ * threads are dispatched in groups of 4). Also, padded_num_vertices is always
+ * at least one more than num_vertices, which seems like a quirk of the
+ * hardware. For larger num_vertices, the hardware uses the following
+ * algorithm: using the binary representation of num_vertices, we look at the
+ * most significant set bit as well as the following 3 bits. Let n be the
+ * number of bits after those 4 bits. Then we set padded_num_vertices according
+ * to the following table:
+ *
+ * high bits	| padded_num_vertices
+ * 1000		| 9 * 2^n
+ * 1001		| 5 * 2^(n+1)
+ * 101x		| 3 * 2^(n+2)
+ * 110x		| 7 * 2^(n+1)
+ * 111x		| 2^(n+4)
+ *
+ * For example, if num_vertices = 70 is passed to glDraw(), its binary
+ * representation is 1000110, so n = 3 and the high bits are 1000, and
+ * therefore padded_num_vertices = 9 * 2^3 = 72.
+ *
+ * The attribute unit works in terms of the original linear_id. If
+ * num_instances = 1, then they are the same, and everything is simple.
+ * However, with instancing things get more complicated. There are four
+ * possible modes, two of them we can group together:
+ *
+ * 1. Use the linear_id directly. Only used when there is no instancing.
+ *
+ * 2. Use the linear_id modulo a constant. This is used for per-vertex
+ * attributes with instancing enabled by making the constant equal
+ * padded_num_vertices. Because the modulus is always padded_num_vertices, this
+ * mode only supports a modulus that is a power of 2 times 1, 3, 5, 7, or 9.
+ * The shift field specifies the power of two, while the extra_flags field
+ * specifies the odd number. If shift = n and extra_flags = m, then the modulus
+ * is (2m + 1) * 2^n. As an example, if num_vertices = 70, then as computed
+ * above, padded_num_vertices = 9 * 2^3, so we should set extra_flags = 4 and
+ * shift = 3. Note that we must exactly follow the hardware algorithm used to
+ * get padded_num_vertices in order to correctly implement per-vertex
+ * attributes.
+ *
+ * 3. Divide the linear_id by a constant. In order to correctly implement
+ * instance divisors, we have to divide linear_id by padded_num_vertices times
+ * the user-specified divisor. So first we compute padded_num_vertices, again
+ * following the exact same algorithm that the hardware uses, then multiply it
+ * by the GL-level divisor to get the hardware-level divisor. This case is
+ * further divided into two more cases. If the hardware-level divisor is a
+ * power of two, then we just need to shift. The shift amount is specified by
+ * the shift field, so that the hardware-level divisor is just 2^shift.
+ *
+ * If it isn't a power of two, then we have to divide by an arbitrary integer.
+ * For that, we use the well-known technique of multiplying by an approximation
+ * of the inverse. The driver must compute the magic multiplier and shift
+ * amount, and then the hardware does the multiplication and shift. The
+ * hardware and driver also use the "round-down" optimization as described in
+ * http://ridiculousfish.com/files/faster_unsigned_division_by_constants.pdf.
+ * The hardware further assumes the multiplier is between 2^31 and 2^32, so the
+ * high bit is implicitly set to 1 even though it is set to 0 by the driver --
+ * presumably this simplifies the hardware multiplier a little. The hardware
+ * first multiplies linear_id by the multiplier and takes the high 32 bits,
+ * then applies the round-down correction if extra_flags = 1, then finally
+ * shifts right by the shift field.
+ *
+ * There are some differences between ridiculousfish's algorithm and the Mali
+ * hardware algorithm, which means that the reference code from ridiculousfish
+ * doesn't always produce the right constants. Mali does not use the pre-shift
+ * optimization, since that would make a hardware implementation slower (it
+ * would have to always do the pre-shift, multiply, and post-shift operations).
+ * It also forces the multiplier to be at least 2^31, which means that the
+ * exponent is entirely fixed, so there is no trial-and-error. Altogether,
+ * given the divisor d, the algorithm the driver must follow is:
+ *
+ * 1. Set shift = floor(log2(d)).
+ * 2. Compute m = ceil(2^(shift + 32) / d) and e = 2^(shift + 32) % d.
+ * 3. If e <= 2^shift, then we need to use the round-down algorithm. Set
+ * magic_divisor = m - 1 and extra_flags = 1.
+ * 4. Otherwise, set magic_divisor = m and extra_flags = 0.
+ */
+
+enum mali_attr_mode {
+	MALI_ATTR_UNUSED = 0,
+	MALI_ATTR_LINEAR = 1,
+	MALI_ATTR_POT_DIVIDE = 2, /* divisor is 2^shift */
+	MALI_ATTR_MODULO = 3, /* modulus is (2 * extra_flags + 1) * 2^shift */
+	MALI_ATTR_NPOT_DIVIDE = 4, /* uses magic_divisor from the following entry */
+};
+
+union mali_attr {
+	/* This is used for actual attributes. */
+	struct {
+		/* The bottom 3 bits are the mode */
+		mali_ptr elements : 64 - 8;
+		u32 shift : 5; /* power-of-two exponent for the divide/modulo modes */
+		u32 extra_flags : 3; /* MODULO: odd factor m; NPOT_DIVIDE: 1 = round-down */
+		u32 stride;
+		u32 size;
+	};
+	/* The entry after an NPOT_DIVIDE entry has this format. It stores
+	 * extra information that wouldn't fit in a normal entry.
+	 */
+	struct {
+		u32 unk; /* = 0x20 */
+		u32 magic_divisor; /* multiplier; the hardware implies its top bit */
+		u32 zero;
+		/* This is the original, GL-level divisor. */
+		u32 divisor;
+	};
+} __attribute__((packed));
+
+struct mali_attr_meta {
+        /* Vertex buffer index */
+        u8 index;
+
+        unsigned unknown1 : 2;
+        unsigned swizzle : 12; /* same width as mali_texture_format.swizzle */
+        enum mali_format format : 8;
+
+        /* Always observed to be zero at the moment */
+        unsigned unknown3 : 2; /* index + the four bit-fields total 32 bits */
+
+        /* When packing multiple attributes in a buffer, offset addresses by this value */
+        uint32_t src_offset;
+} __attribute__((packed));
+
+enum mali_fbd_type {
+        MALI_SFBD = 0, /* paired with struct mali_single_framebuffer in pan_context.h */
+        MALI_MFBD = 1, /* paired with struct bifrost_framebuffer in pan_context.h */
+};
+
+#define FBD_TYPE (1) /* presumably masks the mali_fbd_type bit -- confirm */
+#define FBD_MASK (~0x3f) /* clears the low six bits */
+
+struct mali_uniform_buffer_meta {
+        /* This is actually the size minus 1 (MALI_POSITIVE), in units of 16
+         * bytes. This gives a maximum of 2^14 bytes, which just so happens to
+         * be the GL minimum-maximum for GL_MAX_UNIFORM_BLOCK_SIZE.
+         */
+        u64 size : 10;
+
+        /* This is missing the bottom 2 bits and top 8 bits. The top 8 bits
+         * should be 0 for userspace pointers, according to
+         * https://lwn.net/Articles/718895/. By reusing these bits, we can make
+         * each entry in the table only 64 bits.
+         */
+        mali_ptr ptr : 64 - 10; /* 54 bits = 64 - 2 dropped low - 8 dropped high */
+};
+
+/* On Bifrost, these fields are the same between the vertex and tiler payloads.
+ * They also seem to be the same between Bifrost and Midgard. They're shared in
+ * fused payloads.
+ */
+
+/* Applies to unknown_draw */
+#define MALI_DRAW_INDEXED_UINT8  (0x10) /* the three index-size values share bits 4-5 */
+#define MALI_DRAW_INDEXED_UINT16 (0x20)
+#define MALI_DRAW_INDEXED_UINT32 (0x30)
+#define MALI_DRAW_VARYING_SIZE   (0x100) /* primitive size is read through a pointer */
+
+struct mali_vertex_tiler_prefix {
+        /* This is a dynamic bitfield containing the following things in this order:
+         *
+         * - gl_WorkGroupSize.x
+         * - gl_WorkGroupSize.y
+         * - gl_WorkGroupSize.z
+         * - gl_NumWorkGroups.x
+         * - gl_NumWorkGroups.y
+         * - gl_NumWorkGroups.z
+         *
+         * The number of bits allocated for each number is based on the *_shift
+         * fields below. For example, workgroups_y_shift gives the bit that
+         * gl_NumWorkGroups.y starts at, and workgroups_z_shift gives the bit
+         * that gl_NumWorkGroups.z starts at (and therefore one after the bit
+         * that gl_NumWorkGroups.y ends at). The actual value for each gl_*
+         * value is one more than the stored value, since if any of the values
+         * are zero, then there would be no invocations (and hence no job). If
+         * there were 0 bits allocated to a given field, then it must be zero,
+         * and hence the real value is one.
+         *
+         * Vertex jobs reuse the same job dispatch mechanism as compute jobs,
+         * effectively doing glDispatchCompute(1, vertex_count, instance_count)
+         * where vertex count is the number of vertices.
+         */
+        u32 invocation_count;
+
+        u32 size_y_shift : 5; /* this group of six fields: 5+5+6+6+6+4 = 32 bits */
+        u32 size_z_shift : 5;
+        u32 workgroups_x_shift : 6;
+        u32 workgroups_y_shift : 6;
+        u32 workgroups_z_shift : 6;
+        /* This is max(workgroups_x_shift, 2) in all the cases I've seen. */
+        u32 workgroups_x_shift_2 : 4;
+
+        u32 draw_mode : 4; /* this group of three fields: 4+22+6 = 32 bits */
+        u32 unknown_draw : 22; /* takes the MALI_DRAW_* flags */
+
+        /* This is the same as workgroups_x_shift_2 in compute shaders, but
+         * always 5 for vertex jobs and 6 for tiler jobs. I suspect this has
+         * something to do with how many quads get put in the same execution
+         * engine, which is a balance (you don't want to starve the engine, but
+         * you also want to distribute work evenly).
+         */
+        u32 workgroups_x_shift_3 : 6;
+
+
+        /* Negative of draw_start for TILER jobs from what I've seen */
+        int32_t negative_start;
+        u32 zero1;
+
+        /* Like many other strictly nonzero quantities, index_count is
+         * subtracted by one. For an indexed cube, this is equal to 35 = 6
+         * faces * 2 triangles/per face * 3 vertices/per triangle - 1. That is,
+         * for an indexed draw, index_count is the number of actual vertices
+         * rendered whereas invocation_count is the number of unique vertices
+         * rendered (the number of times the vertex shader must be invoked).
+         * For non-indexed draws, this is just equal to invocation_count. */
+
+        u32 index_count;
+
+        /* No hidden structure; literally just a pointer to an array of uint
+         * indices (width depends on flags). Thanks, guys, for not making my
+         * life insane for once! NULL for non-indexed draws. */
+
+        uintptr_t indices;
+} __attribute__((packed));
+
+/* Point size / line width can either be specified as a 32-bit float (for
+ * constant size) or as a [machine word size]-bit GPU pointer (for varying size). If a pointer
+ * is selected, by setting the appropriate MALI_DRAW_VARYING_SIZE bit in the tiler
+ * payload, the contents of varying_pointer will be interpreted as an array of
+ * fp16 sizes, one for each vertex. gl_PointSize is therefore implemented by
+ * creating a special MALI_R16F varying writing to varying_pointer. */
+
+union midgard_primitive_size {
+        float constant; /* one size or width for the whole draw */
+        uintptr_t pointer; /* presumably the "varying_pointer" described above */
+};
+
+struct bifrost_vertex_only { /* sits between prefix and postfix in bifrost_payload_vertex */
+        u32 unk2; /* =0x2 */
+
+        u32 zero0;
+
+        u64 zero1;
+} __attribute__((packed));
+
+struct bifrost_tiler_heap_meta { /* presumably what bifrost_tiler_meta.tiler_heap_meta points at */
+        u32 zero;
+        u32 heap_size;
+        /* note: these are just guesses! */
+        mali_ptr tiler_heap_start;
+        mali_ptr tiler_heap_free;
+        mali_ptr tiler_heap_end;
+
+        /* hierarchy weights? but they're still 0 after the job has run... */
+        u32 zeros[12];
+} __attribute__((packed));
+
+struct bifrost_tiler_meta { /* presumably the target of the tiler_meta pointers elsewhere in this header */
+        u64 zero0;
+        u32 unk; // = 0xf0
+        u16 width;
+        u16 height;
+        u64 zero1;
+        mali_ptr tiler_heap_meta;
+        /* TODO what is this used for? */
+        u64 zeros[20];
+} __attribute__((packed));
+
+struct bifrost_tiler_only { /* sits between prefix and postfix in bifrost_payload_tiler */
+        /* 0x20 */
+        union midgard_primitive_size primitive_size;
+
+        mali_ptr tiler_meta;
+
+        u64 zero1, zero2, zero3, zero4, zero5, zero6;
+
+        u32 gl_enables;
+        u32 zero7;
+        u64 zero8;
+} __attribute__((packed));
+
+struct bifrost_scratchpad { /* on Bifrost the postfix "framebuffer" pointer refers to this */
+        u32 zero;
+        u32 flags; // = 0x1f
+        /* This is a pointer to a CPU-inaccessible buffer, 16 pages, allocated
+         * during startup. It seems to serve the same purpose as the
+         * gpu_scratchpad in the SFBD for Midgard, although it's slightly
+         * larger.
+         */
+        mali_ptr gpu_scratchpad;
+} __attribute__((packed));
+
+struct mali_vertex_tiler_postfix {
+        /* Zero for vertex jobs. Pointer to the position (gl_Position) varying
+         * output from the vertex shader for tiler jobs.
+         */
+
+        uintptr_t position_varying;
+
+        /* An array of mali_uniform_buffer_meta's. The size is given by the
+         * shader_meta.
+         */
+        uintptr_t uniform_buffers;
+
+        /* This is a pointer to an array of pointers to the texture
+         * descriptors, number of pointers bounded by number of textures. The
+         * indirection is needed to accommodate varying numbers and sizes of
+         * texture descriptors */
+        uintptr_t texture_trampoline;
+
+        /* For OpenGL, from what I've seen, this is intimately connected to
+         * texture_meta. cwabbott says this is not the case under Vulkan, hence
+         * why this field is separate (Midgard is Vulkan capable). Pointer to
+         * array of sampler descriptors (which are uniform in size) */
+        uintptr_t sampler_descriptor;
+
+        uintptr_t uniforms;
+        u8 flags : 4; /* flags + _shader_upper together span MALI_SHORT_PTR_BITS bits */
+        uintptr_t _shader_upper : MALI_SHORT_PTR_BITS - 4; /* struct shader_meta */
+        uintptr_t attributes; /* struct attribute_buffer[] */
+        uintptr_t attribute_meta; /* attribute_meta[] */
+        uintptr_t varyings; /* struct attr */
+        uintptr_t varying_meta; /* pointer */
+        uintptr_t viewport;
+        uintptr_t occlusion_counter; /* A single bit as far as I can tell */
+
+        /* Note: on Bifrost, this isn't actually the FBD. It points to
+         * bifrost_scratchpad instead. However, it does point to the same thing
+         * in vertex and tiler jobs.
+         */
+        mali_ptr framebuffer;
+
+#ifdef __LP64__
+#ifndef T8XX
+        /* most likely padding to make this a multiple of 64 bytes */
+        u64 zero7;
+#endif
+#endif
+} __attribute__((packed));
+
+struct midgard_payload_vertex_tiler {
+#ifdef T6XX /* T6XX: primitive_size leads the payload; T8XX: it trails it */
+        union midgard_primitive_size primitive_size;
+#endif
+
+        struct mali_vertex_tiler_prefix prefix;
+
+#ifdef T6XX
+        u32 zero3;
+#endif
+        u32 gl_enables; // 0x5
+
+        /* Offset for first vertex in buffer */
+        u32 draw_start;
+
+#ifdef T6XX /* zero5 is 32 bits wide on T6XX and 64 bits otherwise */
+        u32 zero5;
+#else
+        u64 zero5;
+#endif
+
+        struct mali_vertex_tiler_postfix postfix;
+
+#ifdef T8XX
+        union midgard_primitive_size primitive_size;
+#endif
+} __attribute__((packed));
+
+struct bifrost_payload_vertex { /* shared prefix, vertex-only words, shared postfix */
+        struct mali_vertex_tiler_prefix prefix;
+        struct bifrost_vertex_only vertex;
+        struct mali_vertex_tiler_postfix postfix;
+} __attribute__((packed));
+
+struct bifrost_payload_tiler { /* shared prefix, tiler-only words, shared postfix */
+        struct mali_vertex_tiler_prefix prefix;
+        struct bifrost_tiler_only tiler;
+        struct mali_vertex_tiler_postfix postfix;
+} __attribute__((packed));
+
+struct bifrost_payload_fused { /* one prefix, then a tiler part and a vertex part */
+        struct mali_vertex_tiler_prefix prefix;
+        struct bifrost_tiler_only tiler;
+        struct mali_vertex_tiler_postfix tiler_postfix; /* postfix for the tiler part */
+        struct bifrost_vertex_only vertex;
+        struct mali_vertex_tiler_postfix vertex_postfix; /* postfix for the vertex part */
+} __attribute__((packed));
+
+/* Pointed to from texture_trampoline, mostly unknown still, haven't
+ * managed to replay successfully */
+
+/* Purposeful off-by-one in width, height fields: a (64, 64) texture is stored
+ * as (63, 63). The framebuffer descriptor and vertex count fields share the
+ * pattern, hence the generic name -- strictly positive integral fields
+ * generally need this adjustment. The argument is parenthesized so operands
+ * like `w >> level` or `a ? b : c` expand as a unit before the subtraction. */
+
+#define MALI_POSITIVE(dim) ((dim) - 1)
+
+/* Opposite of MALI_POSITIVE, found in the depth_units field (argument parenthesized) */
+
+#define MALI_NEGATIVE(dim) ((dim) + 1)
+
+/* Wrap modes for the 4-bit wrap_s/wrap_t/wrap_r sampler fields; the list is incomplete */
+
+enum mali_wrap_mode {
+        MALI_WRAP_REPEAT = 0x8,
+        MALI_WRAP_CLAMP_TO_EDGE = 0x9,
+        MALI_WRAP_CLAMP_TO_BORDER = 0xB,
+        MALI_WRAP_MIRRORED_REPEAT = 0xC
+};
+
+/* 8192x8192. NOTE(review): a full mip chain from 8192 has 14 levels, not 13 -- confirm */
+#define MAX_MIP_LEVELS (13)
+
+/* Cubemap bloats everything up */
+#define MAX_FACES (6)
+
+/* Corresponds to the type passed to glTexImage2D and so forth */
+
+struct mali_texture_format {
+        unsigned swizzle : 12;
+        enum mali_format format : 8;
+
+        unsigned usage1 : 3;
+        unsigned is_not_cubemap : 1;
+        unsigned usage2 : 8; /* the five bit-fields total 12+8+3+1+8 = 32 bits */
+} __attribute__((packed));
+
+struct mali_texture_descriptor {
+        uint16_t width; /* stored minus one, see MALI_POSITIVE */
+        uint16_t height; /* stored minus one, see MALI_POSITIVE */
+        uint16_t depth;
+
+        uint16_t unknown1;
+
+        struct mali_texture_format format;
+
+        uint16_t unknown3;
+
+        /* One for non-mipmapped, zero for mipmapped */
+        uint8_t unknown3A;
+
+        /* Zero for non-mipmapped, (number of levels - 1) for mipmapped */
+        uint8_t nr_mipmap_levels;
+
+        /* Swizzling is a single 32-bit word, broken up here for convenience.
+         * Here, swizzling refers to the ES 3.0 texture parameters for channel
+         * level swizzling, not the internal pixel-level swizzling which is
+         * below OpenGL's reach */
+
+        unsigned swizzle : 12;
+        unsigned swizzle_zero       : 20;
+
+        uint32_t unknown5;
+        uint32_t unknown6;
+        uint32_t unknown7;
+
+        mali_ptr swizzled_bitmaps[MAX_MIP_LEVELS * MAX_FACES]; /* 13 * 6 = 78 entries */
+} __attribute__((packed));
+
+/* Used as part of filter_mode */
+
+#define MALI_LINEAR 0
+#define MALI_NEAREST 1
+#define MALI_MIP_LINEAR (0x18)
+
+/* Used to construct low bits of filter_mode */
+
+#define MALI_TEX_MAG(mode) (((mode) & 1) << 0) /* bit 0 of filter_mode */
+#define MALI_TEX_MIN(mode) (((mode) & 1) << 1) /* bit 1 of filter_mode */
+
+#define MALI_TEX_MAG_MASK (1)
+#define MALI_TEX_MIN_MASK (2)
+/* Name of a one-bit filter value; the argument is parenthesized so any expression works */
+#define MALI_FILTER_NAME(filter) ((filter) ? "MALI_NEAREST" : "MALI_LINEAR")
+
+/* 8.8 fixed-point LOD helpers (thanks @urjaman for the cleanup hint). The
+ * argument is parenthesized so DECODE_FIXED_16(a + b) divides the whole sum. */
+
+#define DECODE_FIXED_16(x) ((float) ((x) / 256.0))
+
+static inline uint16_t
+FIXED_16(float x)
+{
+        /* Encode an LOD as unsigned 8.8 fixed point (truncating), clamped to
+         * [0, 32 - 1/512]. NaN also clamps to 0 because !(x > 0) catches it;
+         * converting a NaN to an integer would be undefined behaviour. */
+        const float max_lod = (32.0 - (1.0 / 512.0));
+        const float clamped = !(x > 0.0) ? 0.0 : ((x > max_lod) ? max_lod : x);
+        return (int) (clamped * 256.0);
+}
+
+struct mali_sampler_descriptor {
+        uint32_t filter_mode; /* built from MALI_TEX_MAG / MALI_TEX_MIN / MALI_MIP_LINEAR */
+
+        /* Unsigned 8.8 fixed point: upper 8 bits are the integer part (capped
+         * to [0-31]), lower 8 bits the fraction. NOTE(review): described as
+         * int(round(x * 256)), but FIXED_16 truncates -- confirm */
+
+        uint16_t min_lod;
+        uint16_t max_lod;
+
+        /* All one word in reality, but packed a bit */
+
+        enum mali_wrap_mode wrap_s : 4;
+        enum mali_wrap_mode wrap_t : 4;
+        enum mali_wrap_mode wrap_r : 4;
+        enum mali_alt_func compare_func : 3;
+
+        /* A single set bit of unknown, ha! */
+        unsigned unknown2 : 1;
+
+        unsigned zero : 16; /* completes the word: 4+4+4+3+1+16 = 32 bits */
+
+        uint32_t zero2;
+        float border_color[4];
+} __attribute__((packed));
+
+/* TODO: What are the floats? Apparently always { -inf, -inf, inf, inf },
+ * unless the scissor test is enabled.
+ *
+ * viewport0/viewport1 form the arguments to glViewport. viewport1 is modified
+ * by MALI_POSITIVE; viewport0 is as-is.
+ */
+
+struct mali_viewport {
+        float floats[4]; /* meaning unknown, see the TODO above */
+
+        float depth_range_n;
+        float depth_range_f;
+
+        u16 viewport0[2]; /* stored as-is */
+        u16 viewport1[2]; /* stored through MALI_POSITIVE */
+} __attribute__((packed));
+
+/* TODO: Varying meta is symmetrical with attr_meta, but there is some
+ * weirdness associated. Figure it out. */
+
+struct mali_unknown6 { /* two undeciphered 64-bit words; not marked packed */
+        u64 unknown0;
+        u64 unknown1;
+};
+
+/* From presentations, 16x16 tiles externally. Use shift for fast computation
+ * of tile numbers. */
+
+#define MALI_TILE_SHIFT 4 /* log2 of the tile edge length */
+#define MALI_TILE_LENGTH (1 << MALI_TILE_SHIFT) /* 16 */
+
+/* Tile coordinates are stored as a compact u32, as only 12 bits are needed for
+ * each component. Notice that this provides a theoretical upper bound of (1 <<
+ * 12) = 4096 tiles in each direction, addressing a maximum framebuffer of size
+ * 65536x65536. Multiplying that together, times another four given that Mali
+ * framebuffers are 32-bit ARGB8888, means that this upper bound would take 16
+ * gigabytes of RAM just to store the uncompressed framebuffer itself, let
+ * alone rendering in real-time to such a buffer.
+ *
+ * Nice job, guys.*/
+
+/* From mali_kbase_10969_workaround.c */
+#define MALI_X_COORD_MASK 0x00000FFF /* bits 0-11 */
+#define MALI_Y_COORD_MASK 0x0FFF0000 /* bits 16-27 */
+
+/* Extract parts of a tile coordinate */
+
+#define MALI_TILE_COORD_X(coord) ((coord) & MALI_X_COORD_MASK)
+#define MALI_TILE_COORD_Y(coord) (((coord) & MALI_Y_COORD_MASK) >> 16)
+#define MALI_TILE_COORD_FLAGS(coord) ((coord) & ~(MALI_X_COORD_MASK | MALI_Y_COORD_MASK))
+
+/* No known flags yet, but just in case...? */
+
+#define MALI_TILE_NO_FLAG (0)
+
+/* Helpers to generate tile coordinates based on the boundary coordinates in
+ * screen space. So, with the bounds (0, 0) to (128, 128) for the screen, these
+ * functions would convert it to the bounding tiles (0, 0) to (7, 7).
+ * Intentional "off-by-one"; finding the tile number is a form of fencepost
+ * problem. Every macro argument is parenthesized before use. */
+
+#define MALI_MAKE_TILE_COORDS(X, Y) ((X) | ((Y) << 16))
+#define MALI_BOUND_TO_TILE(B, bias) (((B) - (bias)) >> MALI_TILE_SHIFT)
+#define MALI_COORDINATE_TO_TILE(W, H, bias) MALI_MAKE_TILE_COORDS(MALI_BOUND_TO_TILE(W, bias), MALI_BOUND_TO_TILE(H, bias))
+#define MALI_COORDINATE_TO_TILE_MIN(W, H) MALI_COORDINATE_TO_TILE(W, H, 0) /* bias 0 */
+#define MALI_COORDINATE_TO_TILE_MAX(W, H) MALI_COORDINATE_TO_TILE(W, H, 1) /* bias 1 */
+
+struct mali_payload_fragment {
+        u32 min_tile_coord; /* presumably built with MALI_COORDINATE_TO_TILE_MIN -- confirm */
+        u32 max_tile_coord; /* presumably built with MALI_COORDINATE_TO_TILE_MAX -- confirm */
+        mali_ptr framebuffer;
+} __attribute__((packed));
+
+/* (Single?) Framebuffer Descriptor */
+
+/* Flags apply to format. With just MSAA_A and MSAA_B, the framebuffer is
+ * configured for 4x. With MSAA_8, it is configured for 8x. */
+
+#define MALI_FRAMEBUFFER_MSAA_8 (1 << 3)
+#define MALI_FRAMEBUFFER_MSAA_A (1 << 4)
+#define MALI_FRAMEBUFFER_MSAA_B (1 << 23)
+
+/* Fast/slow based on whether all three buffers are cleared at once */
+
+#define MALI_CLEAR_FAST         (1 << 18)
+#define MALI_CLEAR_SLOW         (1 << 28)
+#define MALI_CLEAR_SLOW_STENCIL (1u << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+
+struct mali_single_framebuffer {
+        u32 unknown1;
+        u32 unknown2;
+        u64 unknown_address_0;
+        u64 zero1;
+        u64 zero0;
+
+        /* Exact format is ironically not known, since EGL is finicky with the
+         * blob. MSAA, colourspace, etc are configured here. */
+
+        u32 format;
+
+        u32 clear_flags; /* takes the MALI_CLEAR_* flags, presumably -- confirm */
+        u32 zero2;
+
+        /* Purposeful off-by-one in these fields should be accounted for by the
+         * MALI_DIMENSION macro (NOTE(review): presumably MALI_POSITIVE now) */
+
+        u16 width;
+        u16 height;
+
+        u32 zero3[8];
+
+        /* By default, the framebuffer is upside down from OpenGL's
+         * perspective. Set framebuffer to the end and negate the stride to
+         * flip in the Y direction */
+
+        mali_ptr framebuffer;
+        int32_t stride;
+
+        u32 zero4;
+
+        /* Depth and stencil buffers are interleaved, it appears, as they are
+         * set to the same address in captures. Both fields set to zero if the
+         * buffer is not being cleared. Depending on GL_ENABLE magic, you might
+         * get a zero enable despite the buffer being present; that still is
+         * disabled. */
+
+        mali_ptr depth_buffer; // not SAME_VA
+        u64 depth_buffer_enable;
+
+        mali_ptr stencil_buffer; // not SAME_VA
+        u64 stencil_buffer_enable;
+
+        u32 clear_color_1; // RGBA8888 from glClear, actually used by hardware
+        u32 clear_color_2; // always equal, but unclear function?
+        u32 clear_color_3; // always equal, but unclear function?
+        u32 clear_color_4; // always equal, but unclear function?
+
+        /* Set to zero if not cleared */
+
+        float clear_depth_1; // float32, ditto
+        float clear_depth_2; // float32, ditto
+        float clear_depth_3; // float32, ditto
+        float clear_depth_4; // float32, ditto
+
+        u32 clear_stencil; // Exactly as it appears in OpenGL
+
+        u32 zero6[7];
+
+        /* Very weird format, see generation code in trans_builder.c */
+        u32 resolution_check;
+
+        u32 tiler_flags;
+
+        u64 unknown_address_1; /* Pointing towards... a zero buffer? */
+        u64 unknown_address_2;
+
+        /* See mali_kbase_replay.c */
+        u64 tiler_heap_free;
+        u64 tiler_heap_end;
+
+        /* More below this, maybe */
+} __attribute__((packed));
+
+/* Format bits for the render target */
+
+#define MALI_MFBD_FORMAT_AFBC     (1 << 10)
+#define MALI_MFBD_FORMAT_MSAA     (1 << 12)
+#define MALI_MFBD_FORMAT_NO_ALPHA (1 << 25)
+
+struct bifrost_render_target {
+        u32 unk1; // = 0x4000000
+        u32 format; /* takes the MALI_MFBD_FORMAT_* bits */
+
+        u64 zero1;
+
+        union { /* both arms are 16 bytes wide */
+                struct {
+                        /* Stuff related to ARM Framebuffer Compression. When AFBC is enabled,
+                         * there is an extra metadata buffer that contains 16 bytes per tile.
+                         * The framebuffer needs to be the same size as before, since we don't
+                         * know ahead of time how much space it will take up. The
+                         * framebuffer_stride is set to 0, since the data isn't stored linearly
+                         * anymore.
+                         */
+
+                        mali_ptr metadata;
+                        u32 stride; // stride in units of tiles
+                        u32 unk; // = 0x20000
+                } afbc;
+
+                struct {
+                        /* Heck if I know */
+                        u64 unk;
+                        mali_ptr pointer;
+                } chunknown;
+        };
+
+        mali_ptr framebuffer;
+
+        u32 zero2 : 4;
+        u32 framebuffer_stride : 28; // in units of bytes
+        u32 zero3;
+
+        u32 clear_color_1; // RGBA8888 from glClear, actually used by hardware
+        u32 clear_color_2; // always equal, but unclear function?
+        u32 clear_color_3; // always equal, but unclear function?
+        u32 clear_color_4; // always equal, but unclear function?
+} __attribute__((packed));
+
+/* An optional part of bifrost_framebuffer. It comes between the main structure
+ * and the array of render targets. It must be included if any of these are
+ * enabled:
+ *
+ * - Transaction Elimination
+ * - Depth/stencil
+ * - TODO: Anything else?
+ */
+
+struct bifrost_fb_extra {
+        mali_ptr checksum;
+        /* Each tile has an 8 byte checksum, so the stride is "width in tiles * 8" */
+        u32 checksum_stride;
+
+        u32 unk;
+
+        union { /* the fields of each arm add up to 32 bytes */
+                /* Note: AFBC is only allowed for 24/8 combined depth/stencil. */
+                struct {
+                        mali_ptr depth_stencil_afbc_metadata;
+                        u32 depth_stencil_afbc_stride; // in units of tiles
+                        u32 zero1;
+
+                        mali_ptr depth_stencil;
+
+                        u64 padding;
+                } ds_afbc;
+
+                struct {
+                        /* Depth becomes depth/stencil in case of combined D/S */
+                        mali_ptr depth;
+                        u32 depth_stride_zero : 4;
+                        u32 depth_stride : 28;
+                        u32 zero1;
+
+                        mali_ptr stencil;
+                        u32 stencil_stride_zero : 4;
+                        u32 stencil_stride : 28;
+                        u32 zero2;
+                } ds_linear;
+        };
+
+
+        u64 zero3, zero4;
+} __attribute__((packed));
+
+/* flags for unk3 */
+#define MALI_MFBD_EXTRA (1 << 13)
+
+struct bifrost_framebuffer {
+        u32 unk0; // = 0x10
+
+        u32 unknown2; // = 0x1f, same as SFBD
+        mali_ptr scratchpad;
+
+        /* 0x10 */
+        mali_ptr sample_locations;
+        mali_ptr unknown1;
+        /* 0x20 */
+        u16 width1, height1;
+        u32 zero3;
+        u16 width2, height2;
+        u32 unk1 : 19; // = 0x01000
+        u32 rt_count_1 : 2; // off-by-one (use MALI_POSITIVE)
+        u32 unk2 : 3; // = 0
+        u32 rt_count_2 : 3; // no off-by-one
+        u32 zero4 : 5; /* completes the word: 19+2+3+3+5 = 32 bits */
+        /* 0x30 */
+        u32 clear_stencil : 8;
+        u32 unk3 : 24; // = 0x100; also takes MALI_MFBD_EXTRA
+        float clear_depth;
+        mali_ptr tiler_meta;
+        /* 0x40 */
+
+        /* Note: these are guesses! */
+        mali_ptr tiler_scratch_start;
+        mali_ptr tiler_scratch_middle;
+
+        /* These are not, since we see symmetry with replay jobs which name these explicitly */
+        mali_ptr tiler_heap_start;
+        mali_ptr tiler_heap_end;
+
+        u64 zero9, zero10, zero11, zero12;
+
+        /* optional: struct bifrost_fb_extra extra */
+        /* struct bifrost_render_target rts[] */
+} __attribute__((packed));
+
+#endif /* __PANFROST_JOB_H__ */
diff --git a/src/gallium/drivers/panfrost/include/panfrost-misc.h b/src/gallium/drivers/panfrost/include/panfrost-misc.h
new file mode 100644
index 00000000000..82363d8730b
--- /dev/null
+++ b/src/gallium/drivers/panfrost/include/panfrost-misc.h
@@ -0,0 +1,47 @@
+/*
+ * © Copyright 2017-2018 The Panfrost Community
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __PANFROST_MISC_H__
+#define __PANFROST_MISC_H__
+
+#include <inttypes.h>
+
+typedef uint8_t  u8; /* short aliases for the fixed-width integer types */
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef int8_t  s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+typedef uint64_t mali_ptr; /* always 64 bits wide, whatever the host pointer size */
+
+#define MALI_PTR_FMT "0x%" PRIx64 /* printf conversion matching mali_ptr (uint64_t) */
+
+/* FIXME: put this somewhere more fitting */
+#define MALI_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+
+#endif
diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build
new file mode 100644
index 00000000000..fdf66c02138
--- /dev/null
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -0,0 +1,52 @@
+# Copyright © 2017 Intel Corporation
+# Copyright © 2018 Alyssa Rosenzweig
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_panfrost = files(
+  'pan_public.h',
+  'pan_screen.c',
+  'pan_screen.h',
+)
+
+inc_panfrost = [
+  inc_common,
+  inc_gallium,
+  inc_gallium_aux,
+  inc_drm_uapi,
+  inc_include,
+  inc_src,
+  include_directories('include')  # this driver's own include/ directory
+]
+
+libpanfrost = static_library(
+  'panfrost',
+  [files_panfrost],
+  dependencies: [
+    dep_thread,
+    idep_nir
+  ],
+  include_directories : inc_panfrost,
+  c_args : [c_vis_args, c_msvc_compat_args],
+)
+
+driver_panfrost = declare_dependency(  # bundles the define, warning flag and libraries
+  compile_args : ['-DGALLIUM_PANFROST', '-Wno-pointer-arith'],
+  link_with : [libpanfrost, libpanfrostwinsys],  # libpanfrostwinsys is not defined in this file
+)
diff --git a/src/gallium/drivers/panfrost/pan_allocate.h b/src/gallium/drivers/panfrost/pan_allocate.h
new file mode 100644
index 00000000000..2084a339552
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_allocate.h
@@ -0,0 +1,123 @@
+/*
+ * © Copyright 2017-2018 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __PAN_ALLOCATE_H__
+#define __PAN_ALLOCATE_H__
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+#include "pipebuffer/pb_slab.h"
+
+#include <panfrost-misc.h>
+
+struct panfrost_context;
+
+/* Texture memory */
+
+#define HEAP_TEXTURE 0 /* HEAP_*: presumably the heap_id for panfrost_allocate_chunk -- confirm */
+
+/* Single-frame (transient) command stream memory, done at the block scale
+ * rather than the individual cmdstream allocation scale. We use pb_alloc for
+ * pooling, but we have to implement our own logic atop the API for performance
+ * reasons when considering many low-latency tiny heterogeneous allocations */
+
+#define HEAP_TRANSIENT 1
+
+/* Multi-frame descriptor memory (replaces what used to be
+ * cmdstream_persistent), for long-living small allocations */
+
+#define HEAP_DESCRIPTOR 2
+
+/* Represents a fat pointer for GPU-mapped memory, returned from the transient
+ * allocator and not used for much else */
+
+struct panfrost_transfer {
+        uint8_t *cpu; /* presumably the CPU mapping of the allocation */
+        mali_ptr gpu; /* presumably the GPU address of the same allocation */
+};
+
+struct panfrost_memory {
+        /* Subclassing slab object */
+        struct pb_slab slab;
+
+        /* Backing for the slab in memory */
+        uint8_t *cpu;
+        mali_ptr gpu; /* base that panfrost_reserve offsets from */
+        int stack_bottom; /* byte offset advanced by panfrost_reserve */
+        size_t size;
+};
+
+/* Slab entry sizes range from 2^min to 2^max. In this case, we range from 1k
+ * to 16MB. Numbers are kind of arbitrary but these seem to work alright in
+ * practice. */
+
+#define MIN_SLAB_ENTRY_SIZE (10) /* exponent: 2^10 = 1 KiB */
+#define MAX_SLAB_ENTRY_SIZE (24) /* exponent: 2^24 = 16 MiB */
+
+struct panfrost_memory_entry {
+        /* Subclass */
+        struct pb_slab_entry base; /* first member, like slab in struct panfrost_memory */
+
+        /* Have we been freed? */
+        bool freed;
+
+        /* Offset into the slab of the entry */
+        off_t offset;
+};
+
+/* Functions for replay */
+mali_ptr pandev_upload(int cheating_offset, int *stack_bottom, mali_ptr base, void *base_map, const void *data, size_t sz, bool no_pad);
+mali_ptr pandev_upload_sequential(mali_ptr base, void *base_map, const void *data, size_t sz);
+
+/* Functions for the actual Galliumish driver */
+mali_ptr panfrost_upload(struct panfrost_memory *mem, const void *data, size_t sz, bool no_pad);
+mali_ptr panfrost_upload_sequential(struct panfrost_memory *mem, const void *data, size_t sz);
+
+struct panfrost_transfer
+panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz);
+
+mali_ptr
+panfrost_upload_transient(struct panfrost_context *ctx, const void *data, size_t sz);
+
+void *
+panfrost_allocate_transfer(struct panfrost_memory *mem, size_t sz, mali_ptr *gpu);
+
+static inline mali_ptr
+panfrost_reserve(struct panfrost_memory *mem, size_t sz)
+{
+        /* Advance stack_bottom by sz; yield the GPU address where the claimed bytes begin */
+        return mem->gpu + ((mem->stack_bottom += sz) - sz);
+}
+
+struct panfrost_transfer
+panfrost_allocate_chunk(struct panfrost_context *ctx, size_t size, unsigned heap_id);
+
+#include <math.h>
+#define inff INFINITY /* makes the token "inff" expand to INFINITY */
+
+#define R(...) #__VA_ARGS__ /* stringifies its arguments */
+#define ALIGN(x, y) (((x) + ((y) - 1)) & ~((y) - 1)) /* round x up to a multiple of y; y must be a power of two */
+
+#endif /* __PAN_ALLOCATE_H__ */
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
new file mode 100644
index 00000000000..f1378583360
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -0,0 +1,362 @@
+/*
+ * © Copyright 2018 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* Include guard, named after this header (pan_context.h) so it cannot collide
+ * with an unrelated header guarding itself with a generic "builder" name. */
+#ifndef __PAN_CONTEXT_H__
+#define __PAN_CONTEXT_H__
+
+/* Build-time switch: when defined, the framebuffer macros further down pick
+ * the MFBD variants (MALI_MFBD / struct bifrost_framebuffer) */
+#define MFBD
+
+/* NOTE(review): feature-test macros normally need to be defined before the
+ * first system header is included to have any effect -- confirm that defining
+ * this inside a header still does what is intended */
+#define _LARGEFILE64_SOURCE 1
+#define CACHE_LINE_SIZE 1024 /* TODO */
+#include <sys/mman.h>
+#include <assert.h>
+#include "pan_resource.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_config.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+#include "util/u_blitter.h"
+
+/* Forward declare to avoid extra header dep. The tag has to match the one
+ * used by the primconvert member of struct panfrost_context. */
+struct primconvert_context;
+
+/* Select the default framebuffer descriptor kind (PANFROST_DEFAULT_FBD) and
+ * the matching C type (PANFROST_FRAMEBUFFER) depending on whether MFBD is
+ * defined.
+ *
+ * TODO: Handle on newer hardware */
+#ifdef MFBD
+#define PANFROST_DEFAULT_FBD (MALI_MFBD)
+#define PANFROST_FRAMEBUFFER struct bifrost_framebuffer
+#else
+#define PANFROST_DEFAULT_FBD (MALI_SFBD)
+#define PANFROST_FRAMEBUFFER struct mali_single_framebuffer
+#endif
+
+/* Capacity of the per-frame vertex/tiler job arrays in panfrost_context */
+#define MAX_DRAW_CALLS 4096
+/* Capacity of panfrost_varyings.varyings_stride */
+#define MAX_VARYINGS   4096
+
+/* Dirty-state bits, one bit per piece of state (presumably OR'd into
+ * panfrost_context.dirty -- confirm at the use sites). Bits 0 and 7 are
+ * currently commented out and bit 1 is not assigned. */
+//#define PAN_DIRTY_CLEAR	     (1 << 0)
+#define PAN_DIRTY_RASTERIZER (1 << 2)
+#define PAN_DIRTY_FS	     (1 << 3)
+#define PAN_DIRTY_FRAG_CORE  (PAN_DIRTY_FS) /* Dirty writes are tied */
+#define PAN_DIRTY_VS	     (1 << 4)
+#define PAN_DIRTY_VERTEX     (1 << 5)
+#define PAN_DIRTY_VERT_BUF   (1 << 6)
+//#define PAN_DIRTY_VIEWPORT   (1 << 7)
+#define PAN_DIRTY_SAMPLERS   (1 << 8)
+#define PAN_DIRTY_TEXTURES   (1 << 9)
+
+/* State of one constant buffer; panfrost_context keeps one of these per
+ * shader stage (PIPE_SHADER_TYPES). */
+struct panfrost_constant_buffer {
+        /* NOTE(review): presumably set when the contents changed and still
+         * have to be re-uploaded -- confirm at the use sites */
+        bool dirty;
+        /* NOTE(review): presumably the size of `buffer` in bytes -- confirm */
+        size_t size;
+        void *buffer;
+};
+
+/* Driver-side state of a single query object */
+struct panfrost_query {
+        /* Passthrough from Gallium */
+        unsigned type;
+        unsigned index;
+
+        /* Memory for the GPU to writeback the value of the query */
+        struct panfrost_transfer transfer;
+};
+
+/* Upper bound on the number of memory entries a transient pool can hold */
+#define PANFROST_MAX_TRANSIENT_ENTRIES 64
+
+/* A pool of equally sized memory entries together with a write cursor
+ * (entry_index, entry_offset) describing how far it has been filled. */
+struct panfrost_transient_pool {
+        /* Memory blocks in the pool */
+        struct panfrost_memory_entry *entries[PANFROST_MAX_TRANSIENT_ENTRIES];
+
+        /* Number of entries we own */
+        unsigned entry_count;
+
+        /* Current entry that we are writing to, zero-indexed, strictly less than entry_count */
+        unsigned entry_index;
+
+        /* Number of bytes into the current entry we are */
+        off_t entry_offset;
+
+        /* Entry size (all entries must be homogeneous) */
+        size_t entry_size;
+};
+
+/* Per-context driver state: the Gallium pipe_context followed by everything
+ * panfrost tracks for it (memory regions, per-frame job lists, hardware
+ * descriptors being built, and the currently bound state objects). */
+struct panfrost_context {
+        /* Gallium context. Kept as the first member so that pan_context()
+         * can cast a pipe_context pointer back to the panfrost_context. */
+        struct pipe_context base;
+
+        struct pipe_framebuffer_state pipe_framebuffer;
+
+        /* The number of concurrent FBOs allowed depends on the number of pools
+         * used; pools are ringed for parallelism opportunities */
+
+        struct panfrost_transient_pool transient_pools[2];
+        int cmdstream_i;
+
+        struct panfrost_memory cmdstream_persistent;
+        struct panfrost_memory shaders;
+        struct panfrost_memory scratchpad;
+        struct panfrost_memory tiler_heap;
+        struct panfrost_memory varying_mem;
+        struct panfrost_memory misc_0;
+        struct panfrost_memory misc_1;
+        struct panfrost_memory depth_stencil_buffer;
+
+        /* Parameters of the most recent clear */
+        struct {
+                unsigned buffers;
+                const union pipe_color_union *color;
+                double depth;
+                unsigned stencil;
+        } last_clear;
+
+        struct panfrost_query *occlusion_query;
+
+        /* Each render job has multiple framebuffer descriptors associated with
+         * it, used for various purposes with more or less the same format. The
+         * most obvious is the fragment framebuffer descriptor, which carries
+         * e.g. clearing information */
+
+        /* Use the single-framebuffer layout when it is requested explicitly
+         * (SFBD) or when MFBD is not defined, so that undefining MFBD switches
+         * this member together with PANFROST_FRAMEBUFFER instead of leaving
+         * the two out of sync. */
+#if defined(SFBD) || !defined(MFBD)
+        struct mali_single_framebuffer fragment_fbd;
+#else
+        struct bifrost_framebuffer fragment_fbd;
+
+        struct bifrost_fb_extra fragment_extra;
+
+        struct bifrost_render_target fragment_rts[4];
+#endif
+
+        /* Each draw has corresponding vertex and tiler payloads */
+        struct midgard_payload_vertex_tiler payload_vertex;
+        struct midgard_payload_vertex_tiler payload_tiler;
+
+        /* The fragment shader binary itself is pointed here (for the tripipe) but
+         * also everything else in the shader core, including blending, the
+         * stencil/depth tests, etc. Refer to the presentations. */
+
+        struct mali_shader_meta fragment_shader_core;
+
+        /* A frame is composed of a starting set value job, a number of vertex
+         * and tiler jobs, linked to the fragment job at the end. See the
+         * presentations for more information how this works */
+
+        unsigned draw_count;
+
+        /* GPU addresses of the jobs of the current frame */
+        mali_ptr set_value_job;
+        mali_ptr vertex_jobs[MAX_DRAW_CALLS];
+        mali_ptr tiler_jobs[MAX_DRAW_CALLS];
+
+        /* Pointers to the job descriptor headers of the same jobs.
+         * NOTE(review): the u_ prefix presumably marks the CPU-visible
+         * mapping of the addresses above -- confirm at the use sites. */
+        struct mali_job_descriptor_header *u_set_value_job;
+        struct mali_job_descriptor_header *u_vertex_jobs[MAX_DRAW_CALLS];
+        struct mali_job_descriptor_header *u_tiler_jobs[MAX_DRAW_CALLS];
+
+        unsigned vertex_job_count;
+        unsigned tiler_job_count;
+
+        /* Per-draw Dirty flags are setup like any other driver */
+        int dirty;
+
+        /* Per frame dirty flag - whether there was a clear. If not, we need to do a partial update, maybe */
+        bool frame_cleared;
+
+        unsigned vertex_count;
+
+        union mali_attr attributes[PIPE_MAX_ATTRIBS];
+
+        unsigned varying_height;
+
+        struct mali_viewport *viewport;
+        PANFROST_FRAMEBUFFER vt_framebuffer;
+
+        /* TODO: Multiple uniform buffers (index =/= 0), finer updates? */
+
+        struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
+
+        /* CSOs */
+        struct panfrost_rasterizer *rasterizer;
+
+        struct panfrost_shader_variants *vs;
+        struct panfrost_shader_variants *fs;
+
+        struct panfrost_vertex_state *vertex;
+
+        struct pipe_vertex_buffer *vertex_buffers;
+        unsigned vertex_buffer_count;
+
+        /* Bound samplers and sampler views, indexed by shader stage */
+        struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+        unsigned sampler_count[PIPE_SHADER_TYPES];
+
+        struct panfrost_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+        unsigned sampler_view_count[PIPE_SHADER_TYPES];
+
+        struct primconvert_context *primconvert;
+        struct blitter_context *blitter;
+
+        struct panfrost_blend_state *blend;
+
+        struct pipe_viewport_state pipe_viewport;
+        struct pipe_scissor_state scissor;
+        struct pipe_blend_color blend_color;
+        struct pipe_depth_stencil_alpha_state *depth_stencil;
+        struct pipe_stencil_ref stencil_ref;
+};
+
+/* Corresponds to the CSO: the Gallium rasterizer state plus the value
+ * panfrost derives from it. */
+
+struct panfrost_rasterizer {
+        struct pipe_rasterizer_state base;
+
+        /* Bitmask of front face, etc */
+        unsigned tiler_gl_enables;
+};
+
+/* Blend CSO: the Gallium blend state plus its compiled form, which is either
+ * a fixed-function equation or a blend shader (see has_blend_shader). */
+struct panfrost_blend_state {
+        struct pipe_blend_state base;
+
+        /* Whether a blend shader is in use */
+        bool has_blend_shader;
+
+        /* Compiled fixed function command */
+        struct mali_blend_equation equation;
+
+        /* Compiled blend shader */
+        mali_ptr blend_shader;
+        /* NOTE(review): presumably the work register count of the blend
+         * shader -- confirm where it is filled in */
+        int blend_work_count;
+};
+
+/* Internal varyings descriptor, embedded in panfrost_shader_state */
+struct panfrost_varyings {
+        /* Varyings information: stride of each chunk of memory used for
+         * varyings (similar structure with attributes). Count is just the
+         * number of vec4's. Buffer count is the number of varying chunks (<=
+         * count). Height is used to calculate gl_Position's position ("it's
+         * not a pun, Alyssa!"). Vertex-only varyings == descriptor for
+         * gl_Position and something else apparently occupying the same space.
+         * Varyings == main varyings descriptors following typical mali_attr
+         * conventions. */
+
+        unsigned varyings_stride[MAX_VARYINGS];
+        unsigned varying_count;
+        unsigned varying_buffer_count;
+
+        /* Map of the actual varyings buffer */
+        uint8_t *varyings_buffer_cpu;
+        /* GPU addresses of the varyings descriptors; the _fragment one is
+         * presumably the copy consumed by the fragment stage (confirm) */
+        mali_ptr varyings_descriptor;
+        mali_ptr varyings_descriptor_fragment;
+};
+
+/* Variants bundle together to form the backing CSO, bundling multiple
+ * shaders with varying emulated features baked in (alpha test
+ * parameters, etc). This is the capacity of the variants array in
+ * struct panfrost_shader_variants. */
+#define MAX_SHADER_VARIANTS 8
+
+/* A shader state corresponds to the actual, current variant of the shader */
+struct panfrost_shader_state {
+        /* The Gallium shader state this variant was built from (a pointer,
+         * not a copy) */
+        struct pipe_shader_state *base;
+
+        /* Compiled, mapped descriptor, ready for the hardware */
+        bool compiled;
+        struct mali_shader_meta *tripipe;
+        mali_ptr tripipe_gpu;
+
+        /* Non-descript information */
+        int uniform_count;
+        bool can_discard;
+        bool writes_point_size;
+
+        /* Valid for vertex shaders only due to when this is calculated */
+        struct panfrost_varyings varyings;
+
+        /* Information on this particular shader variant */
+        struct pipe_alpha_state alpha_state;
+};
+
+/* A collection of variants (the CSO): up to MAX_SHADER_VARIANTS compiled
+ * states of one Gallium shader, one of which is active at a time. */
+struct panfrost_shader_variants {
+        struct pipe_shader_state base;
+
+        struct panfrost_shader_state variants[MAX_SHADER_VARIANTS];
+        /* Number of slots of `variants` in use */
+        unsigned variant_count;
+
+        /* The current active variant */
+        unsigned active_variant;
+};
+
+/* Vertex elements CSO: the Gallium vertex element descriptions plus the
+ * prebaked hardware attribute metadata. */
+struct panfrost_vertex_state {
+        /* Number of entries of the arrays below that are in use */
+        unsigned num_elements;
+
+        struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
+        int nr_components[PIPE_MAX_ATTRIBS];
+
+        /* The actual attribute meta, prebaked and GPU mapped. TODO: Free memory */
+        struct mali_attr_meta *hw;
+        mali_ptr descriptor_ptr;
+};
+
+/* Sampler CSO: the Gallium sampler state next to its hardware descriptor */
+struct panfrost_sampler_state {
+        struct pipe_sampler_state base;
+        struct mali_sampler_descriptor hw;
+};
+
+/* Misnomer: Sampler view corresponds to textures, not samplers. Pairs the
+ * Gallium sampler view with the hardware texture descriptor. */
+
+struct panfrost_sampler_view {
+        struct pipe_sampler_view base;
+        struct mali_texture_descriptor hw;
+};
+
+/* Downcast a generic Gallium context to the panfrost context embedding it.
+ * This works because `base` is the first member of struct panfrost_context. */
+static inline struct panfrost_context *
+pan_context(struct pipe_context *pcontext)
+{
+        struct panfrost_context *ctx = (struct panfrost_context *) pcontext;
+
+        return ctx;
+}
+
+/* Downcast a generic Gallium screen to the panfrost screen.
+ * NOTE(review): assumes the pipe_screen is the first member of struct
+ * panfrost_screen, whose definition is not part of this header. */
+static inline struct panfrost_screen *
+pan_screen(struct pipe_screen *p)
+{
+        struct panfrost_screen *screen = (struct panfrost_screen *) p;
+
+        return screen;
+}
+
+/* Context entry points implemented outside this header. Only the prototypes
+ * are visible here, so the notes below are limited to what the signatures
+ * show. */
+
+/* Create a new context for the given screen */
+struct pipe_context *
+panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags);
+
+/* NOTE(review): presumably emits the state needed for the next draw;
+ * with_vertex_data is not described here -- confirm in the definition */
+void
+panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data);
+
+/* Build either a vertex or a tiler job, selected by is_tiler */
+struct panfrost_transfer
+panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler);
+
+unsigned
+panfrost_get_default_swizzle(unsigned components);
+
+/* Same signature as the Gallium pipe_context flush hook */
+void
+panfrost_flush(
+        struct pipe_context *pipe,
+        struct pipe_fence_handle **fence,
+        unsigned flags);
+
+/* NOTE(review): presumably compiles `src` and fills in both *meta and
+ * *state; the meaning of `type` is not visible here -- confirm */
+void
+panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *meta, const char *src, int type, struct panfrost_shader_state *state);
+
+#endif
diff --git a/src/gallium/drivers/panfrost/pan_public.h b/src/gallium/drivers/panfrost/pan_public.h
new file mode 100644
index 00000000000..f57fd6157f4
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_public.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2018 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PAN_PUBLIC_H
+#define PAN_PUBLIC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Opaque here: only pointers to these types are needed */
+struct pipe_screen;
+struct renderonly;
+
+/* Public entry point that creates the panfrost pipe_screen.
+ * NOTE(review): fd is presumably the device file descriptor, ro the
+ * renderonly wrapper and is_drm a backend selector -- confirm against the
+ * definition.
+ * NOTE(review): `bool` is used here although this header does not include
+ * <stdbool.h>; C callers have to provide it before including this file. */
+struct pipe_screen *
+panfrost_create_screen(int fd, struct renderonly *ro, bool is_drm);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h
new file mode 100644
index 00000000000..78baffbd1b2
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_resource.h
@@ -0,0 +1,85 @@
+/*
+ * © Copyright 2018-2019 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#ifndef PAN_RESOURCE_H
+#define PAN_RESOURCE_H
+
+#include <panfrost-job.h>
+#include "pan_screen.h"
+#include "pan_allocate.h"
+#include <drm.h>
+
+/* Backing storage of a resource, tracked per mip level, plus the optional
+ * AFBC and checksum side buffers. */
+struct panfrost_bo {
+        /* Address to the BO in question */
+
+        uint8_t *cpu[MAX_MIP_LEVELS];
+
+        /* Not necessarily a GPU mapping of cpu! In case of texture tiling, gpu
+         * points to the GPU-side, tiled texture, while cpu points to the
+         * CPU-side, untiled texture from mesa */
+
+        mali_ptr gpu[MAX_MIP_LEVELS];
+
+        /* Memory entry corresponding to gpu above */
+        struct panfrost_memory_entry *entry[MAX_MIP_LEVELS];
+
+        /* Set for tiled, clear for linear. */
+        bool tiled;
+
+        /* Is something other than level 0 ever written? */
+        bool is_mipmap;
+
+        /* If AFBC is enabled for this resource, we lug around an AFBC
+         * metadata buffer as well. The actual AFBC resource is also in
+         * afbc_slab (only defined for AFBC) at position afbc_main_offset.
+         * NOTE(review): this struct has no afbc_main_offset member; the
+         * closest field is afbc_metadata_size -- confirm which is meant. */
+
+        bool has_afbc;
+        struct panfrost_memory afbc_slab;
+        int afbc_metadata_size;
+
+        /* Similarly for TE */
+        bool has_checksum;
+        struct panfrost_memory checksum_slab;
+        int checksum_stride;
+};
+
+/* panfrost's pipe_resource subclass */
+struct panfrost_resource {
+        /* Gallium resource. Kept as the first member so that pan_resource()
+         * can cast a pipe_resource pointer back to the panfrost_resource. */
+        struct pipe_resource base;
+
+        /* Backing storage */
+        struct panfrost_bo *bo;
+        /* NOTE(review): presumably only set for resources that are scanned
+         * out through renderonly -- confirm where it is assigned */
+        struct renderonly_scanout *scanout;
+};
+
+/* Downcast a generic Gallium resource to the panfrost resource embedding it.
+ * This works because `base` is the first member of struct panfrost_resource. */
+static inline struct panfrost_resource *
+pan_resource(struct pipe_resource *p)
+{
+        struct panfrost_resource *rsrc = (struct panfrost_resource *) p;
+
+        return rsrc;
+}
+
+/* Resource setup entry points, one per object kind.
+ * NOTE(review): presumably these install the resource-related hooks on the
+ * screen and on the context respectively -- confirm in the definitions. */
+void panfrost_resource_screen_init(struct panfrost_screen *screen);
+
+void panfrost_resource_context_init(struct pipe_context *pctx);
+
+#endif /* PAN_RESOURCE_H */
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
new file mode 100644
index 00000000000..0e745583940
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -0,0 +1,702 @@
+/**************************************************************************
+ *
+ * Copyright 2008 VMware, Inc.
+ * Copyright 2014 Broadcom
+ * Copyright 2018 Alyssa Rosenzweig
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_video.h"
+#include "util/os_time.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "draw/draw_context.h"
+#include <xf86drm.h>
+
+#include <fcntl.h>
+
+#include "drm_fourcc.h"
+
+#include "pan_screen.h"
+#include "pan_resource.h"
+#include "pan_public.h"
+
+#include "pan_context.h"
+
+/* Return the driver name string for this screen; the screen argument is not
+ * consulted. */
+static const char *
+panfrost_get_name(struct pipe_screen *screen)
+{
+        static const char driver_name[] = "panfrost";
+
+        return driver_name;
+}
+
+/* Return the vendor string of the driver itself (as opposed to the hardware
+ * vendor, see panfrost_get_device_vendor); the screen argument is not
+ * consulted. */
+static const char *
+panfrost_get_vendor(struct pipe_screen *screen)
+{
+        static const char driver_vendor[] = "panfrost";
+
+        return driver_vendor;
+}
+
+/* Return the hardware vendor string; the screen argument is not consulted. */
+static const char *
+panfrost_get_device_vendor(struct pipe_screen *screen)
+{
+        static const char device_vendor[] = "Arm";
+
+        return device_vendor;
+}
+
+/* Answer integer PIPE_CAP_* capability queries.
+ *
+ * Returns the value of the capability. Boolean capabilities are reported as
+ * 1 (supported) or 0 (unsupported). Capabilities that are not listed are
+ * logged through debug_printf and reported as 0. The screen argument is not
+ * consulted: every answer is a compile-time constant, except for
+ * PIPE_CAP_VIDEO_MEMORY, which is derived from the amount of system memory
+ * and reported in megabytes. */
+static int
+panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+        switch (param) {
+        case PIPE_CAP_NPOT_TEXTURES:
+        case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+        case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+                return 1;
+
+        case PIPE_CAP_SM3:
+                return 1;
+
+        case PIPE_CAP_POINT_SPRITE:
+                return 1;
+
+        case PIPE_CAP_MAX_RENDER_TARGETS:
+                return PIPE_MAX_COLOR_BUFS;
+
+        case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+                return 1;
+
+        case PIPE_CAP_OCCLUSION_QUERY:
+        case PIPE_CAP_QUERY_TIME_ELAPSED:
+        case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+                return 1; /* TODO: Queries */
+
+        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+                return 1;
+
+        case PIPE_CAP_TEXTURE_SWIZZLE:
+                return 1;
+
+        case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+                return 0;
+
+        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+        case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+                return 13;
+
+        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+                return 1;
+
+        case PIPE_CAP_INDEP_BLEND_ENABLE:
+                return 1;
+
+        case PIPE_CAP_INDEP_BLEND_FUNC:
+                return 1;
+
+        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+                return 1;
+
+        case PIPE_CAP_DEPTH_CLIP_DISABLE:
+                return 1;
+
+        case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+                return 0; /* no streamout */
+
+        case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+        case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+                return 16 * 4;
+
+        case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+        case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+                return 1024;
+
+        case PIPE_CAP_MAX_VERTEX_STREAMS:
+                return 1;
+
+        case PIPE_CAP_PRIMITIVE_RESTART:
+                return 0; /* We don't understand this yet */
+
+        case PIPE_CAP_SHADER_STENCIL_EXPORT:
+                return 1;
+
+        case PIPE_CAP_TGSI_INSTANCEID:
+        case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+        case PIPE_CAP_START_INSTANCE:
+                return 0; /* TODO: Instances */
+
+        case PIPE_CAP_SEAMLESS_CUBE_MAP:
+        case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+                return 1;
+
+        case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+                return 256; /* for GL3 */
+
+        case PIPE_CAP_MIN_TEXEL_OFFSET:
+                return -8;
+
+        case PIPE_CAP_MAX_TEXEL_OFFSET:
+                return 7;
+
+        case PIPE_CAP_CONDITIONAL_RENDER:
+                return 1;
+
+        case PIPE_CAP_TEXTURE_BARRIER:
+                return 0;
+
+        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+        case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: /* draw module */
+        case PIPE_CAP_VERTEX_COLOR_CLAMPED: /* draw module */
+                return 1;
+
+        case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+                return 0;
+
+        case PIPE_CAP_GLSL_FEATURE_LEVEL:
+                return 330;
+
+        case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+        case PIPE_CAP_TGSI_TEX_TXF_LZ:
+                return 0;
+
+        case PIPE_CAP_COMPUTE:
+                return 0;
+
+        case PIPE_CAP_USER_VERTEX_BUFFERS: /* XXX XXX */
+        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+                return 0;
+
+        case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+        case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
+        case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+        case PIPE_CAP_DOUBLES:
+        case PIPE_CAP_INT64:
+        case PIPE_CAP_INT64_DIVMOD:
+                return 1;
+
+        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+                return 16;
+
+        case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+        case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+        case PIPE_CAP_TEXTURE_MULTISAMPLE:
+                return 0;
+
+        case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
+                return 0xffff;
+
+        case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+                return 64;
+
+        case PIPE_CAP_QUERY_TIMESTAMP:
+        case PIPE_CAP_CUBE_MAP_ARRAY:
+                return 1;
+
+        case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+                return 1;
+
+        case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+                return 0;
+
+        case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+                return 65536;
+
+        case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+                return 0;
+
+        case PIPE_CAP_TGSI_TEXCOORD:
+                return 1; /* XXX: What should this be exactly? */
+
+        case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+                return 0;
+
+        case PIPE_CAP_MAX_VIEWPORTS:
+                return PIPE_MAX_VIEWPORTS;
+
+        case PIPE_CAP_ENDIANNESS:
+                return PIPE_ENDIAN_NATIVE;
+
+        case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+                return 4;
+
+        case PIPE_CAP_TEXTURE_GATHER_SM5:
+        case PIPE_CAP_TEXTURE_QUERY_LOD:
+                return 1;
+
+        case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+        case PIPE_CAP_SAMPLE_SHADING:
+        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+                return 0;
+
+        case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+                return 1;
+
+        case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+                return 0;
+
+        case PIPE_CAP_SAMPLER_VIEW_TARGET:
+                return 1;
+
+        case PIPE_CAP_FAKE_SW_MSAA:
+                return 1;
+
+        case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+                return -32;
+
+        case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+                return 31;
+
+        case PIPE_CAP_DRAW_INDIRECT:
+                return 1;
+
+        case PIPE_CAP_QUERY_SO_OVERFLOW:
+                return 1;
+
+        case PIPE_CAP_VENDOR_ID:
+                return 0xFFFFFFFF;
+
+        case PIPE_CAP_DEVICE_ID:
+                return 0xFFFFFFFF;
+
+        case PIPE_CAP_ACCELERATED:
+                return 1;
+
+        case PIPE_CAP_VIDEO_MEMORY: {
+                /* XXX: Do we want to return the full amount of system memory ? */
+                uint64_t system_memory;
+
+                if (!os_get_total_physical_memory(&system_memory))
+                        return 0;
+
+                if (sizeof(void *) == 4) {
+                        /* Cap to 2 GB on 32 bits system. We do this because panfrost does
+                         * eat application memory, which is quite limited on 32 bits. App
+                         * shouldn't expect too much available memory.
+                         *
+                         * The limit is built in 64 bits: 2048 << 20 is 2^31,
+                         * which does not fit in a 32-bit int. */
+                        system_memory = MIN2(system_memory, (uint64_t) 2048 << 20);
+                }
+
+                /* Reported in megabytes */
+                return (int)(system_memory >> 20);
+        }
+
+        case PIPE_CAP_UMA:
+                return 0;
+
+        case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+                return 1;
+
+        case PIPE_CAP_CLIP_HALFZ:
+        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+                return 1;
+
+        case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+        case PIPE_CAP_CULL_DISTANCE:
+                return 1;
+
+        case PIPE_CAP_VERTEXID_NOBASE:
+                return 0;
+
+        case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+                return 0;
+
+        case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+        case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+                return 1;
+
+        case PIPE_CAP_CLEAR_TEXTURE:
+                return 1;
+
+        case PIPE_CAP_ANISOTROPIC_FILTER:
+        case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+        case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+        case PIPE_CAP_DEPTH_BOUNDS_TEST:
+        case PIPE_CAP_TGSI_TXQS:
+        case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+        case PIPE_CAP_SHAREABLE_SHADERS:
+        case PIPE_CAP_DRAW_PARAMETERS:
+        case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+        case PIPE_CAP_MULTI_DRAW_INDIRECT:
+        case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+        case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+        case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+        case PIPE_CAP_INVALIDATE_BUFFER:
+        case PIPE_CAP_GENERATE_MIPMAP:
+        case PIPE_CAP_STRING_MARKER:
+        case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+        case PIPE_CAP_QUERY_BUFFER_OBJECT:
+        case PIPE_CAP_QUERY_MEMORY_INFO:
+        case PIPE_CAP_PCI_GROUP:
+        case PIPE_CAP_PCI_BUS:
+        case PIPE_CAP_PCI_DEVICE:
+        case PIPE_CAP_PCI_FUNCTION:
+        case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+        case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+        case PIPE_CAP_TGSI_VOTE:
+        case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+        case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+        case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+        case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
+        case PIPE_CAP_NATIVE_FENCE_FD:
+        case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+        case PIPE_CAP_TGSI_FS_FBFETCH:
+        case PIPE_CAP_TGSI_MUL_ZERO_WINS:
+        case PIPE_CAP_TGSI_CLOCK:
+        case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
+        case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
+        case PIPE_CAP_TGSI_BALLOT:
+        case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
+        case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
+        case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+        case PIPE_CAP_POST_DEPTH_COVERAGE:
+        case PIPE_CAP_BINDLESS_TEXTURE:
+        case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+        case PIPE_CAP_MEMOBJ:
+        case PIPE_CAP_LOAD_CONSTBUF:
+        case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+        case PIPE_CAP_TILE_RASTER_ORDER:
+        case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+        case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+        case PIPE_CAP_CONTEXT_PRIORITY_MASK:
+        case PIPE_CAP_FENCE_SIGNAL:
+        case PIPE_CAP_CONSTBUF0_FLAGS:
+                return 0;
+
+        case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+                return 4;
+
+        default:
+                debug_printf("Unexpected PIPE_CAP %d query\n", param);
+                return 0;
+        }
+}
+
+/* Answer PIPE_SHADER_CAP_* queries for one shader stage.
+ *
+ * Only the vertex and fragment stages are handled: for any other stage every
+ * capability is reported as 0. Capabilities that are not listed are logged
+ * to stderr and reported as 0. The screen argument is not consulted. */
+static int
+panfrost_get_shader_param(struct pipe_screen *screen,
+                          enum pipe_shader_type shader,
+                          enum pipe_shader_cap param)
+{
+        const bool is_vertex = (shader == PIPE_SHADER_VERTEX);
+        const bool is_fragment = (shader == PIPE_SHADER_FRAGMENT);
+
+        /* Nothing but vertex and fragment shaders is exposed */
+        if (!(is_vertex || is_fragment))
+                return 0;
+
+        /* this is probably not totally correct.. but it's a start: */
+        switch (param) {
+        case PIPE_SHADER_CAP_SCALAR_ISA:
+                return 0;
+
+        /* Program size limits */
+        case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+        case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+                return 16384;
+
+        case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+                return 1024;
+
+        /* Register and interface limits */
+        case PIPE_SHADER_CAP_MAX_INPUTS:
+                return 16;
+
+        case PIPE_SHADER_CAP_MAX_OUTPUTS:
+                if (is_fragment)
+                        return 1;
+
+                return 8;
+
+        case PIPE_SHADER_CAP_MAX_TEMPS:
+                return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
+
+        case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+                return 16 * 1024 * sizeof(float);
+
+        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+                return 1;
+
+        /* Control flow and indirect addressing: only constants may be
+         * addressed indirectly */
+        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+        case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+        case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+                return 0;
+
+        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+                return 1;
+
+        case PIPE_SHADER_CAP_SUBROUTINES:
+        case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+                return 0;
+
+        case PIPE_SHADER_CAP_INTEGERS:
+                return 1;
+
+        case PIPE_SHADER_CAP_INT64_ATOMICS:
+        case PIPE_SHADER_CAP_FP16:
+        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+                return 0;
+
+        /* Texturing */
+        case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+        case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+                return 16; /* XXX: How many? */
+
+        /* Shader IR selection */
+        case PIPE_SHADER_CAP_PREFERRED_IR:
+                return PIPE_SHADER_IR_NIR;
+
+        case PIPE_SHADER_CAP_SUPPORTED_IRS:
+                return 0;
+
+        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+                return 32;
+
+        case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+        case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+        case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+        case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+        case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+        case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+                return 0;
+
+        default:
+                fprintf(stderr, "unknown shader param %d\n", param);
+                return 0;
+        }
+
+        return 0;
+}
+
+static float
+panfrost_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
+{       /* Float-valued screen limits; unexpected caps are logged and give 0.0 */
+        switch (param) {
+        case PIPE_CAPF_MAX_LINE_WIDTH:
+        /* the plain and _AA line-width caps report the same limit */
+        /* fall-through */
+        case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+                return 255.0; /* arbitrary */
+
+        case PIPE_CAPF_MAX_POINT_WIDTH:
+        /* the plain and _AA point-width caps report the same limit */
+        /* fall-through */
+        case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+                return 255.0; /* arbitrary */
+
+        case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+                return 16.0;
+
+        case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+                return 16.0; /* arbitrary */
+
+        default:
+                debug_printf("Unexpected PIPE_CAPF %d query\n", param);
+                return 0.0;
+        }
+}
+
+/**
+ * Query format support for creating a texture, drawing surface, etc.
+ * \param format  the format to test
+ * \param target  the resource target (PIPE_BUFFER or a PIPE_TEXTURE_x) to test
+ */
+static boolean
+panfrost_is_format_supported( struct pipe_screen *screen,
+                              enum pipe_format format,
+                              enum pipe_texture_target target,
+                              unsigned sample_count,
+                              unsigned storage_sample_count,
+                              unsigned bind)
+{
+        const struct util_format_description *format_desc;
+
+        assert(target == PIPE_BUFFER ||
+               target == PIPE_TEXTURE_1D ||
+               target == PIPE_TEXTURE_1D_ARRAY ||
+               target == PIPE_TEXTURE_2D ||
+               target == PIPE_TEXTURE_2D_ARRAY ||
+               target == PIPE_TEXTURE_RECT ||
+               target == PIPE_TEXTURE_3D ||
+               target == PIPE_TEXTURE_CUBE ||
+               target == PIPE_TEXTURE_CUBE_ARRAY);
+
+        format_desc = util_format_description(format);
+
+        if (!format_desc)
+                return FALSE;
+
+        if (sample_count > 1) /* multisampled resources are rejected */
+                return FALSE;
+
+        /* Format wishlist: these two formats are refused for now */
+        if (format == PIPE_FORMAT_Z24X8_UNORM || format == PIPE_FORMAT_X8Z24_UNORM)
+                return FALSE;
+
+        if (bind & PIPE_BIND_RENDER_TARGET) {
+                /* We don't support rendering into anything but RGBA8 yet. We
+                 * need more formats for spec compliance, but for now, honesty
+                 * is the best policy <3 */
+
+                if (!util_format_is_rgba8_variant(format_desc))
+                        return FALSE;
+
+                if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+                        return FALSE;
+
+                /*
+                 * Although possible, it is unnatural to render into compressed or YUV
+                 * surfaces. So disable these here to avoid going into weird paths
+                 * inside the state trackers.
+                 */
+                if (format_desc->block.width != 1 ||
+                                format_desc->block.height != 1)
+                        return FALSE;
+        }
+
+        if (bind & PIPE_BIND_DEPTH_STENCIL) {
+                if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+                        return FALSE;
+        }
+
+        if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC ||
+                        format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
+                /* Compressed formats not yet hooked up. */
+                return FALSE;
+        }
+
+        if ((bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) &&
+                        ((bind & PIPE_BIND_DISPLAY_TARGET) == 0) &&
+                        target != PIPE_BUFFER) {
+                const struct util_format_description *desc =
+                        util_format_description(format); /* NOTE(review): same lookup as format_desc */
+
+                if (desc->nr_channels == 3 && desc->is_array) {
+                        /* Don't support any 3-component formats for rendering/texturing
+                         * since we don't support the corresponding 8-bit 3 channel UNORM
+                         * formats.  This allows us to support GL_ARB_copy_image between
+                         * GL_RGB8 and GL_RGB8UI, for example.  Otherwise, we may be asked to
+                         * do a resource copy between PIPE_FORMAT_R8G8B8_UINT and
+                         * PIPE_FORMAT_R8G8B8X8_UNORM, for example, which will not work
+                         * (different bpp).
+                         */
+                        return FALSE;
+                }
+        }
+
+        return TRUE;
+}
+
+
+static void
+panfrost_destroy_screen( struct pipe_screen *screen )
+{
+        FREE(screen); /* NOTE(review): frees only the screen; the ro copy made at creation is not released here */
+}
+
+static void
+panfrost_flush_frontbuffer(struct pipe_screen *_screen,
+                           struct pipe_resource *resource,
+                           unsigned level, unsigned layer,
+                           void *context_private,
+                           struct pipe_box *sub_box)
+{
+        /* TODO: Display target integration; until then this hook does nothing */
+}
+
+static uint64_t
+panfrost_get_timestamp(struct pipe_screen *_screen)
+{
+        return os_time_get_nano(); /* forwards os_time_get_nano(); _screen is not consulted */
+}
+
+static void
+panfrost_fence_reference(struct pipe_screen *screen,
+                         struct pipe_fence_handle **ptr,
+                         struct pipe_fence_handle *fence)
+{
+        *ptr = fence; /* stub: plain pointer copy, the previous *ptr is not released and nothing is counted */
+}
+
+static boolean
+panfrost_fence_finish(struct pipe_screen *screen,
+                      struct pipe_context *ctx,
+                      struct pipe_fence_handle *fence,
+                      uint64_t timeout)
+{
+        assert(fence); /* a NULL fence is treated as a caller bug */
+        return TRUE; /* stub: every fence is reported as finished; timeout is ignored */
+}
+
+static const void *
+panfrost_screen_get_compiler_options(struct pipe_screen *pscreen,
+                                     enum pipe_shader_ir ir,
+                                     enum pipe_shader_type shader)
+{
+        return NULL; /* stub: no options object is supplied for any ir/shader combination */
+}
+
+struct pipe_screen *
+panfrost_create_screen(int fd, struct renderonly *ro, bool is_drm)
+{       /* Stub: wires up the screen callbacks, then always returns NULL */
+        struct panfrost_screen *screen = CALLOC_STRUCT(panfrost_screen);
+
+        if (!screen)
+                return NULL;
+
+        if (ro) {
+                screen->ro = renderonly_dup(ro);
+                if (!screen->ro) {
+                        fprintf(stderr, "Failed to dup renderonly object\n");
+                        FREE(screen); /* CALLOC_STRUCT memory is released with FREE */
+                        return NULL;
+                }
+        }
+
+        screen->base.destroy = panfrost_destroy_screen;
+
+        screen->base.get_name = panfrost_get_name;
+        screen->base.get_vendor = panfrost_get_vendor;
+        screen->base.get_device_vendor = panfrost_get_device_vendor;
+        screen->base.get_param = panfrost_get_param;
+        screen->base.get_shader_param = panfrost_get_shader_param;
+        screen->base.get_paramf = panfrost_get_paramf;
+        screen->base.get_timestamp = panfrost_get_timestamp;
+        screen->base.is_format_supported = panfrost_is_format_supported;
+        //screen->base.context_create = panfrost_create_context;
+        screen->base.flush_frontbuffer = panfrost_flush_frontbuffer;
+        screen->base.get_compiler_options = panfrost_screen_get_compiler_options;
+        screen->base.fence_reference = panfrost_fence_reference;
+        screen->base.fence_finish = panfrost_fence_finish;
+
+        screen->last_fragment_id = -1;
+        screen->last_fragment_flushed = true;
+        /* NOTE(review): fd, screen and screen->ro are not released on this path */
+        fprintf(stderr, "stub: Upstream panfrost (use downstream fork)\n");
+        return NULL;
+}
diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
new file mode 100644
index 00000000000..07e2ad399f0
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -0,0 +1,88 @@
+/**************************************************************************
+ *
+ * Copyright 2018-2019 Alyssa Rosenzweig
+ * Copyright 2018-2019 Collabora
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PAN_SCREEN_H
+#define PAN_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "renderonly/renderonly.h"
+
+#include <panfrost-misc.h>
+#include "pan_allocate.h"
+
+struct panfrost_context;
+struct panfrost_resource;
+struct panfrost_screen;
+
+//#define DUMP_PERFORMANCE_COUNTERS
+
+struct panfrost_driver { /* callback table reached through panfrost_screen::driver */
+	struct panfrost_bo * (*create_bo) (struct panfrost_screen *screen, const struct pipe_resource *template);
+	struct panfrost_bo * (*import_bo) (struct panfrost_screen *screen, struct winsys_handle *whandle);
+	uint8_t * (*map_bo) (struct panfrost_context *ctx, struct pipe_transfer *transfer);
+	void (*unmap_bo) (struct panfrost_context *ctx, struct pipe_transfer *transfer);
+	void (*destroy_bo) (struct panfrost_screen *screen, struct panfrost_bo *bo);
+	/* NOTE(review): the hooks above take BOs/transfers; the ones below look like job, slab and counter hooks — confirm */
+	void (*submit_job) (struct panfrost_context *ctx, mali_ptr addr, int nr_atoms);
+	void (*force_flush_fragment) (struct panfrost_context *ctx);
+	void (*allocate_slab) (struct panfrost_screen *screen,
+		               struct panfrost_memory *mem,
+		               size_t pages,
+		               bool same_va,
+		               int extra_flags,
+		               int commit_count,
+		               int extent);
+	void (*enable_counters) (struct panfrost_screen *screen);
+};
+
+struct panfrost_screen {
+        struct pipe_screen base; /* must stay first: panfrost_screen() casts pipe_screen* to this type */
+
+        struct renderonly *ro; /* set from renderonly_dup() when panfrost_create_screen() is given one */
+        struct panfrost_driver *driver;
+
+        struct panfrost_memory perf_counters;
+
+        /* Memory management is based on subdividing slabs with AMD's allocator */
+        struct pb_slabs slabs;
+        
+        /* TODO: Where should this live? */
+        struct panfrost_resource *display_target;
+
+	int last_fragment_id; /* panfrost_create_screen() starts this at -1 */
+	int last_fragment_flushed; /* boolean kept in an int; starts out true */
+};
+
+static inline struct panfrost_screen *
+panfrost_screen( struct pipe_screen *pipe )
+{
+        return (struct panfrost_screen *)pipe; /* downcast; relies on base being the first member */
+}
+
+#endif /* PAN_SCREEN_H */
diff --git a/src/gallium/meson.build b/src/gallium/meson.build
index 5e311217cfe..1626cd524fe 100644
--- a/src/gallium/meson.build
+++ b/src/gallium/meson.build
@@ -89,6 +89,12 @@ if with_gallium_vc4
 else
   driver_vc4 = declare_dependency()
 endif
+if with_gallium_panfrost
+  subdir('winsys/panfrost/drm')
+  subdir('drivers/panfrost')
+else
+  driver_panfrost = declare_dependency()
+endif
 if with_gallium_etnaviv
   subdir('winsys/etnaviv/drm')
   subdir('drivers/etnaviv')
diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build
index 68d226621b2..df77d329efa 100644
--- a/src/gallium/targets/dri/meson.build
+++ b/src/gallium/targets/dri/meson.build
@@ -58,13 +58,15 @@ libgallium_dri = shared_library(
     driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
     driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
     driver_tegra, driver_i915, driver_svga, driver_virgl,
-    driver_swr,
+    driver_swr, driver_panfrost
   ],
 )
 
 foreach d : [[with_gallium_kmsro, 'pl111_dri.so'],
              [with_gallium_kmsro, 'hx8357d_dri.so'],
              [with_gallium_kmsro, 'imx-drm_dri.so'],
+             [with_gallium_kmsro, 'rockchip_dri.so'],
+             [with_gallium_kmsro, 'meson_dri.so'],
              [with_gallium_radeonsi, 'radeonsi_dri.so'],
              [with_gallium_nouveau, 'nouveau_dri.so'],
              [with_gallium_freedreno, ['msm_dri.so', 'kgsl_dri.so']],
@@ -72,6 +74,7 @@ foreach d : [[with_gallium_kmsro, 'pl111_dri.so'],
              [with_gallium_softpipe and with_gallium_drisw_kms, 'kms_swrast_dri.so'],
              [with_gallium_v3d, 'v3d_dri.so'],
              [with_gallium_vc4, 'vc4_dri.so'],
+             [with_gallium_panfrost, 'panfrost_dri.so'],
              [with_gallium_etnaviv, 'etnaviv_dri.so'],
              [with_gallium_tegra, 'tegra_dri.so'],
              [with_gallium_i915, 'i915_dri.so'],
diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
index 6b0ea785c40..17484ced979 100644
--- a/src/gallium/targets/dri/target.c
+++ b/src/gallium/targets/dri/target.c
@@ -83,6 +83,16 @@ DEFINE_LOADER_DRM_ENTRYPOINT(pl111)
 #endif
 #endif
 
+#if defined(GALLIUM_PANFROST)
+DEFINE_LOADER_DRM_ENTRYPOINT(panfrost)
+#if defined(GALLIUM_KMSRO)
+DEFINE_LOADER_DRM_ENTRYPOINT(rockchip)
+DEFINE_LOADER_DRM_ENTRYPOINT(meson)
+#endif
+#endif
+
+
+
 #if defined(GALLIUM_ETNAVIV)
 DEFINE_LOADER_DRM_ENTRYPOINT(imx_drm)
 DEFINE_LOADER_DRM_ENTRYPOINT(etnaviv)
diff --git a/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c b/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c
index 36d4e412486..7752474f8aa 100644
--- a/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c
+++ b/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c
@@ -29,6 +29,7 @@
 #include "vc4/drm/vc4_drm_public.h"
 #include "etnaviv/drm/etnaviv_drm_public.h"
 #include "freedreno/drm/freedreno_drm_public.h"
+#include "panfrost/drm/panfrost_drm_public.h"
 #include "xf86drm.h"
 
 #include "pipe/p_screen.h"
@@ -82,5 +83,29 @@ struct pipe_screen *kmsro_drm_screen_create(int fd)
    }
 #endif
 
+#if defined(GALLIUM_PANFROST)
+   ro.gpu_fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER);
+   /* is_drm records which of the two nodes ended up behind ro.gpu_fd */
+   bool is_drm = true;
+   if (ro.gpu_fd < 0) {
+      /* For compatibility with legacy kernels, fall back on the non-DRM
+       * interface */
+
+      ro.gpu_fd = open("/dev/mali0", O_RDWR | O_CLOEXEC);
+      is_drm = false;
+   }
+
+   if (ro.gpu_fd >= 0) {
+      ro.create_for_resource = renderonly_create_kms_dumb_buffer_for_resource;
+      screen = panfrost_drm_screen_create_renderonly(&ro, is_drm);
+      if (!screen)
+         close(ro.gpu_fd); /* this fd was opened above, so drop it on failure */
+
+      return screen;
+   }
+#endif
+
+
+
    return screen;
 }
diff --git a/src/gallium/winsys/kmsro/drm/meson.build b/src/gallium/winsys/kmsro/drm/meson.build
index 5ea53dd44bf..51246b68e34 100644
--- a/src/gallium/winsys/kmsro/drm/meson.build
+++ b/src/gallium/winsys/kmsro/drm/meson.build
@@ -28,6 +28,9 @@ endif
 if with_gallium_freedreno
   kmsro_c_args += '-DGALLIUM_FREEDRENO'
 endif
+if with_gallium_panfrost
+  kmsro_c_args += '-DGALLIUM_PANFROST'
+endif
 
 libkmsrowinsys = static_library(
   'kmsrowinsys',
diff --git a/src/gallium/winsys/panfrost/drm/Android.mk b/src/gallium/winsys/panfrost/drm/Android.mk
new file mode 100644
index 00000000000..5f286856b5a
--- /dev/null
+++ b/src/gallium/winsys/panfrost/drm/Android.mk
@@ -0,0 +1,33 @@
+# Copyright (C) 2014 Emil Velikov <[email protected]>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_MODULE := libmesa_winsys_panfrost
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/winsys/panfrost/drm/Makefile.am b/src/gallium/winsys/panfrost/drm/Makefile.am
new file mode 100644
index 00000000000..7a836288b02
--- /dev/null
+++ b/src/gallium/winsys/panfrost/drm/Makefile.am
@@ -0,0 +1,33 @@
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+	-I$(top_srcdir)/src/gallium/drivers \
+	$(GALLIUM_WINSYS_CFLAGS)
+
+noinst_LTLIBRARIES = libpanfrostdrm.la
+
+libpanfrostdrm_la_SOURCES = $(C_SOURCES)
+
+EXTRA_DIST = meson.build
diff --git a/src/gallium/winsys/panfrost/drm/Makefile.sources b/src/gallium/winsys/panfrost/drm/Makefile.sources
new file mode 100644
index 00000000000..24a1073097c
--- /dev/null
+++ b/src/gallium/winsys/panfrost/drm/Makefile.sources
@@ -0,0 +1,3 @@
+C_SOURCES := \
+	panfrost_drm_public.h \
+	panfrost_drm_winsys.c
diff --git a/src/gallium/winsys/panfrost/drm/meson.build b/src/gallium/winsys/panfrost/drm/meson.build
new file mode 100644
index 00000000000..d19ce2ddb8b
--- /dev/null
+++ b/src/gallium/winsys/panfrost/drm/meson.build
@@ -0,0 +1,29 @@
+# Copyright © 2017 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+libpanfrostwinsys = static_library(
+  'panfrostwinsys',
+  files('panfrost_drm_winsys.c'),
+  include_directories : [
+    inc_src, inc_include,
+    inc_gallium, inc_gallium_aux, inc_gallium_drivers,
+  ],
+  c_args : [c_vis_args],
+)
diff --git a/src/gallium/winsys/panfrost/drm/panfrost_drm_public.h b/src/gallium/winsys/panfrost/drm/panfrost_drm_public.h
new file mode 100644
index 00000000000..4709c429b96
--- /dev/null
+++ b/src/gallium/winsys/panfrost/drm/panfrost_drm_public.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright © 2014 Broadcom
+ * Copyright © 2018 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __PAN_DRM_PUBLIC_H__
+#define __PAN_DRM_PUBLIC_H__
+
+#include <stdbool.h>
+
+struct pipe_screen;
+struct renderonly;
+
+struct pipe_screen *panfrost_drm_screen_create(int drmFD);
+struct pipe_screen *panfrost_drm_screen_create_renderonly(struct renderonly *ro, bool is_drm);
+
+#endif /* __PAN_DRM_PUBLIC_H__ */
diff --git a/src/gallium/winsys/panfrost/drm/panfrost_drm_winsys.c b/src/gallium/winsys/panfrost/drm/panfrost_drm_winsys.c
new file mode 100644
index 00000000000..7d08743e23c
--- /dev/null
+++ b/src/gallium/winsys/panfrost/drm/panfrost_drm_winsys.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright © 2014 Broadcom
+ * Copyright © 2018 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "renderonly/renderonly.h"
+#include "panfrost_drm_public.h"
+#include "panfrost/pan_public.h"
+
+/**
+ * Create a screen on a close-on-exec duplicate of \p gpu_fd.
+ *
+ * NOTE(review): assumes panfrost_create_screen() leaves the descriptor open
+ * when it fails (true for the stub added by this patch), so it is closed here.
+ *
+ * \return the new screen, or NULL if fcntl() or screen creation failed
+ */
+static struct pipe_screen *
+panfrost_drm_screen_create_on_dup(int gpu_fd, struct renderonly *ro, bool is_drm)
+{
+   struct pipe_screen *screen;
+   int fd = fcntl(gpu_fd, F_DUPFD_CLOEXEC, 3);
+
+   if (fd < 0)
+      return NULL;
+
+   screen = panfrost_create_screen(fd, ro, is_drm);
+   if (!screen)
+      close(fd);
+
+   return screen;
+}
+
+/** Create a screen for a DRM fd; \p fd itself is only duplicated, never closed. */
+struct pipe_screen *
+panfrost_drm_screen_create(int fd)
+{
+   return panfrost_drm_screen_create_on_dup(fd, NULL, true);
+}
+
+/** Create a screen for \p ro; ro->gpu_fd is only duplicated, never closed. */
+struct pipe_screen *
+panfrost_drm_screen_create_renderonly(struct renderonly *ro, bool is_drm)
+{
+   return panfrost_drm_screen_create_on_dup(ro->gpu_fd, ro, is_drm);
+}
author	Alyssa Rosenzweig <[email protected]>	2019-01-29 05:46:07 +0000
committer	Alyssa Rosenzweig <[email protected]>	2019-02-05 01:19:30 +0000
commit	61d3ae6e0bde93c5601278d1a60c44be655a7cb5 (patch)
tree	c0c23980f21110a1459db40e18b06edb92d87ded /src
parent	742d6cdb42e5570a3a74005f18bb89208069d01f (diff)