diff options
author | Alyssa Rosenzweig <[email protected]> | 2019-02-05 04:32:27 +0000 |
---|---|---|
committer | Alyssa Rosenzweig <[email protected]> | 2019-02-07 01:57:50 +0000 |
commit | 7da251fc721360fc28b984507959ebfa0c88c8b2 (patch) | |
tree | 9ba04222609b9ad87cf8f5e4af83e62af7d4c49c /src/gallium/drivers/panfrost | |
parent | 8f4485ef1a8bb0aeda996097f84869fb86bd51d2 (diff) |
panfrost: Check in sources for command stream
This patch includes the command stream portion of the driver,
complementing the earlier compiler. It provides a base for future work,
though it does not integrate with any particular winsys.
Signed-off-by: Alyssa Rosenzweig <[email protected]>
Diffstat (limited to 'src/gallium/drivers/panfrost')
22 files changed, 5441 insertions, 5 deletions
diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index f4dec42ad3e..9b90035d691 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -23,10 +23,23 @@ files_panfrost = files( 'pan_public.h', 'pan_screen.c', 'pan_screen.h', + 'pan_resource.c', + 'pan_resource.h', 'midgard/midgard_compile.c', 'midgard/cppwrap.cpp', 'midgard/disassemble.c', + + 'pan_context.c', + 'pan_drm.c', + 'pan_allocate.c', + 'pan_assemble.c', + 'pan_format.c', + 'pan_swizzle.c', + 'pan_blending.c', + 'pan_blend_shaders.c', + 'pan_wallpaper.c', + 'pan_pretty_print.c' ) inc_panfrost = [ diff --git a/src/gallium/drivers/panfrost/pan_allocate.c b/src/gallium/drivers/panfrost/pan_allocate.c new file mode 100644 index 00000000000..3732d253978 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_allocate.c @@ -0,0 +1,220 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <panfrost-misc.h> +#include <panfrost-job.h> +#include "pan_context.h" + +/* TODO: What does this actually have to be? */ +#define ALIGNMENT 128 + +/* Allocate a mapped chunk directly from a heap */ + +struct panfrost_transfer +panfrost_allocate_chunk(struct panfrost_context *ctx, size_t size, unsigned heap_id) +{ + size = ALIGN(size, ALIGNMENT); + + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = panfrost_screen(gallium->screen); + + struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, size, heap_id); + struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry; + struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab; + + struct panfrost_transfer transfer = { + .cpu = backing->cpu + p_entry->offset, + .gpu = backing->gpu + p_entry->offset + }; + + return transfer; +} + +/* Transient command stream pooling: command stream uploads try to simply copy + * into whereever we left off. 
If there isn't space, we allocate a new entry + * into the pool and copy there */ + +struct panfrost_transfer +panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz) +{ + /* Pad the size */ + sz = ALIGN(sz, ALIGNMENT); + + /* Check if there is room in the current entry */ + struct panfrost_transient_pool *pool = &ctx->transient_pools[ctx->cmdstream_i]; + + if ((pool->entry_offset + sz) > pool->entry_size) { + /* Don't overflow this entry -- advance to the next */ + + pool->entry_offset = 0; + + pool->entry_index++; + assert(pool->entry_index < PANFROST_MAX_TRANSIENT_ENTRIES); + + /* Check if this entry exists */ + + if (pool->entry_index >= pool->entry_count) { + /* Don't overflow the pool -- allocate a new one */ + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = panfrost_screen(gallium->screen); + struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, pool->entry_size, HEAP_TRANSIENT); + + pool->entry_count++; + pool->entries[pool->entry_index] = (struct panfrost_memory_entry *) entry; + } + + /* Make sure we -still- won't overflow */ + assert(sz < pool->entry_size); + } + + /* We have an entry we can write to, so do the upload! */ + struct panfrost_memory_entry *p_entry = pool->entries[pool->entry_index]; + struct panfrost_memory *backing = (struct panfrost_memory *) p_entry->base.slab; + + struct panfrost_transfer ret = { + .cpu = backing->cpu + p_entry->offset + pool->entry_offset, + .gpu = backing->gpu + p_entry->offset + pool->entry_offset + }; + + /* Advance the pointer */ + pool->entry_offset += sz; + + return ret; + +} + +mali_ptr +panfrost_upload_transient(struct panfrost_context *ctx, const void *data, size_t sz) +{ + struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sz); + memcpy(transfer.cpu, data, sz); + return transfer.gpu; +} + +// TODO: An actual allocator, perhaps +// TODO: Multiple stacks for multiple bases? 
+ +int hack_stack_bottom = 4096; /* Don't interfere with constant offsets */ +int last_offset = 0; + +static inline int +pandev_allocate_offset(int *stack, size_t sz) +{ + /* First, align the stack bottom to something nice; it's not critical + * at this point if we waste a little space to do so. */ + + int excess = *stack & (ALIGNMENT - 1); + + /* Add the secret of my */ + if (excess) + *stack += ALIGNMENT - excess; + + /* Finally, use the new bottom for the allocation and move down the + * stack */ + + int ret = *stack; + *stack += sz; + return ret; +} + +inline mali_ptr +pandev_upload(int cheating_offset, int *stack_bottom, mali_ptr base, void *base_map, const void *data, size_t sz, bool no_pad) +{ + int offset; + + /* We're not positive about the sizes of all objects, but we don't want + * them to crash against each other either. Let the caller disable + * padding if they so choose, though. */ + + size_t padded_size = no_pad ? sz : sz * 2; + + /* If no specific bottom is specified, use a global one... 
don't do + * this in production, kids */ + + if (!stack_bottom) + stack_bottom = &hack_stack_bottom; + + /* Allocate space for the new GPU object, if required */ + + if (cheating_offset == -1) { + offset = pandev_allocate_offset(stack_bottom, padded_size); + } else { + offset = cheating_offset; + *stack_bottom = offset + sz; + } + + /* Save last offset for sequential uploads (job descriptors) */ + last_offset = offset + padded_size; + + /* Upload it */ + memcpy((uint8_t *) base_map + offset, data, sz); + + /* Return the GPU address */ + return base + offset; +} + +/* Upload immediately after the last allocation */ + +mali_ptr +pandev_upload_sequential(mali_ptr base, void *base_map, const void *data, size_t sz) +{ + return pandev_upload(last_offset, NULL, base, base_map, data, sz, /* false */ true); +} + +/* Simplified APIs for the real driver, rather than replays */ + +mali_ptr +panfrost_upload(struct panfrost_memory *mem, const void *data, size_t sz, bool no_pad) +{ + /* Bounds check */ + if ((mem->stack_bottom + sz) >= mem->size) { + printf("Out of memory, tried to upload %zd but only %zd available\n", sz, mem->size - mem->stack_bottom); + assert(0); + } + + return pandev_upload(-1, &mem->stack_bottom, mem->gpu, mem->cpu, data, sz, no_pad); +} + +mali_ptr +panfrost_upload_sequential(struct panfrost_memory *mem, const void *data, size_t sz) +{ + return pandev_upload(last_offset, &mem->stack_bottom, mem->gpu, mem->cpu, data, sz, true); +} + +/* Simplified interface to allocate a chunk without any upload, to allow + * zero-copy uploads. This is particularly useful when the copy would happen + * anyway, for instance with texture swizzling. 
*/ + +void * +panfrost_allocate_transfer(struct panfrost_memory *mem, size_t sz, mali_ptr *gpu) +{ + int offset = pandev_allocate_offset(&mem->stack_bottom, sz); + + *gpu = mem->gpu + offset; + return mem->cpu + offset; +} diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c new file mode 100644 index 00000000000..91cde49f0d2 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -0,0 +1,226 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pan_context.h" + +#include "compiler/nir/nir.h" +#include "nir/tgsi_to_nir.h" +#include "midgard/midgard_compile.h" +#include "util/u_dynarray.h" + +#include "tgsi/tgsi_dump.h" + +void +panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *meta, const char *src, int type, struct panfrost_shader_state *state) +{ + uint8_t *dst; + + nir_shader *s; + + struct pipe_shader_state *cso = state->base; + + if (cso->type == PIPE_SHADER_IR_NIR) { + s = nir_shader_clone(NULL, cso->ir.nir); + } else { + assert (cso->type == PIPE_SHADER_IR_TGSI); + //tgsi_dump(cso->tokens, 0); + s = tgsi_to_nir(cso->tokens, &midgard_nir_options); + } + + s->info.stage = type == JOB_TYPE_VERTEX ? MESA_SHADER_VERTEX : MESA_SHADER_FRAGMENT; + + if (s->info.stage == MESA_SHADER_FRAGMENT) { + /* Inject the alpha test now if we need to */ + + if (state->alpha_state.enabled) { + NIR_PASS_V(s, nir_lower_alpha_test, state->alpha_state.func, false); + } + } + + /* Call out to Midgard compiler given the above NIR */ + + midgard_program program = { + .alpha_ref = state->alpha_state.ref_value + }; + + midgard_compile_shader_nir(s, &program, false); + + /* Prepare the compiled binary for upload */ + int size = program.compiled.size; + dst = program.compiled.data; + + /* Inject an external shader */ +#if 0 + char buf[4096]; + + if (type != JOB_TYPE_VERTEX) { + FILE *fp = fopen("/home/alyssa/panfrost/midgard/good.bin", "rb"); + fread(buf, 1, 2816, fp); + fclose(fp); + dst = buf; + size = 2816; + } + +#endif + + /* Upload the shader. The lookahead tag is ORed on as a tagged pointer. + * I bet someone just thought that would be a cute pun. At least, + * that's how I'd do it. 
*/ + + meta->shader = panfrost_upload(&ctx->shaders, dst, size, true) | program.first_tag; + + util_dynarray_fini(&program.compiled); + + meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff); + meta->attribute_count = program.attribute_count; + meta->varying_count = program.varying_count; + meta->midgard1.work_count = program.work_register_count; + + state->can_discard = program.can_discard; + state->writes_point_size = program.writes_point_size; + + /* Separate as primary uniform count is truncated */ + state->uniform_count = program.uniform_count; + + /* gl_Position eats up an extra spot */ + if (type == JOB_TYPE_VERTEX) + meta->varying_count += 1; + + /* gl_FragCoord does -not- eat an extra spot; it will be included in our count if we need it */ + + + meta->midgard1.unknown2 = 8; /* XXX */ + + /* Varyings are known only through the shader. We choose to upload this + * information with the vertex shader, though the choice is perhaps + * arbitrary */ + + if (type == JOB_TYPE_VERTEX) { + struct panfrost_varyings *varyings = &state->varyings; + + /* Measured in vec4 words. Don't include gl_Position */ + int varying_count = program.varying_count; + + /* Setup two buffers, one for position, the other for normal + * varyings, as seen in traces. TODO: Are there other + * configurations we might use? 
*/ + + varyings->varying_buffer_count = 2; + + /* mediump vec4s sequentially */ + varyings->varyings_stride[0] = (2 * sizeof(float)) * varying_count; + + /* highp gl_Position */ + varyings->varyings_stride[1] = 4 * sizeof(float); + + /* mediump gl_PointSize */ + if (program.writes_point_size) { + ++varyings->varying_buffer_count; + varyings->varyings_stride[2] = 2; /* sizeof(fp16) */ + } + + /* Setup gl_Position, its weirdo analogue, and gl_PointSize (optionally) */ + unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1); + unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4); + + struct mali_attr_meta vertex_special_varyings[] = { + { + .index = 1, + .format = MALI_VARYING_POS, + + .swizzle = default_vec4_swizzle, + .unknown1 = 0x2, + }, + { + .index = 1, + .format = MALI_RGBA16F, + + /* TODO: Wat? yyyy swizzle? */ + .swizzle = 0x249, + .unknown1 = 0x0, + }, + { + .index = 2, + .format = MALI_R16F, + .swizzle = default_vec1_swizzle, + .unknown1 = 0x2 + } + }; + + /* How many special vertex varyings are actually required? */ + int vertex_special_count = 2 + (program.writes_point_size ? 1 : 0); + + /* Setup actual varyings. XXX: Don't assume vec4 */ + + struct mali_attr_meta mali_varyings[PIPE_MAX_ATTRIBS]; + + for (int i = 0; i < varying_count; ++i) { + struct mali_attr_meta vec4_varying_meta = { + .index = 0, + .format = MALI_RGBA16F, + .swizzle = default_vec4_swizzle, + .unknown1 = 0x2, + + /* Set offset to keep everything back-to-back in + * the same buffer */ + .src_offset = 8 * i, + }; + + mali_varyings[i] = vec4_varying_meta; + } + + /* We don't count the weirdo gl_Position in our varying count */ + varyings->varying_count = varying_count - 1; + + /* In this context, position_meta represents the implicit + * gl_FragCoord varying. 
So, upload all the varyings */ + + unsigned varyings_size = sizeof(struct mali_attr_meta) * varyings->varying_count; + unsigned vertex_special_size = sizeof(struct mali_attr_meta) * vertex_special_count; + unsigned vertex_size = vertex_special_size + varyings_size; + unsigned fragment_size = varyings_size + sizeof(struct mali_attr_meta); + + struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, vertex_size + fragment_size, HEAP_DESCRIPTOR); + + /* Copy varyings in the follow order: + * - Position 1, 2 + * - Varyings 1, 2, ..., n + * - Varyings 1, 2, ..., n (duplicate) + * - Position 1 + */ + + memcpy(transfer.cpu, vertex_special_varyings, vertex_special_size); + memcpy(transfer.cpu + vertex_special_size, mali_varyings, varyings_size); + memcpy(transfer.cpu + vertex_size, mali_varyings, varyings_size); + memcpy(transfer.cpu + vertex_size + varyings_size, &vertex_special_varyings[0], sizeof(struct mali_attr_meta)); + + /* Point to the descriptor */ + varyings->varyings_buffer_cpu = transfer.cpu; + varyings->varyings_descriptor = transfer.gpu; + varyings->varyings_descriptor_fragment = transfer.gpu + vertex_size; + } +} diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c new file mode 100644 index 00000000000..566c6652fda --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c @@ -0,0 +1,178 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) 
shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <stdio.h> +#include "pan_blend_shaders.h" +#include "midgard/midgard_compile.h" +#include "compiler/nir/nir_builder.h" +//#include "gallium/auxiliary/nir/nir_lower_blend.h" + +/* + * Implements the command stream portion of programmatic blend shaders. + * + * On Midgard, common blending operations are accelerated by the fixed-function + * blending pipeline. Panfrost supports this fast path via the code in + * pan_blending.c. Nevertheless, uncommon blend modes (including some seemingly + * simple modes present in ES2) require "blend shaders", a special internal + * shader type used for programmable blending. + * + * Blend shaders operate during the normal blending time, but they bypass the + * fixed-function blending pipeline and instead go straight to the Midgard + * shader cores. The shaders themselves are essentially just fragment shaders, + * making heavy use of uint8 arithmetic to manipulate RGB values for the + * framebuffer. + * + * As is typical with Midgard, shader binaries must be accompanied by + * information about the first tag (ORed with the bottom nibble of address, + * like usual) and work registers. Work register count is specified in the + * blend descriptor, as well as in the corresponding fragment shader's work + * count. This suggests that blend shader invocation is tied to fragment shader + * execution. 
+ * + * --- + * + * As for blend shaders, they use the standard ISA. + * + * The source pixel colour, including alpha, is preloaded into r0 as a vec4 of + * float32. + * + * The destination pixel colour must be loaded explicitly via load/store ops. + * TODO: Investigate. + * + * They use fragment shader writeout; however, instead of writing a vec4 of + * float32 for RGBA encoding, we write out a vec4 of uint8, using 8-bit imov + * instead of 32-bit fmov. The net result is that r0 encodes a single uint32 + * containing all four channels of the color. Accordingly, the blend shader + * epilogue has to scale all four channels by 255 and then type convert to a + * uint8. + * + * --- + * + * Blend shaders hardcode constants. Naively, this requires recompilation each + * time the blend color changes, which is a performance risk. Accordingly, we + * 'cheat' a bit: instead of loading the constant, we compile a shader with a + * dummy constant, exporting the offset to the immediate in the shader binary, + * storing this generic binary and metadata in the CSO itself at CSO create + * time. + * + * We then hot patch in the color into this shader at attachment / color change + * time, allowing for CSO create to be the only expensive operation + * (compilation). 
+ */ + +static nir_ssa_def * +nir_blending_f(const struct pipe_rt_blend_state *blend, nir_builder *b, + nir_ssa_def *s_src, nir_ssa_def *s_dst, nir_ssa_def *s_con) +{ + /* Stub, to be replaced by the real implementation when that is + * upstream (pending on a rewrite to be Gallium agnostic) */ + + return s_src; +} + +void +panfrost_make_blend_shader(struct panfrost_context *ctx, struct panfrost_blend_state *cso, const struct pipe_blend_color *blend_color) +{ + const struct pipe_rt_blend_state *blend = &cso->base.rt[0]; + mali_ptr *out = &cso->blend_shader; + + /* Build the shader */ + + nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL); + nir_function *fn = nir_function_create(shader, "main"); + nir_function_impl *impl = nir_function_impl_create(fn); + + /* Create the blend variables */ + + nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_Color"); + nir_variable *c_dst = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_SecondaryColor"); + nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_FragColor"); + nir_variable *c_con = nir_variable_create(shader, nir_var_uniform, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "constant"); + + c_src->data.location = VARYING_SLOT_COL0; + c_dst->data.location = VARYING_SLOT_COL1; + c_out->data.location = FRAG_RESULT_COLOR; + + /* Setup nir_builder */ + + nir_builder _b; + nir_builder *b = &_b; + nir_builder_init(b, impl); + b->cursor = nir_before_block(nir_start_block(impl)); + + /* Setup inputs */ + + nir_ssa_def *s_src = nir_load_var(b, c_src); + nir_ssa_def *s_dst = nir_load_var(b, c_dst); + nir_ssa_def *s_con = nir_load_var(b, c_con); + + /* Build a trivial blend shader */ + nir_store_var(b, c_out, nir_blending_f(blend, b, s_src, s_dst, s_con), 0xFF); + + nir_print_shader(shader, stdout); + + /* Compile the built shader */ + + 
midgard_program program; + midgard_compile_shader_nir(shader, &program, true); + + + /* Upload the shader */ + + int size = program.compiled.size; + uint8_t *dst = program.compiled.data; + +#if 0 + midgard_program program = { + .work_register_count = 3, + .first_tag = 9, + //.blend_patch_offset = 16 + .blend_patch_offset = -1, + }; + + char dst[4096]; + + FILE *fp = fopen("/home/alyssa/panfrost/midgard/blend.bin", "rb"); + fread(dst, 1, 2816, fp); + fclose(fp); + int size = 2816; +#endif + + /* Hot patch in constant color */ + + if (program.blend_patch_offset >= 0) { + float *hot_color = (float *) (dst + program.blend_patch_offset); + + for (int c = 0; c < 4; ++c) + hot_color[c] = blend_color->color[c]; + } + + *out = panfrost_upload(&ctx->shaders, dst, size, true) | program.first_tag; + + /* We need to switch to shader mode */ + cso->has_blend_shader = true; + + /* At least two work registers are needed due to an encoding quirk */ + cso->blend_work_count = MAX2(program.work_register_count, 2); +} diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.h b/src/gallium/drivers/panfrost/pan_blend_shaders.h new file mode 100644 index 00000000000..1a914772673 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.h @@ -0,0 +1,36 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __PAN_BLEND_SHADERS_H__ +#define __PAN_BLEND_SHADERS_H__ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include <panfrost-job.h> +#include "pan_context.h" + +void +panfrost_make_blend_shader(struct panfrost_context *ctx, struct panfrost_blend_state *cso, const struct pipe_blend_color *blend_color); + +#endif diff --git a/src/gallium/drivers/panfrost/pan_blending.c b/src/gallium/drivers/panfrost/pan_blending.c new file mode 100644 index 00000000000..058fb6bda84 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_blending.c @@ -0,0 +1,401 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <stdio.h> +#include "pan_blending.h" + +/* + * Implements fixed-function blending on Midgard. + * + * Midgard splits blending into a fixed-function fast path and a programmable + * slow path. The fixed function blending architecture is based on "dominant" + * blend factors. Blending is encoded separately (but identically) between RGB + * and alpha functions. + * + * Essentially, for a given blending operation, there is a single dominant + * factor. The following dominant factors are possible: + * + * - zero + * - source color + * - destination color + * - source alpha + * - destination alpha + * - constant float + * + * Further, a dominant factor's arithmetic complement could be used. For + * instance, to encode GL_ONE_MINUS_SRC_ALPHA, the dominant factor would be + * MALI_DOMINANT_SRC_ALPHA with the complement_dominant bit set. + * + * A single constant float can be passed to the fixed-function hardware, + * allowing CONSTANT_ALPHA support. Further, if all components of the constant + * glBlendColor are identical, CONSTANT_COLOR can be implemented with the + * constant float mode. If the components differ, programmable blending is + * required. + * + * The nondominant factor can be either: + * + * - the same as the dominant factor (MALI_BLEND_NON_MIRROR) + * - zero (MALI_BLEND_NON_ZERO) + * + * Exactly one of the blend operation's source or destination can be used as + * the dominant factor; this is selected by the + * MALI_BLEND_DOM_SOURCE/DESTINATION flag. 
+ * + * By default, all blending follows the standard OpenGL addition equation: + * + * out = source_value * source_factor + destination_value * destination_factor + * + * By setting the negate_source or negate_dest bits, other blend functions can + * be created. For instance, for SUBTRACT mode, set the "negate destination" + * flag, and similarly for REVERSE_SUBTRACT with "negate source". + * + * Finally, there is a "clip modifier" controlling the final blending + * behaviour, allowing for the following modes: + * + * - normal + * - force source factor to one (MALI_BLEND_MOD_SOURCE_ONE) + * - force destination factor to one (MALI_BLEND_MOD_DEST_ONE) + * + * The clipping flags can be used to encode blend modes where the nondominant + * factor is ONE. + * + * As an example putting it all together, to encode the following blend state: + * + * glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + * glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE); + * + * We need the following configuration: + * + * - negate source (for REVERSE_SUBTRACT) + * - dominant factor "source alpha" + * - complement dominant + * - source dominant + * - force destination to ONE + * + * The following routines implement this fixed function blending encoding + */ + +/* Helper to find the uncomplemented Gallium blend factor corresponding to a + * complemented Gallium blend factor */ + +static int +complement_factor(int factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return PIPE_BLENDFACTOR_SRC_COLOR; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return PIPE_BLENDFACTOR_SRC_ALPHA; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return PIPE_BLENDFACTOR_DST_ALPHA; + + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return PIPE_BLENDFACTOR_DST_COLOR; + + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return PIPE_BLENDFACTOR_CONST_COLOR; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return PIPE_BLENDFACTOR_CONST_ALPHA; + + default: + return -1; + } +} + +/* Helper to strip the complement from any Gallium blend 
factor */ + +static int +uncomplement_factor(int factor) +{ + int complement = complement_factor(factor); + return (complement == -1) ? factor : complement; +} + + +/* Attempt to find the dominant factor given a particular factor, complementing + * as necessary */ + +static bool +panfrost_make_dominant_factor(unsigned src_factor, enum mali_dominant_factor *factor, bool *invert) +{ + switch (src_factor) { + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + *factor = MALI_DOMINANT_SRC_COLOR; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + *factor = MALI_DOMINANT_SRC_ALPHA; + break; + + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + *factor = MALI_DOMINANT_DST_COLOR; + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + *factor = MALI_DOMINANT_DST_ALPHA; + break; + + case PIPE_BLENDFACTOR_ONE: + case PIPE_BLENDFACTOR_ZERO: + *factor = MALI_DOMINANT_ZERO; + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + *factor = MALI_DOMINANT_CONSTANT; + break; + + default: + /* Fancy blend modes not supported */ + return false; + } + + /* Set invert flags */ + + switch (src_factor) { + case PIPE_BLENDFACTOR_ONE: + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + *invert = true; + + default: + break; + } + + return true; +} + +/* Check if this is a special edge case blend factor, which may require the use + * of clip modifiers */ + +static bool +is_edge_blendfactor(unsigned factor) +{ + return factor == PIPE_BLENDFACTOR_ONE || factor == PIPE_BLENDFACTOR_ZERO; +} + +/* Perform the 
actual fixed function encoding. Encode the function with negate + * bits. Check for various cases to work out the dominant/nondominant split and + * accompanying flags. */ + +static bool +panfrost_make_fixed_blend_part(unsigned func, unsigned src_factor, unsigned dst_factor, unsigned *out) +{ + struct mali_blend_mode part = { 0 }; + + /* Make sure that the blend function is representible with negate flags */ + + if (func == PIPE_BLEND_ADD) { + /* Default, no modifiers needed */ + } else if (func == PIPE_BLEND_SUBTRACT) + part.negate_dest = true; + else if (func == PIPE_BLEND_REVERSE_SUBTRACT) + part.negate_source = true; + else + return false; + + part.clip_modifier = MALI_BLEND_MOD_NORMAL; + + /* Decide which is dominant, source or destination. If one is an edge + * case, use the other as a factor. If they're the same, it doesn't + * matter; we just mirror. If they're different non-edge-cases, you + * need a blend shader (don't do that). */ + + if (is_edge_blendfactor(dst_factor)) { + part.dominant = MALI_BLEND_DOM_SOURCE; + part.nondominant_mode = MALI_BLEND_NON_ZERO; + + if (dst_factor == PIPE_BLENDFACTOR_ONE) + part.clip_modifier = MALI_BLEND_MOD_DEST_ONE; + } else if (is_edge_blendfactor(src_factor)) { + part.dominant = MALI_BLEND_DOM_DESTINATION; + part.nondominant_mode = MALI_BLEND_NON_ZERO; + + if (src_factor == PIPE_BLENDFACTOR_ONE) + part.clip_modifier = MALI_BLEND_MOD_SOURCE_ONE; + + } else if (src_factor == dst_factor) { + part.dominant = MALI_BLEND_DOM_DESTINATION; /* Ought to be an arbitrary choice, but we need to set destination for some reason? Align with the blob until we understand more */ + part.nondominant_mode = MALI_BLEND_NON_MIRROR; + } else if (src_factor == complement_factor(dst_factor)) { + /* TODO: How does this work exactly? 
*/ + part.dominant = MALI_BLEND_DOM_SOURCE; + part.nondominant_mode = MALI_BLEND_NON_MIRROR; + part.clip_modifier = MALI_BLEND_MOD_DEST_ONE; + } else if (dst_factor == complement_factor(src_factor)) { + part.dominant = MALI_BLEND_DOM_SOURCE; + part.nondominant_mode = MALI_BLEND_NON_MIRROR; + part.clip_modifier = /*MALI_BLEND_MOD_SOURCE_ONE*/MALI_BLEND_MOD_DEST_ONE; /* Which modifier should it be? */ + } else { + printf("Failed to find dominant factor?\n"); + return false; + } + + unsigned in_dominant_factor = + part.dominant == MALI_BLEND_DOM_SOURCE ? src_factor : dst_factor; + + if (part.clip_modifier == MALI_BLEND_MOD_NORMAL && in_dominant_factor == PIPE_BLENDFACTOR_ONE) { + part.clip_modifier = part.dominant == MALI_BLEND_DOM_SOURCE ? MALI_BLEND_MOD_SOURCE_ONE : MALI_BLEND_MOD_DEST_ONE; + in_dominant_factor = PIPE_BLENDFACTOR_ZERO; + } + + bool invert_dominant = false; + enum mali_dominant_factor dominant_factor; + + if (!panfrost_make_dominant_factor(in_dominant_factor, &dominant_factor, &invert_dominant)) + return false; + + part.dominant_factor = dominant_factor; + part.complement_dominant = invert_dominant; + + /* Write out mode */ + memcpy(out, &part, sizeof(part)); + + return true; +} + +/* We can upload a single constant for all of the factors. So, scan the factors + * for constants used, and scan the constants for the constants used. If there + * is a single unique constant, output that. If there are multiple, + * fixed-function operation breaks down. 
*/ + +static bool +panfrost_make_constant(unsigned *factors, unsigned num_factors, const struct pipe_blend_color *blend_color, float *out) +{ + /* Color components used */ + bool cc[4] = { false }; + + for (unsigned i = 0; i < num_factors; ++i) { + unsigned factor = uncomplement_factor(factors[i]); + + if (factor == PIPE_BLENDFACTOR_CONST_COLOR) + cc[0] = cc[1] = cc[2] = true; + else if (factor == PIPE_BLENDFACTOR_CONST_ALPHA) + cc[3] = true; + } + + /* Find the actual constant associated with the components used*/ + + float constant = 0.0; + bool has_constant = false; + + for (unsigned i = 0; i < 4; ++i) { + /* If the component is unused, nothing to do */ + if (!cc[i]) continue; + + float value = blend_color->color[i]; + + /* Either there's a second constant, in which case we fail, or + * there's no constant / a first constant, in which case we use + * that constant */ + + if (has_constant && constant != value) { + return false; + } else { + has_constant = true; + constant = value; + } + } + + /* We have the constant -- success! */ + + *out = constant; + return true; +} + +/* Create the descriptor for a fixed blend mode given the corresponding Gallium + * state, if possible. Return true and write out the blend descriptor into + * blend_equation. 
If it is not possible with the fixed function + * representation, return false to handle degenerate cases with a blend shader + */ + +static const struct pipe_rt_blend_state default_blend = { + .blend_enable = 1, + + .rgb_func = PIPE_BLEND_ADD, + .rgb_src_factor = PIPE_BLENDFACTOR_ONE, + .rgb_dst_factor = PIPE_BLENDFACTOR_ZERO, + + .alpha_func = PIPE_BLEND_ADD, + .alpha_src_factor = PIPE_BLENDFACTOR_ONE, + .alpha_dst_factor = PIPE_BLENDFACTOR_ZERO, + + .colormask = PIPE_MASK_RGBA +}; + +bool +panfrost_make_fixed_blend_mode(const struct pipe_rt_blend_state *blend, struct mali_blend_equation *out, unsigned colormask, const struct pipe_blend_color *blend_color) +{ + /* If no blending is enabled, default back on `replace` mode */ + + if (!blend->blend_enable) + return panfrost_make_fixed_blend_mode(&default_blend, out, colormask, blend_color); + + /* We have room only for a single float32 constant between the four + * components. If we need more, spill to the programmable pipeline. */ + + unsigned factors[] = { + blend->rgb_src_factor, blend->rgb_dst_factor, + blend->alpha_src_factor, blend->alpha_dst_factor, + }; + + if (!panfrost_make_constant(factors, ARRAY_SIZE(factors), blend_color, &out->constant)) + return false; + + unsigned rgb_mode = 0; + unsigned alpha_mode = 0; + + if (!panfrost_make_fixed_blend_part( + blend->rgb_func, blend->rgb_src_factor, blend->rgb_dst_factor, + &rgb_mode)) + return false; + + if (!panfrost_make_fixed_blend_part( + blend->alpha_func, blend->alpha_src_factor, blend->alpha_dst_factor, + &alpha_mode)) + return false; + + out->rgb_mode = rgb_mode; + out->alpha_mode = alpha_mode; + + /* Gallium and Mali represent colour masks identically.
XXX: Static assert for future proof */ + out->color_mask = colormask; + + return true; +} diff --git a/src/gallium/drivers/panfrost/pan_blending.h b/src/gallium/drivers/panfrost/pan_blending.h new file mode 100644 index 00000000000..926b41e298e --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_blending.h @@ -0,0 +1,34 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef __PAN_BLENDING_H__ +#define __PAN_BLENDING_H__ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include <panfrost-job.h> + +bool panfrost_make_fixed_blend_mode(const struct pipe_rt_blend_state *blend, struct mali_blend_equation *out, unsigned colormask, const struct pipe_blend_color *blend_color); + +#endif diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c new file mode 100644 index 00000000000..0551d553182 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -0,0 +1,2698 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <sys/poll.h> +#include <errno.h> + +#include "pan_context.h" +#include "pan_swizzle.h" +#include "pan_format.h" + +#include "util/macros.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_upload_mgr.h" +#include "util/u_memory.h" +#include "util/half_float.h" +#include "indices/u_primconvert.h" +#include "tgsi/tgsi_parse.h" + +#include "pan_screen.h" +#include "pan_blending.h" +#include "pan_blend_shaders.h" +#include "pan_wallpaper.h" + +#ifdef DUMP_PERFORMANCE_COUNTERS +static int performance_counter_number = 0; +#endif + +/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ +//#define DRY_RUN + +#define SET_BIT(lval, bit, cond) \ + if (cond) \ + lval |= (bit); \ + else \ + lval &= ~(bit); + +/* TODO: Sample size, etc */ + +static void +panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) +{ + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); + SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); + +#ifdef SFBD + SET_BIT(ctx->fragment_fbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled); +#else + SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled); + + SET_BIT(ctx->fragment_fbd.unk1, (1 << 4) | (1 << 1), enabled); + + /* XXX */ + ctx->fragment_fbd.rt_count_2 = enabled ? 4 : 1; +#endif +} + +/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically + * independent between color buffers and depth/stencil). To enable, we allocate + * the AFBC metadata buffer and mark that it is enabled. We do -not- actually + * edit the fragment job here. This routine should be called ONCE per + * AFBC-compressed buffer, rather than on every frame.
*/ + +static void +panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds) +{ +#ifdef MFBD + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = pan_screen(gallium->screen); + /* AFBC metadata is 16 bytes per tile */ + int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; + int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; + int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format); + int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */ + + stride *= 2; /* TODO: Should this be carried over? */ + int main_size = stride * rsrc->base.height0; + rsrc->bo->afbc_metadata_size = tile_w * tile_h * 16; + + /* Allocate the AFBC slab itself, large enough to hold the above */ + screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab, + (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096, + true, 0, 0, 0); + + rsrc->bo->has_afbc = true; + + /* Compressed textured reads use a tagged pointer to the metadata */ + + rsrc->bo->gpu[0] = rsrc->bo->afbc_slab.gpu | (ds ? 
0 : 1); + rsrc->bo->cpu[0] = rsrc->bo->afbc_slab.cpu; +#else + printf("AFBC not supported yet on SFBD\n"); + assert(0); +#endif +} + +static void +panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc) +{ + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = pan_screen(gallium->screen); + int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; + int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; + + /* 8 byte checksum per tile */ + rsrc->bo->checksum_stride = tile_w * 8; + int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096); + screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0); + + rsrc->bo->has_checksum = true; +} + +/* ..by contrast, this routine runs for every FRAGMENT job, but does no + * allocation. AFBC is enabled on a per-surface basis */ + +static void +panfrost_set_fragment_afbc(struct panfrost_context *ctx) +{ + for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) { + struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[cb]->texture; + + /* Non-AFBC is the default */ + if (!rsrc->bo->has_afbc) + continue; + + /* Enable AFBC for the render target */ + ctx->fragment_rts[0].afbc.metadata = rsrc->bo->afbc_slab.gpu; + ctx->fragment_rts[0].afbc.stride = 0; + ctx->fragment_rts[0].afbc.unk = 0x30009; + + ctx->fragment_rts[0].format |= MALI_MFBD_FORMAT_AFBC; + + /* Change colourspace from RGB to BGR? */ +#if 0 + ctx->fragment_rts[0].format |= 0x800000; + ctx->fragment_rts[0].format &= ~0x20000; +#endif + + /* Point rendering to our special framebuffer */ + ctx->fragment_rts[0].framebuffer = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size; + + /* WAT? 
Stride is diff from the scanout case */ + ctx->fragment_rts[0].framebuffer_stride = ctx->pipe_framebuffer.width * 2 * 4; + } + + /* Enable depth/stencil AFBC for the framebuffer (not the render target) */ + if (ctx->pipe_framebuffer.zsbuf) { + struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture; + + if (rsrc->bo->has_afbc) { + ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA; + + ctx->fragment_extra.ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu; + ctx->fragment_extra.ds_afbc.depth_stencil_afbc_stride = 0; + + ctx->fragment_extra.ds_afbc.depth_stencil = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size; + + ctx->fragment_extra.ds_afbc.zero1 = 0x10009; + ctx->fragment_extra.ds_afbc.padding = 0x1000; + + ctx->fragment_extra.unk = 0x435; /* General 0x400 in all unks. 0x5 for depth/stencil. 0x10 for AFBC encoded depth stencil. Unclear where the 0x20 is from */ + + ctx->fragment_fbd.unk3 |= 0x400; + } + } + + /* For the special case of a depth-only FBO, we need to attach a dummy render target */ + + if (ctx->pipe_framebuffer.nr_cbufs == 0) { + ctx->fragment_rts[0].format = 0x80008000; + ctx->fragment_rts[0].framebuffer = 0; + ctx->fragment_rts[0].framebuffer_stride = 0; + } +} + +/* Framebuffer descriptor */ + +#ifdef SFBD +static void +panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h) +{ + fb->width = MALI_POSITIVE(w); + fb->height = MALI_POSITIVE(h); + + /* No idea why this is needed, but it's how resolution_check is + * calculated. It's not clear to us yet why the hardware wants this. + * The formula itself was discovered mostly by manual bruteforce and + * aggressive algebraic simplification. 
*/ + + fb->resolution_check = ((w + h) / 3) << 4; +} +#endif + +static PANFROST_FRAMEBUFFER +panfrost_emit_fbd(struct panfrost_context *ctx) +{ +#ifdef SFBD + struct mali_single_framebuffer framebuffer = { + .unknown2 = 0x1f, + .format = 0x30000000, + .clear_flags = 0x1000, + .unknown_address_0 = ctx->scratchpad.gpu, + .unknown_address_1 = ctx->scratchpad.gpu + 0x6000, + .unknown_address_2 = ctx->scratchpad.gpu + 0x6200, + .tiler_flags = 0xf0, + .tiler_heap_free = ctx->tiler_heap.gpu, + .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, + }; + + panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); +#else + struct bifrost_framebuffer framebuffer = { + .tiler_meta = 0xf00000c600, + + .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width), + .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height), + .width2 = MALI_POSITIVE(ctx->pipe_framebuffer.width), + .height2 = MALI_POSITIVE(ctx->pipe_framebuffer.height), + + .unk1 = 0x1080, + + /* TODO: MRT */ + .rt_count_1 = MALI_POSITIVE(1), + .rt_count_2 = 4, + + .unknown2 = 0x1f, + + /* Presumably corresponds to unknown_address_X of SFBD */ + .scratchpad = ctx->scratchpad.gpu, + .tiler_scratch_start = ctx->misc_0.gpu, + .tiler_scratch_middle = ctx->misc_0.gpu + /*ctx->misc_0.size*/40960, /* Size depends on the size of the framebuffer and the number of vertices */ + + .tiler_heap_start = ctx->tiler_heap.gpu, + .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, + }; + +#endif + + return framebuffer; +} + +/* Are we currently rendering to the screen (rather than an FBO)? 
*/ + +static bool +panfrost_is_scanout(struct panfrost_context *ctx) +{ + /* If there is no color buffer, it's an FBO */ + if (!ctx->pipe_framebuffer.nr_cbufs) + return false; + + /* If we're too early that no framebuffer was sent, it's scanout */ + if (!ctx->pipe_framebuffer.cbufs[0]) + return true; + + return ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET || + ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT || + ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED; +} + +/* The above function is for generalised fbd emission, used in both fragment as + * well as vertex/tiler payloads. This payload is specific to fragment + * payloads. */ + +static void +panfrost_new_frag_framebuffer(struct panfrost_context *ctx) +{ + mali_ptr framebuffer; + int stride; + + if (ctx->pipe_framebuffer.nr_cbufs > 0) { + framebuffer = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture)->bo->gpu[0]; + stride = util_format_get_stride(ctx->pipe_framebuffer.cbufs[0]->format, ctx->pipe_framebuffer.width); + } else { + /* Depth-only framebuffer -> dummy RT */ + framebuffer = 0; + stride = 0; + } + + /* The default is upside down from OpenGL's perspective. 
*/ + if (panfrost_is_scanout(ctx)) { + framebuffer += stride * (ctx->pipe_framebuffer.height - 1); + stride = -stride; + } + +#ifdef SFBD + struct mali_single_framebuffer fb = panfrost_emit_fbd(ctx); + + fb.framebuffer = framebuffer; + fb.stride = stride; + + fb.format = 0xb84e0281; /* RGB32, no MSAA */ +#else + struct bifrost_framebuffer fb = panfrost_emit_fbd(ctx); + + /* XXX: MRT case */ + fb.rt_count_2 = 1; + fb.unk3 = 0x100; + + struct bifrost_render_target rt = { + .unk1 = 0x4000000, + .format = 0x860a8899, /* RGBA32, no MSAA */ + .framebuffer = framebuffer, + .framebuffer_stride = (stride / 16) & 0xfffffff, + }; + + memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt)); + + memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra)); +#endif + + memcpy(&ctx->fragment_fbd, &fb, sizeof(fb)); +} + +/* Maps float 0.0-1.0 to int 0x00-0xFF */ +static uint8_t +normalised_float_to_u8(float f) +{ + return (uint8_t) (int) (f * 255.0f); +} + +static void +panfrost_clear( + struct pipe_context *pipe, + unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct panfrost_context *ctx = pan_context(pipe); + + if (!color) { + printf("Warning: clear color null?\n"); + return; + } + + /* Save settings for FBO switch */ + ctx->last_clear.buffers = buffers; + ctx->last_clear.color = color; + ctx->last_clear.depth = depth; + ctx->last_clear.depth = depth; + + bool clear_color = buffers & PIPE_CLEAR_COLOR; + bool clear_depth = buffers & PIPE_CLEAR_DEPTH; + bool clear_stencil = buffers & PIPE_CLEAR_STENCIL; + + /* Remember that we've done something */ + ctx->frame_cleared = true; + + /* Alpha clear only meaningful without alpha channel */ + bool has_alpha = ctx->pipe_framebuffer.nr_cbufs && util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format); + float clear_alpha = has_alpha ? 
color->f[3] : 1.0f; + + uint32_t packed_color = + (normalised_float_to_u8(clear_alpha) << 24) | + (normalised_float_to_u8(color->f[2]) << 16) | + (normalised_float_to_u8(color->f[1]) << 8) | + (normalised_float_to_u8(color->f[0]) << 0); + +#ifdef MFBD + struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0]; +#else + struct mali_single_framebuffer *buffer_color = &ctx->fragment_fbd; +#endif + +#ifdef MFBD + struct bifrost_framebuffer *buffer_ds = &ctx->fragment_fbd; +#else + struct mali_single_framebuffer *buffer_ds = buffer_color; +#endif + + if (clear_color) { + /* Fields duplicated 4x for unknown reasons. Same in Utgard, + * too, which is doubly weird. */ + + buffer_color->clear_color_1 = packed_color; + buffer_color->clear_color_2 = packed_color; + buffer_color->clear_color_3 = packed_color; + buffer_color->clear_color_4 = packed_color; + } + + if (clear_depth) { +#ifdef SFBD + buffer_ds->clear_depth_1 = depth; + buffer_ds->clear_depth_2 = depth; + buffer_ds->clear_depth_3 = depth; + buffer_ds->clear_depth_4 = depth; +#else + buffer_ds->clear_depth = depth; +#endif + } + + if (clear_stencil) { + buffer_ds->clear_stencil = stencil; + } + + /* Setup buffers depending on MFBD/SFBD */ + +#ifdef MFBD + + if (clear_depth || clear_stencil) { + /* Setup combined 24/8 depth/stencil */ + ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA; + //ctx->fragment_extra.unk = /*0x405*/0x404; + ctx->fragment_extra.unk = 0x405; + ctx->fragment_extra.ds_linear.depth = ctx->depth_stencil_buffer.gpu; + ctx->fragment_extra.ds_linear.depth_stride = ctx->pipe_framebuffer.width * 4; + } + +#else + + if (clear_depth) { + buffer_ds->depth_buffer = ctx->depth_stencil_buffer.gpu; + buffer_ds->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE; + } + + if (clear_stencil) { + buffer_ds->stencil_buffer = ctx->depth_stencil_buffer.gpu; + buffer_ds->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE; + } + +#endif + +#ifdef SFBD + /* Set flags based on what has been cleared, for the SFBD case */ + 
/* XXX: What do these flags mean? */ + int clear_flags = 0x101100; + + if (clear_color && clear_depth && clear_stencil) { + /* On a tiler like this, it's fastest to clear all three buffers at once */ + + clear_flags |= MALI_CLEAR_FAST; + } else { + clear_flags |= MALI_CLEAR_SLOW; + + if (clear_stencil) + clear_flags |= MALI_CLEAR_SLOW_STENCIL; + } + + fbd->clear_flags = clear_flags; +#endif +} + +static void +panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) +{ +#ifdef MFBD + /* MFBD needs a sequential semi-render target upload, but this is, is beyond me for now */ + struct bifrost_render_target rts_list[] = { + { + .chunknown = { + .unk = 0x30005, + }, + .framebuffer = ctx->misc_0.gpu, + .zero2 = 0x3, + }, + }; + + /* Allocate memory for the three components */ + int size = 1024 + sizeof(ctx->vt_framebuffer) + sizeof(rts_list); + struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); + + /* Opaque 1024-block */ + rts_list[0].chunknown.pointer = transfer.gpu; + + mali_ptr framebuffer = (transfer.gpu + 1024) | PANFROST_DEFAULT_FBD; + memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer, sizeof(ctx->vt_framebuffer)); + memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer), rts_list, sizeof(rts_list)); +#else + mali_ptr framebuffer = panfrost_upload_transient(ctx, &ctx->vt_framebuffer, sizeof(ctx->vt_framebuffer)) | PANFROST_DEFAULT_FBD; +#endif + ctx->payload_vertex.postfix.framebuffer = framebuffer; + ctx->payload_tiler.postfix.framebuffer = framebuffer; +} + +static void +panfrost_viewport(struct panfrost_context *ctx, + float depth_range_n, + float depth_range_f, + int viewport_x0, int viewport_y0, + int viewport_x1, int viewport_y1) +{ + /* Viewport encoding is asymmetric. Purpose of the floats is unknown? 
*/ + + struct mali_viewport ret = { + .floats = { +#if 0 + -inff, -inff, + inff, inff, +#endif + 0.0, 0.0, + 2048.0, 1600.0, + }, + + .depth_range_n = depth_range_n, + .depth_range_f = depth_range_f, + + .viewport0 = { viewport_x0, viewport_y0 }, + .viewport1 = { MALI_POSITIVE(viewport_x1), MALI_POSITIVE(viewport_y1) }, + }; + + memcpy(ctx->viewport, &ret, sizeof(ret)); +} + +/* Reset per-frame context, called on context initialisation as well as after + * flushing a frame */ + +static void +panfrost_invalidate_frame(struct panfrost_context *ctx) +{ + unsigned transient_count = ctx->transient_pools[ctx->cmdstream_i].entry_index*ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset; + printf("Uploaded transient %d bytes\n", transient_count); + + /* Rotate cmdstream */ + if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0]))) + ctx->cmdstream_i = 0; + + ctx->vt_framebuffer = panfrost_emit_fbd(ctx); + panfrost_new_frag_framebuffer(ctx); + + /* Reset varyings allocated */ + ctx->varying_height = 0; + + /* The transient cmdstream is dirty every frame; the only bits worth preserving + * (textures, shaders, etc) are in other buffers anyways */ + + ctx->transient_pools[ctx->cmdstream_i].entry_index = 0; + ctx->transient_pools[ctx->cmdstream_i].entry_offset = 0; + + /* Regenerate payloads */ + panfrost_attach_vt_framebuffer(ctx); + + if (ctx->rasterizer) + ctx->dirty |= PAN_DIRTY_RASTERIZER; + + /* XXX */ + ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES; +} + +/* In practice, every field of these payloads should be configurable + * arbitrarily, which means these functions are basically catch-all's for + * as-of-yet unwavering unknowns */ + +static void +panfrost_emit_vertex_payload(struct panfrost_context *ctx) +{ + struct midgard_payload_vertex_tiler payload = { + .prefix = { + .workgroups_z_shift = 32, + .workgroups_x_shift_2 = 0x2, + .workgroups_x_shift_3 = 0x5, + }, + .gl_enables = 0x6 + }; 
+ + memcpy(&ctx->payload_vertex, &payload, sizeof(payload)); +} + +static void +panfrost_emit_tiler_payload(struct panfrost_context *ctx) +{ + struct midgard_payload_vertex_tiler payload = { + .prefix = { + .workgroups_z_shift = 32, + .workgroups_x_shift_2 = 0x2, + .workgroups_x_shift_3 = 0x6, + + .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */ + }, + }; + + /* Reserve the viewport */ + struct panfrost_transfer t = panfrost_allocate_chunk(ctx, sizeof(struct mali_viewport), HEAP_DESCRIPTOR); + ctx->viewport = (struct mali_viewport *) t.cpu; + payload.postfix.viewport = t.gpu; + + memcpy(&ctx->payload_tiler, &payload, sizeof(payload)); +} + +static unsigned +translate_tex_wrap(enum pipe_tex_wrap w) +{ + switch (w) { + case PIPE_TEX_WRAP_REPEAT: + return MALI_WRAP_REPEAT; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return MALI_WRAP_CLAMP_TO_EDGE; + + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return MALI_WRAP_CLAMP_TO_BORDER; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return MALI_WRAP_MIRRORED_REPEAT; + + default: + assert(0); + return 0; + } +} + +static unsigned +translate_tex_filter(enum pipe_tex_filter f) +{ + switch (f) { + case PIPE_TEX_FILTER_NEAREST: + return MALI_NEAREST; + + case PIPE_TEX_FILTER_LINEAR: + return MALI_LINEAR; + + default: + assert(0); + return 0; + } +} + +static unsigned +translate_mip_filter(enum pipe_tex_mipfilter f) +{ + return (f == PIPE_TEX_MIPFILTER_LINEAR) ? 
MALI_MIP_LINEAR : 0; +} + +static unsigned +panfrost_translate_compare_func(enum pipe_compare_func in) +{ + switch (in) { + case PIPE_FUNC_NEVER: + return MALI_FUNC_NEVER; + + case PIPE_FUNC_LESS: + return MALI_FUNC_LESS; + + case PIPE_FUNC_EQUAL: + return MALI_FUNC_EQUAL; + + case PIPE_FUNC_LEQUAL: + return MALI_FUNC_LEQUAL; + + case PIPE_FUNC_GREATER: + return MALI_FUNC_GREATER; + + case PIPE_FUNC_NOTEQUAL: + return MALI_FUNC_NOTEQUAL; + + case PIPE_FUNC_GEQUAL: + return MALI_FUNC_GEQUAL; + + case PIPE_FUNC_ALWAYS: + return MALI_FUNC_ALWAYS; + } + + assert (0); + return 0; /* Unreachable */ +} + +static unsigned +panfrost_translate_alt_compare_func(enum pipe_compare_func in) +{ + switch (in) { + case PIPE_FUNC_NEVER: + return MALI_ALT_FUNC_NEVER; + + case PIPE_FUNC_LESS: + return MALI_ALT_FUNC_LESS; + + case PIPE_FUNC_EQUAL: + return MALI_ALT_FUNC_EQUAL; + + case PIPE_FUNC_LEQUAL: + return MALI_ALT_FUNC_LEQUAL; + + case PIPE_FUNC_GREATER: + return MALI_ALT_FUNC_GREATER; + + case PIPE_FUNC_NOTEQUAL: + return MALI_ALT_FUNC_NOTEQUAL; + + case PIPE_FUNC_GEQUAL: + return MALI_ALT_FUNC_GEQUAL; + + case PIPE_FUNC_ALWAYS: + return MALI_ALT_FUNC_ALWAYS; + } + + assert (0); + return 0; /* Unreachable */ +} + +static unsigned +panfrost_translate_stencil_op(enum pipe_stencil_op in) +{ + switch (in) { + case PIPE_STENCIL_OP_KEEP: + return MALI_STENCIL_KEEP; + + case PIPE_STENCIL_OP_ZERO: + return MALI_STENCIL_ZERO; + + case PIPE_STENCIL_OP_REPLACE: + return MALI_STENCIL_REPLACE; + + case PIPE_STENCIL_OP_INCR: + return MALI_STENCIL_INCR; + + case PIPE_STENCIL_OP_DECR: + return MALI_STENCIL_DECR; + + case PIPE_STENCIL_OP_INCR_WRAP: + return MALI_STENCIL_INCR_WRAP; + + case PIPE_STENCIL_OP_DECR_WRAP: + return MALI_STENCIL_DECR_WRAP; + + case PIPE_STENCIL_OP_INVERT: + return MALI_STENCIL_INVERT; + } + + assert (0); + return 0; /* Unreachable */ +} + +static void +panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out) +{ + out->ref = 0; /* 
Gallium gets it from elsewhere */ + + out->mask = in->valuemask; + out->func = panfrost_translate_compare_func(in->func); + out->sfail = panfrost_translate_stencil_op(in->fail_op); + out->dpfail = panfrost_translate_stencil_op(in->zfail_op); + out->dppass = panfrost_translate_stencil_op(in->zpass_op); +} + +static void +panfrost_default_shader_backend(struct panfrost_context *ctx) +{ + struct mali_shader_meta shader = { + .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000), + + .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010 /*| MALI_CAN_DISCARD*/, +#ifdef T8XX + .unknown2_4 = MALI_NO_MSAA | 0x4e0, +#else + .unknown2_4 = MALI_NO_MSAA | 0x4f0, +#endif + }; + + struct pipe_stencil_state default_stencil = { + .enabled = 0, + .func = PIPE_FUNC_ALWAYS, + .fail_op = MALI_STENCIL_KEEP, + .zfail_op = MALI_STENCIL_KEEP, + .zpass_op = MALI_STENCIL_KEEP, + .writemask = 0xFF, + .valuemask = 0xFF + }; + + panfrost_make_stencil_state(&default_stencil, &shader.stencil_front); + shader.stencil_mask_front = default_stencil.writemask; + + panfrost_make_stencil_state(&default_stencil, &shader.stencil_back); + shader.stencil_mask_back = default_stencil.writemask; + + if (default_stencil.enabled) + shader.unknown2_4 |= MALI_STENCIL_TEST; + + memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); +} + +/* Generates a vertex/tiler job. This is, in some sense, the heart of the + * graphics command stream. It should be called once per draw, according to + * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in + * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for + * vertex jobs. */ + +struct panfrost_transfer +panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler) +{ + /* Each draw call corresponds to two jobs, and we want to offset to leave room for the set-value job */ + int draw_job_index = 1 + (2 * ctx->draw_count); + + struct mali_job_descriptor_header job = { + .job_type = is_tiler ?
JOB_TYPE_TILER : JOB_TYPE_VERTEX, + .job_index = draw_job_index + (is_tiler ? 1 : 0), +#ifdef __LP64__ + .job_descriptor_size = 1, +#endif + }; + + /* XXX: What is this? */ +#ifdef T6XX + + if (is_tiler) + job.unknown_flags = ctx->draw_count ? 64 : 1; + +#endif + + /* Only non-elided tiler jobs have dependencies which are known at this point */ + + if (is_tiler && !is_elided_tiler) { + /* Tiler jobs depend on vertex jobs */ + + job.job_dependency_index_1 = draw_job_index; + + /* Tiler jobs also depend on the previous tiler job */ + + if (ctx->draw_count) + job.job_dependency_index_2 = draw_job_index - 1; + } + + struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex; + + /* There's some padding hacks on 32-bit */ + +#ifdef __LP64__ + int offset = 0; +#else + int offset = 4; +#endif + struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload)); + memcpy(transfer.cpu, &job, sizeof(job)); + memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload)); + return transfer; +} + +/* Generates a set value job. It's unclear what exactly this does, why it's + * necessary, and when to call it. */ + +static void +panfrost_set_value_job(struct panfrost_context *ctx) +{ + struct mali_job_descriptor_header job = { + .job_type = JOB_TYPE_SET_VALUE, + .job_descriptor_size = 1, + .job_index = 1 + (2 * ctx->draw_count), + }; + + struct mali_payload_set_value payload = { + .out = ctx->misc_0.gpu, + .unknown = 0x3, + }; + + struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload)); + memcpy(transfer.cpu, &job, sizeof(job)); + memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload)); + + ctx->u_set_value_job = (struct mali_job_descriptor_header *) transfer.cpu; + ctx->set_value_job = transfer.gpu; +} + +/* Generate a fragment job. This should be called once per frame. 
(According to + * presentations, this is supposed to correspond to eglSwapBuffers) */ + +mali_ptr +panfrost_fragment_job(struct panfrost_context *ctx) +{ + /* Update fragment FBD */ + panfrost_set_fragment_afbc(ctx); + + if (ctx->pipe_framebuffer.nr_cbufs == 1) { + struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; + int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0); + + if (rsrc->bo->has_checksum) { + //ctx->fragment_fbd.unk3 |= 0xa00000; + //ctx->fragment_fbd.unk3 = 0xa02100; + ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA; + ctx->fragment_extra.unk |= 0x420; + ctx->fragment_extra.checksum_stride = rsrc->bo->checksum_stride; + ctx->fragment_extra.checksum = rsrc->bo->gpu[0] + stride * rsrc->base.height0; + } + } + + /* The frame is complete and therefore the framebuffer descriptor is + * ready for linkage and upload */ + + size_t sz = sizeof(ctx->fragment_fbd) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1; + struct panfrost_transfer fbd_t = panfrost_allocate_transient(ctx, sz); + off_t offset = 0; + + memcpy(fbd_t.cpu, &ctx->fragment_fbd, sizeof(ctx->fragment_fbd)); + offset += sizeof(ctx->fragment_fbd); + + /* Upload extra framebuffer info if necessary */ + if (ctx->fragment_fbd.unk3 & MALI_MFBD_EXTRA) { + memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra)); + offset += sizeof(struct bifrost_fb_extra); + } + + /* Upload (single) render target */ + memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1); + + /* Generate the fragment (frame) job */ + + struct mali_job_descriptor_header header = { + .job_type = JOB_TYPE_FRAGMENT, + .job_index = 1, +#ifdef __LP64__ + .job_descriptor_size = 1 +#endif + }; + + struct mali_payload_fragment payload = { + .min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(0, 0), + .max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(ctx->pipe_framebuffer.width, 
ctx->pipe_framebuffer.height), + .framebuffer = fbd_t.gpu | PANFROST_DEFAULT_FBD | (ctx->fragment_fbd.unk3 & MALI_MFBD_EXTRA ? 2 : 0), + }; + + /* Normally, there should be no padding. However, fragment jobs are + * shared with 64-bit Bifrost systems, and accordingly there is 4-bytes + * of zero padding in between. */ + + struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(header) + sizeof(payload)); + memcpy(transfer.cpu, &header, sizeof(header)); + memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload)); + return transfer.gpu; +} + +/* Emits attributes and varying descriptors, which should be called every draw, + * excepting some obscure circumstances */ + +static void +panfrost_emit_vertex_data(struct panfrost_context *ctx) +{ + /* TODO: Only update the dirtied buffers */ + union mali_attr attrs[PIPE_MAX_ATTRIBS]; + union mali_attr varyings[PIPE_MAX_ATTRIBS]; + + unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count); + + for (int i = 0; i < ctx->vertex_buffer_count; ++i) { + struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i]; + struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource); + + /* Let's figure out the layout of the attributes in memory so + * we can be smart about size computation. The idea is to + * figure out the maximum src_offset, which tells us the latest + * spot a vertex could start. Meanwhile, we figure out the size + * of the attribute memory (assuming interleaved + * representation) and tack on the max src_offset for a + * reasonably good upper bound on the size. + * + * Proving correctness is left as an exercise to the reader. 
+ */ + + unsigned max_src_offset = 0; + + for (unsigned j = 0; j < ctx->vertex->num_elements; ++j) { + if (ctx->vertex->pipe[j].vertex_buffer_index != i) continue; + max_src_offset = MAX2(max_src_offset, ctx->vertex->pipe[j].src_offset); + } + + /* Offset vertex count by draw_start to make sure we upload enough */ + attrs[i].stride = buf->stride; + attrs[i].size = buf->stride * (ctx->payload_vertex.draw_start + invocation_count) + max_src_offset; + + /* Vertex elements are -already- GPU-visible, at + * rsrc->gpu. However, attribute buffers must be 64 aligned. If + * it is not, for now we have to duplicate the buffer. */ + + mali_ptr effective_address = (rsrc->bo->gpu[0] + buf->buffer_offset); + + if (effective_address & 0x3F) { + attrs[i].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu[0] + buf->buffer_offset, attrs[i].size) | 1; + } else { + attrs[i].elements = effective_address | 1; + } + } + + struct panfrost_varyings *vars = &ctx->vs->variants[ctx->vs->active_variant].varyings; + + for (int i = 0; i < vars->varying_buffer_count; ++i) { + mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height; + + varyings[i].elements = varying_address | 1; + varyings[i].stride = vars->varyings_stride[i]; + varyings[i].size = vars->varyings_stride[i] * invocation_count; + + /* If this varying has to be linked somewhere, do it now. See + * pan_assemble.c for the indices. 
TODO: Use a more generic + * linking interface */ + + if (i == 1) { + /* gl_Position */ + ctx->payload_tiler.postfix.position_varying = varying_address; + } else if (i == 2) { + /* gl_PointSize */ + ctx->payload_tiler.primitive_size.pointer = varying_address; + } + + /* Varyings appear to need 64-byte alignment */ + ctx->varying_height += ALIGN(varyings[i].size, 64); + + /* Ensure that we fit */ + assert(ctx->varying_height < ctx->varying_mem.size); + } + + ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, ctx->vertex_buffer_count * sizeof(union mali_attr)); + + mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, vars->varying_buffer_count * sizeof(union mali_attr)); + ctx->payload_vertex.postfix.varyings = varyings_p; + ctx->payload_tiler.postfix.varyings = varyings_p; +} + +/* Go through dirty flags and actualise them in the cmdstream. */ + +void +panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) +{ + if (with_vertex_data) { + panfrost_emit_vertex_data(ctx); + } + + if (ctx->dirty & PAN_DIRTY_RASTERIZER) { + ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables; + panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample); + } + + if (ctx->occlusion_query) { + ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_BOOLEAN; + ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu; + } + + if (ctx->dirty & PAN_DIRTY_VS) { + assert(ctx->vs); + + struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; + + /* Late shader descriptor assignments */ + vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX]; + vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX]; + + /* Who knows */ + vs->tripipe->midgard1.unknown1 = 0x2201; + + ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4; + + /* Varying descriptor is tied to the vertex shader. 
Also the + * fragment shader, I suppose, but it's generated with the + * vertex shader so */ + + struct panfrost_varyings *varyings = &ctx->vs->variants[ctx->vs->active_variant].varyings; + + ctx->payload_vertex.postfix.varying_meta = varyings->varyings_descriptor; + ctx->payload_tiler.postfix.varying_meta = varyings->varyings_descriptor_fragment; + } + + if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { + /* Check if we need to link the gl_PointSize varying */ + assert(ctx->vs); + struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; + + bool needs_gl_point_size = vs->writes_point_size && ctx->payload_tiler.prefix.draw_mode == MALI_POINTS; + + if (!needs_gl_point_size) { + /* If the size is constant, write it out. Otherwise, + * don't touch primitive_size (since we would clobber + * the pointer there) */ + + ctx->payload_tiler.primitive_size.constant = ctx->rasterizer->base.line_width; + } + + /* Set the flag for varying (pointer) point size if the shader needs that */ + SET_BIT(ctx->payload_tiler.prefix.unknown_draw, MALI_DRAW_VARYING_SIZE, needs_gl_point_size); + } + + /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. 
*/ + if (ctx->fs) + ctx->dirty |= PAN_DIRTY_FS; + + if (ctx->dirty & PAN_DIRTY_FS) { + assert(ctx->fs); + struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant]; + +#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name + + COPY(shader); + COPY(attribute_count); + COPY(varying_count); + COPY(midgard1.uniform_count); + COPY(midgard1.work_count); + COPY(midgard1.unknown2); + +#undef COPY + /* If there is a blend shader, work registers are shared */ + + if (ctx->blend->has_blend_shader) + ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16; + + /* Set late due to depending on render state */ + /* The one at the end seems to mean "1 UBO" */ + ctx->fragment_shader_core.midgard1.unknown1 = MALI_NO_ALPHA_TO_COVERAGE | 0x200 | 0x2201; + + /* Assign texture/sample count right before upload */ + ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT]; + ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT]; + + /* Assign the stencil refs late */ + ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0]; + ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1]; + + /* CAN_DISCARD should be set if the fragment shader possibly + * contains a 'discard' instruction, or maybe other + * circumstances. It is likely this is related to optimizations + * related to forward-pixel kill, as per "Mali Performance 3: + * Is EGL_BUFFER_PRESERVED a good thing?" 
by Peter Harris + */ + + if (variant->can_discard) { + ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; + ctx->fragment_shader_core.midgard1.unknown1 &= ~MALI_NO_ALPHA_TO_COVERAGE; + ctx->fragment_shader_core.midgard1.unknown1 |= 0x4000; + ctx->fragment_shader_core.midgard1.unknown1 = 0x4200; + } + + if (ctx->blend->has_blend_shader) + ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader; + + size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta); + struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); + memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta)); + + ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4; + +#ifdef T8XX + /* Additional blend descriptor tacked on for newer systems */ + + unsigned blend_count = 0; + + if (ctx->blend->has_blend_shader) { + /* For a blend shader, the bottom nibble corresponds to + * the number of work registers used, which signals the + * -existence- of a blend shader */ + + assert(ctx->blend->blend_work_count >= 2); + blend_count |= MIN2(ctx->blend->blend_work_count, 3); + } else { + /* Otherwise, the bottom bit simply specifies if + * blending (anything other than REPLACE) is enabled */ + + /* XXX: Less ugly way to do this? 
*/ + bool no_blending = + (ctx->blend->equation.rgb_mode == 0x122) && + (ctx->blend->equation.alpha_mode == 0x122) && + (ctx->blend->equation.color_mask == 0xf); + + if (!no_blending) + blend_count |= 0x1; + } + + /* Second blend equation is always a simple replace */ + + uint64_t replace_magic = 0xf0122122; + struct mali_blend_equation replace_mode; + memcpy(&replace_mode, &replace_magic, sizeof(replace_mode)); + + struct mali_blend_meta blend_meta[] = { + { + .unk1 = 0x200 | blend_count, + .blend_equation_1 = ctx->blend->equation, + .blend_equation_2 = replace_mode + }, + }; + + if (ctx->blend->has_blend_shader) + memcpy(&blend_meta[0].blend_equation_1, &ctx->blend->blend_shader, sizeof(ctx->blend->blend_shader)); + + memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta)); +#endif + } + + if (ctx->dirty & PAN_DIRTY_VERTEX) { + ctx->payload_vertex.postfix.attribute_meta = ctx->vertex->descriptor_ptr; + } + + if (ctx->dirty & PAN_DIRTY_SAMPLERS) { + /* Upload samplers back to back, no padding */ + + for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { + if (!ctx->sampler_count[t]) continue; + + struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]); + struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu; + + for (int i = 0; i < ctx->sampler_count[t]; ++i) { + desc[i] = ctx->samplers[t][i]->hw; + } + + if (t == PIPE_SHADER_FRAGMENT) + ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu; + else if (t == PIPE_SHADER_VERTEX) + ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu; + else + assert(0); + } + } + + if (ctx->dirty & PAN_DIRTY_TEXTURES) { + for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { + /* Shortcircuit */ + if (!ctx->sampler_view_count[t]) continue; + + uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + + for (int i = 0; i < ctx->sampler_view_count[t]; ++i) { + if (!ctx->sampler_views[t][i]) + 
continue; + + struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture; + struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc; + + /* Inject the address in. */ + for (int l = 0; l < (tex_rsrc->last_level + 1); ++l) + ctx->sampler_views[t][i]->hw.swizzled_bitmaps[l] = rsrc->bo->gpu[l]; + + /* Workaround maybe-errata (?) with non-mipmaps */ + int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels; + + if (!rsrc->bo->is_mipmap) { +#ifdef T6XX + /* HW ERRATA, not needed after T6XX */ + ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0]; + + ctx->sampler_views[t][i]->hw.unknown3A = 1; +#endif + ctx->sampler_views[t][i]->hw.nr_mipmap_levels = 0; + } + + trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor)); + + /* Restore */ + ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s; + +#ifdef T6XX + ctx->sampler_views[t][i]->hw.unknown3A = 0; +#endif + } + + mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); + + if (t == PIPE_SHADER_FRAGMENT) + ctx->payload_tiler.postfix.texture_trampoline = trampoline; + else if (t == PIPE_SHADER_VERTEX) + ctx->payload_vertex.postfix.texture_trampoline = trampoline; + else + assert(0); + } + } + + /* Generate the viewport vector of the form: <width/2, height/2, centerx, centery> */ + const struct pipe_viewport_state *vp = &ctx->pipe_viewport; + + float viewport_vec4[] = { + vp->scale[0], + fabsf(vp->scale[1]), + + vp->translate[0], + /* -1.0 * vp->translate[1] */ fabs(1.0 * vp->scale[1]) /* XXX */ + }; + + for (int i = 0; i < PIPE_SHADER_TYPES; ++i) { + struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i]; + + if (i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT) { + /* It doesn't matter if we don't use all the memory; + * we'd need a dummy UBO anyway. 
Compute the max */ + + size_t size = sizeof(viewport_vec4) + buf->size; + struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); + + /* Keep track of how much we've uploaded */ + off_t offset = 0; + + if (i == PIPE_SHADER_VERTEX) { + /* Upload viewport */ + memcpy(transfer.cpu + offset, viewport_vec4, sizeof(viewport_vec4)); + offset += sizeof(viewport_vec4); + } + + /* Upload uniforms */ + memcpy(transfer.cpu + offset, buf->buffer, buf->size); + + int uniform_count = 0; + + struct mali_vertex_tiler_postfix *postfix; + + switch (i) { + case PIPE_SHADER_VERTEX: + uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count; + postfix = &ctx->payload_vertex.postfix; + break; + + case PIPE_SHADER_FRAGMENT: + uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count; + postfix = &ctx->payload_tiler.postfix; + break; + + default: + printf("Unknown shader stage %d in uniform upload\n", i); + assert(0); + } + + /* Also attach the same buffer as a UBO for extended access */ + + struct mali_uniform_buffer_meta uniform_buffers[] = { + { + .size = MALI_POSITIVE((2 + uniform_count)), + .ptr = transfer.gpu >> 2, + }, + }; + + mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers)); + postfix->uniforms = transfer.gpu; + postfix->uniform_buffers = ubufs; + + buf->dirty = 0; + } + } + + ctx->dirty = 0; +} + +/* Corresponds to exactly one draw, but does not submit anything */ + +static void +panfrost_queue_draw(struct panfrost_context *ctx) +{ + /* TODO: Expand the array? 
*/ + if (ctx->draw_count >= MAX_DRAW_CALLS) { + printf("Job buffer overflow, ignoring draw\n"); + assert(0); + } + + /* Handle dirty flags now */ + panfrost_emit_for_draw(ctx, true); + + struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false, false); + struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, false); + + ctx->u_vertex_jobs[ctx->vertex_job_count] = (struct mali_job_descriptor_header *) vertex.cpu; + ctx->vertex_jobs[ctx->vertex_job_count++] = vertex.gpu; + + ctx->u_tiler_jobs[ctx->tiler_job_count] = (struct mali_job_descriptor_header *) tiler.cpu; + ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu; + + ctx->draw_count++; +} + +/* At the end of the frame, the vertex and tiler jobs are linked together and + * then the fragment job is plonked at the end. Set value job is first for + * unknown reasons. */ + +static void +panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next) +{ + if (first->job_descriptor_size) + first->next_job_64 = (u64) (uintptr_t) next; + else + first->next_job_32 = (u32) (uintptr_t) next; +} + +static void +panfrost_link_jobs(struct panfrost_context *ctx) +{ + if (ctx->draw_count) { + /* Generate the set_value_job */ + panfrost_set_value_job(ctx); + + /* Have the first vertex job depend on the set value job */ + ctx->u_vertex_jobs[0]->job_dependency_index_1 = ctx->u_set_value_job->job_index; + + /* SV -> V */ + panfrost_link_job_pair(ctx->u_set_value_job, ctx->vertex_jobs[0]); + } + + /* V -> V/T ; T -> T/null */ + for (int i = 0; i < ctx->vertex_job_count; ++i) { + bool isLast = (i + 1) == ctx->vertex_job_count; + + panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0]: ctx->vertex_jobs[i + 1]); + } + + /* T -> T/null */ + for (int i = 0; i < ctx->tiler_job_count; ++i) { + bool isLast = (i + 1) == ctx->tiler_job_count; + panfrost_link_job_pair(ctx->u_tiler_jobs[i], isLast ? 
0 : ctx->tiler_jobs[i + 1]); + } +} + +/* The entire frame is in memory -- send it off to the kernel! */ + +static void +panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) +{ + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = pan_screen(gallium->screen); + + /* Edge case if screen is cleared and nothing else */ + bool has_draws = ctx->draw_count > 0; + + /* Workaround a bizarre lockup (a hardware errata?) */ + if (!has_draws) + flush_immediate = true; + + /* A number of jobs are batched -- this must be linked and cleared */ + panfrost_link_jobs(ctx); + + ctx->draw_count = 0; + ctx->vertex_job_count = 0; + ctx->tiler_job_count = 0; + +#ifndef DRY_RUN + + int fragment_id = screen->driver->submit_vs_fs_job(ctx, has_draws); + + /* If visual, we can stall a frame */ + + if (panfrost_is_scanout(ctx) && !flush_immediate) + screen->driver->force_flush_fragment(ctx); + + screen->last_fragment_id = fragment_id; + screen->last_fragment_flushed = false; + + /* If readback, flush now (hurts the pipelined performance) */ + if (panfrost_is_scanout(ctx) && flush_immediate) + screen->driver->force_flush_fragment(ctx); + +#ifdef DUMP_PERFORMANCE_COUNTERS + char filename[128]; + snprintf(filename, sizeof(filename), "/dev/shm/frame%d.mdgprf", ++performance_counter_number); + FILE *fp = fopen(filename, "wb"); + fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp); + fclose(fp); +#endif + +#endif +} + +bool dont_scanout = false; + +void +panfrost_flush( + struct pipe_context *pipe, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct panfrost_context *ctx = pan_context(pipe); + + /* If there is nothing drawn, skip the frame */ + if (!ctx->draw_count && !ctx->frame_cleared) return; + + if (!ctx->frame_cleared) { + /* While there are draws, there was no clear. This is a partial + * update, which needs to be handled via the "wallpaper" + * method. 
We also need to fake a clear, just to get the + * FRAGMENT job correct. */ + + panfrost_clear(&ctx->base, ctx->last_clear.buffers, ctx->last_clear.color, ctx->last_clear.depth, ctx->last_clear.stencil); + + panfrost_draw_wallpaper(pipe); + } + + /* Frame clear handled, reset */ + ctx->frame_cleared = false; + + /* Whether to stall the pipeline for immediately correct results */ + bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME; + + /* Submit the frame itself */ + panfrost_submit_frame(ctx, flush_immediate); + + /* Prepare for the next frame */ + panfrost_invalidate_frame(ctx); +} + +#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c; + +static int +g2m_draw_mode(enum pipe_prim_type mode) +{ + switch (mode) { + DEFINE_CASE(POINTS); + DEFINE_CASE(LINES); + DEFINE_CASE(LINE_LOOP); + DEFINE_CASE(LINE_STRIP); + DEFINE_CASE(TRIANGLES); + DEFINE_CASE(TRIANGLE_STRIP); + DEFINE_CASE(TRIANGLE_FAN); + DEFINE_CASE(QUADS); + DEFINE_CASE(QUAD_STRIP); + DEFINE_CASE(POLYGON); + + default: + printf("Illegal draw mode %d\n", mode); + assert(0); + return MALI_LINE_LOOP; + } +} + +#undef DEFINE_CASE + +static unsigned +panfrost_translate_index_size(unsigned size) +{ + switch (size) { + case 1: + return MALI_DRAW_INDEXED_UINT8; + + case 2: + return MALI_DRAW_INDEXED_UINT16; + + case 4: + return MALI_DRAW_INDEXED_UINT32; + + default: + printf("Unknown index size %d\n", size); + assert(0); + return 0; + } +} + +static const uint8_t * +panfrost_get_index_buffer_raw(const struct pipe_draw_info *info) +{ + if (info->has_user_indices) { + return (const uint8_t *) info->index.user; + } else { + struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource); + return (const uint8_t *) rsrc->bo->cpu[0]; + } +} + +/* Gets a GPU address for the associated index buffer. 
Only guaranteed to be + * good for the duration of the draw (transient), could last longer */ + +static mali_ptr +panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info) +{ + struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource); + + off_t offset = info->start * info->index_size; + + if (!info->has_user_indices) { + /* Only resources can be directly mapped */ + return rsrc->bo->gpu[0] + offset; + } else { + /* Otherwise, we need to upload to transient memory */ + const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info); + return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size); + } +} + +static void +panfrost_draw_vbo( + struct pipe_context *pipe, + const struct pipe_draw_info *info); + +#define CALCULATE_MIN_MAX_INDEX(T, buffer, start, count) \ + for (unsigned _idx = (start); _idx < (start + count); ++_idx) { \ + T idx = buffer[_idx]; \ + if (idx > max_index) max_index = idx; \ + if (idx < min_index) min_index = idx; \ + } + +static void +panfrost_draw_vbo( + struct pipe_context *pipe, + const struct pipe_draw_info *info) +{ + struct panfrost_context *ctx = pan_context(pipe); + + ctx->payload_vertex.draw_start = info->start; + ctx->payload_tiler.draw_start = info->start; + + int mode = info->mode; + +#if 0 + /* Fallback for non-ES draw modes */ + /* Primconvert not needed on Midgard anymore due to native + * QUADS/POLYGONS. Bifrost/desktop-GL may need it though so not + * removing */ + + if (info->mode >= PIPE_PRIM_QUADS) { + if (info->mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) { + mode = PIPE_PRIM_TRIANGLE_FAN; + } else { + if (info->count < 4) { + /* Degenerate case? 
*/ + return; + } + + util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base); + util_primconvert_draw_vbo(ctx->primconvert, info); + return; + } + } +#endif + + ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode); + + ctx->vertex_count = info->count; + + /* For non-indexed draws, they're the same */ + unsigned invocation_count = ctx->vertex_count; + + /* For higher amounts of vertices (greater than what fits in a 16-bit + * short), the other value is needed, otherwise there will be bizarre + * rendering artefacts. It's not clear what these values mean yet. */ + + ctx->payload_tiler.prefix.unknown_draw &= ~(0x3000 | 0x18000); + ctx->payload_tiler.prefix.unknown_draw |= (info->mode == PIPE_PRIM_POINTS || ctx->vertex_count > 65535) ? 0x3000 : 0x18000; + + if (info->index_size) { + /* Calculate the min/max index used so we can figure out how + * many times to invoke the vertex shader */ + + const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info); + + int min_index = INT_MAX; + int max_index = 0; + + if (info->index_size == 1) { + CALCULATE_MIN_MAX_INDEX(uint8_t, ibuf8, info->start, info->count); + } else if (info->index_size == 2) { + const uint16_t *ibuf16 = (const uint16_t *) ibuf8; + CALCULATE_MIN_MAX_INDEX(uint16_t, ibuf16, info->start, info->count); + } else if (info->index_size == 4) { + const uint32_t *ibuf32 = (const uint32_t *) ibuf8; + CALCULATE_MIN_MAX_INDEX(uint32_t, ibuf32, info->start, info->count); + } else { + assert(0); + } + + /* Make sure we didn't go crazy */ + assert(min_index < INT_MAX); + assert(max_index > 0); + assert(max_index > min_index); + + /* Use the corresponding values */ + invocation_count = max_index - min_index + 1; + ctx->payload_vertex.draw_start = min_index; + ctx->payload_tiler.draw_start = min_index; + + ctx->payload_tiler.prefix.negative_start = -min_index; + ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(info->count); + + //assert(!info->restart_index); /* TODO: Research */ + 
assert(!info->index_bias); + //assert(!info->min_index); /* TODO: Use value */ + + ctx->payload_tiler.prefix.unknown_draw |= panfrost_translate_index_size(info->index_size); + ctx->payload_tiler.prefix.indices = panfrost_get_index_buffer_mapped(ctx, info); + } else { + /* Index count == vertex count, if no indexing is applied, as + * if it is internally indexed in the expected order */ + + ctx->payload_tiler.prefix.negative_start = 0; + ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count); + + /* Reverse index state */ + ctx->payload_tiler.prefix.unknown_draw &= ~MALI_DRAW_INDEXED_UINT32; + ctx->payload_tiler.prefix.indices = (uintptr_t) NULL; + } + + ctx->payload_vertex.prefix.invocation_count = MALI_POSITIVE(invocation_count); + ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(invocation_count); + + /* Fire off the draw itself */ + panfrost_queue_draw(ctx); +} + +/* CSO state */ + +static void +panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso) +{ + free(hwcso); +} + +static void +panfrost_set_scissor(struct panfrost_context *ctx) +{ + const struct pipe_scissor_state *ss = &ctx->scissor; + + if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor && 0) { + ctx->viewport->viewport0[0] = ss->minx; + ctx->viewport->viewport0[1] = ss->miny; + ctx->viewport->viewport1[0] = MALI_POSITIVE(ss->maxx); + ctx->viewport->viewport1[1] = MALI_POSITIVE(ss->maxy); + } else { + ctx->viewport->viewport0[0] = 0; + ctx->viewport->viewport0[1] = 0; + ctx->viewport->viewport1[0] = MALI_POSITIVE(ctx->pipe_framebuffer.width); + ctx->viewport->viewport1[1] = MALI_POSITIVE(ctx->pipe_framebuffer.height); + } +} + +static void * +panfrost_create_rasterizer_state( + struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); + + so->base = *cso; + + /* Bitmask, unknown meaning of the start value */ +#ifdef T8XX + so->tiler_gl_enables = 0x7; +#else + 
so->tiler_gl_enables = 0x105; +#endif + + so->tiler_gl_enables |= MALI_FRONT_FACE( + cso->front_ccw ? MALI_CCW : MALI_CW); + + if (cso->cull_face & PIPE_FACE_FRONT) + so->tiler_gl_enables |= MALI_CULL_FACE_FRONT; + + if (cso->cull_face & PIPE_FACE_BACK) + so->tiler_gl_enables |= MALI_CULL_FACE_BACK; + + return so; +} + +static void +panfrost_bind_rasterizer_state( + struct pipe_context *pctx, + void *hwcso) +{ + struct panfrost_context *ctx = pan_context(pctx); + struct pipe_rasterizer_state *cso = hwcso; + + /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */ + if (!hwcso) + return; + + /* If scissor test has changed, we'll need to update that now */ + bool update_scissor = !ctx->rasterizer || ctx->rasterizer->base.scissor != cso->scissor; + + ctx->rasterizer = hwcso; + + /* Actualise late changes */ + if (update_scissor) + panfrost_set_scissor(ctx); + + ctx->dirty |= PAN_DIRTY_RASTERIZER; +} + +static void * +panfrost_create_vertex_elements_state( + struct pipe_context *pctx, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state); + + so->num_elements = num_elements; + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + + struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_attr_meta) * num_elements, HEAP_DESCRIPTOR); + so->hw = (struct mali_attr_meta *) transfer.cpu; + so->descriptor_ptr = transfer.gpu; + + /* Allocate memory for the descriptor state */ + + for (int i = 0; i < num_elements; ++i) { + so->hw[i].index = elements[i].vertex_buffer_index; + + enum pipe_format fmt = elements[i].src_format; + const struct util_format_description *desc = util_format_description(fmt); + so->hw[i].unknown1 = 0x2; + so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels); + + so->hw[i].format = panfrost_find_format(desc); + + /* The field itself should 
probably be shifted over */ + so->hw[i].src_offset = elements[i].src_offset; + } + + return so; +} + +static void +panfrost_bind_vertex_elements_state( + struct pipe_context *pctx, + void *hwcso) +{ + struct panfrost_context *ctx = pan_context(pctx); + + ctx->vertex = hwcso; + ctx->dirty |= PAN_DIRTY_VERTEX; +} + +static void +panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso) +{ + printf("Vertex elements delete leaks descriptor\n"); + free(hwcso); +} + +static void * +panfrost_create_shader_state( + struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants); + so->base = *cso; + + /* Token deep copy to prevent memory corruption */ + + if (cso->type == PIPE_SHADER_IR_TGSI) + so->base.tokens = tgsi_dup_tokens(so->base.tokens); + + return so; +} + +static void +panfrost_delete_shader_state( + struct pipe_context *pctx, + void *so) +{ + printf("Deleting shader state maybe leaks tokens, per-variant compiled shaders, per-variant descriptors\n"); + free(so); +} + +static void * +panfrost_create_sampler_state( + struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state); + so->base = *cso; + + /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */ + + struct mali_sampler_descriptor sampler_descriptor = { + .filter_mode = MALI_TEX_MIN(translate_tex_filter(cso->min_img_filter)) + | MALI_TEX_MAG(translate_tex_filter(cso->mag_img_filter)) + | translate_mip_filter(cso->min_mip_filter) + | 0x20, + + .wrap_s = translate_tex_wrap(cso->wrap_s), + .wrap_t = translate_tex_wrap(cso->wrap_t), + .wrap_r = translate_tex_wrap(cso->wrap_r), + .compare_func = panfrost_translate_alt_compare_func(cso->compare_func), + .border_color = { + cso->border_color.f[0], + cso->border_color.f[1], + cso->border_color.f[2], + cso->border_color.f[3] + }, + 
.min_lod = FIXED_16(0.0), + .max_lod = FIXED_16(31.0), + .unknown2 = 1, + }; + + so->hw = sampler_descriptor; + + return so; +} + +static void +panfrost_bind_sampler_states( + struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start_slot, unsigned num_sampler, + void **sampler) +{ + assert(start_slot == 0); + + struct panfrost_context *ctx = pan_context(pctx); + + /* XXX: Should upload, not just copy? */ + ctx->sampler_count[shader] = num_sampler; + memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *)); + + ctx->dirty |= PAN_DIRTY_SAMPLERS; +} + +static bool +panfrost_variant_matches(struct panfrost_context *ctx, struct panfrost_shader_state *variant) +{ + struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha; + + if (alpha->enabled || variant->alpha_state.enabled) { + /* Make sure enable state is at least the same */ + if (alpha->enabled != variant->alpha_state.enabled) { + return false; + } + + /* Check that the contents of the test are the same */ + bool same_func = alpha->func == variant->alpha_state.func; + bool same_ref = alpha->ref_value == variant->alpha_state.ref_value; + + if (!(same_func && same_ref)) { + return false; + } + } + /* Otherwise, we're good to go */ + return true; +} + +static void +panfrost_bind_fs_state( + struct pipe_context *pctx, + void *hwcso) +{ + struct panfrost_context *ctx = pan_context(pctx); + + ctx->fs = hwcso; + + if (hwcso) { + /* Match the appropriate variant */ + + signed variant = -1; + + struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso; + + for (unsigned i = 0; i < variants->variant_count; ++i) { + if (panfrost_variant_matches(ctx, &variants->variants[i])) { + variant = i; + break; + } + } + + if (variant == -1) { + /* No variant matched, so create a new one */ + variant = variants->variant_count++; + assert(variants->variant_count < MAX_SHADER_VARIANTS); + + variants->variants[variant].base = hwcso; + variants->variants[variant].alpha_state = 
ctx->depth_stencil->alpha; + + /* Allocate the mapped descriptor ahead-of-time. TODO: Use for FS as well as VS */ + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR); + + variants->variants[variant].tripipe = (struct mali_shader_meta *) transfer.cpu; + variants->variants[variant].tripipe_gpu = transfer.gpu; + + } + + /* Select this variant */ + variants->active_variant = variant; + + struct panfrost_shader_state *shader_state = &variants->variants[variant]; + assert(panfrost_variant_matches(ctx, shader_state)); + + /* Now we have a variant selected, so compile and go */ + + if (!shader_state->compiled) { + panfrost_shader_compile(ctx, shader_state->tripipe, NULL, JOB_TYPE_TILER, shader_state); + shader_state->compiled = true; + } + } + + ctx->dirty |= PAN_DIRTY_FS; +} + +static void +panfrost_bind_vs_state( + struct pipe_context *pctx, + void *hwcso) +{ + struct panfrost_context *ctx = pan_context(pctx); + + ctx->vs = hwcso; + + if (hwcso) { + if (!ctx->vs->variants[0].compiled) { + ctx->vs->variants[0].base = hwcso; + + /* TODO DRY from above */ + struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR); + ctx->vs->variants[0].tripipe = (struct mali_shader_meta *) transfer.cpu; + ctx->vs->variants[0].tripipe_gpu = transfer.gpu; + + panfrost_shader_compile(ctx, ctx->vs->variants[0].tripipe, NULL, JOB_TYPE_VERTEX, &ctx->vs->variants[0]); + ctx->vs->variants[0].compiled = true; + } + } + + ctx->dirty |= PAN_DIRTY_VS; +} + +static void +panfrost_set_vertex_buffers( + struct pipe_context *pctx, + unsigned start_slot, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + struct panfrost_context *ctx = pan_context(pctx); + assert(num_buffers <= PIPE_MAX_ATTRIBS); + + /* XXX: Dirty tracking? 
etc */ + if (buffers) { + size_t sz = sizeof(buffers[0]) * num_buffers; + ctx->vertex_buffers = malloc(sz); + ctx->vertex_buffer_count = num_buffers; + memcpy(ctx->vertex_buffers, buffers, sz); + } else { + if (ctx->vertex_buffers) { + free(ctx->vertex_buffers); + ctx->vertex_buffers = NULL; + } + + ctx->vertex_buffer_count = 0; + } +} + +static void +panfrost_set_constant_buffer( + struct pipe_context *pctx, + enum pipe_shader_type shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader]; + + size_t sz = buf ? buf->buffer_size : 0; + + /* Free previous buffer */ + + pbuf->dirty = true; + pbuf->size = sz; + + if (pbuf->buffer) { + free(pbuf->buffer); + pbuf->buffer = NULL; + } + + /* If unbinding, we're done */ + + if (!buf) + return; + + /* Multiple constant buffers not yet supported */ + assert(index == 0); + + const uint8_t *cpu; + + struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer); + + if (rsrc) { + cpu = rsrc->bo->cpu[0]; + } else if (buf->user_buffer) { + cpu = buf->user_buffer; + } else { + printf("No constant buffer?\n"); + return; + } + + /* Copy the constant buffer into the driver context for later upload */ + + pbuf->buffer = malloc(sz); + memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz); +} + +static void +panfrost_set_stencil_ref( + struct pipe_context *pctx, + const struct pipe_stencil_ref *ref) +{ + struct panfrost_context *ctx = pan_context(pctx); + ctx->stencil_ref = *ref; + + /* Shader core dirty */ + ctx->dirty |= PAN_DIRTY_FS; +} + +static struct pipe_sampler_view * +panfrost_create_sampler_view( + struct pipe_context *pctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *template) +{ + struct panfrost_sampler_view *so = CALLOC_STRUCT(panfrost_sampler_view); + int bytes_per_pixel = util_format_get_blocksize(texture->format); + + pipe_reference(NULL, 
&texture->reference); + + struct panfrost_resource *prsrc = (struct panfrost_resource *) texture; + + so->base = *template; + so->base.texture = texture; + so->base.reference.count = 1; + so->base.context = pctx; + + /* sampler_views correspond to texture descriptors, minus the texture + * (data) itself. So, we serialise the descriptor here and cache it for + * later. */ + + /* TODO: Other types of textures */ + assert(template->target == PIPE_TEXTURE_2D); + + /* Make sure it's something with which we're familiar */ + assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4); + + /* TODO: Detect from format better */ + const struct util_format_description *desc = util_format_description(prsrc->base.format); + + unsigned char user_swizzle[4] = { + template->swizzle_r, + template->swizzle_g, + template->swizzle_b, + template->swizzle_a + }; + + enum mali_format format = panfrost_find_format(desc); + + struct mali_texture_descriptor texture_descriptor = { + .width = MALI_POSITIVE(texture->width0), + .height = MALI_POSITIVE(texture->height0), + .depth = MALI_POSITIVE(texture->depth0), + + /* TODO: Decode */ + .format = { + .swizzle = panfrost_translate_swizzle_4(desc->swizzle), + .format = format, + + .usage1 = 0x0, + .is_not_cubemap = 1, + + /* 0x11 - regular texture 2d, uncompressed tiled */ + /* 0x12 - regular texture 2d, uncompressed linear */ + /* 0x1c - AFBC compressed (internally tiled, probably) texture 2D */ + + .usage2 = prsrc->bo->has_afbc ? 0x1c : (prsrc->bo->tiled ? 
0x11 : 0x12), + }, + + .swizzle = panfrost_translate_swizzle_4(user_swizzle) + }; + + /* TODO: Other base levels require adjusting dimensions / level numbers / etc */ + assert (template->u.tex.first_level == 0); + + texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level; + + so->hw = texture_descriptor; + + return (struct pipe_sampler_view *) so; +} + +static void +panfrost_set_sampler_views( + struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start_slot, unsigned num_views, + struct pipe_sampler_view **views) +{ + struct panfrost_context *ctx = pan_context(pctx); + + assert(start_slot == 0); + + ctx->sampler_view_count[shader] = num_views; + memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *)); + + ctx->dirty |= PAN_DIRTY_TEXTURES; +} + +static void +panfrost_sampler_view_destroy( + struct pipe_context *pctx, + struct pipe_sampler_view *views) +{ + //struct panfrost_context *ctx = pan_context(pctx); + + /* TODO */ + + free(views); +} + +static void +panfrost_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *fb) +{ + struct panfrost_context *ctx = pan_context(pctx); + + /* Flush when switching away from an FBO */ + + if (!panfrost_is_scanout(ctx)) { + panfrost_flush(pctx, NULL, 0); + } + + ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs; + ctx->pipe_framebuffer.samples = fb->samples; + ctx->pipe_framebuffer.layers = fb->layers; + ctx->pipe_framebuffer.width = fb->width; + ctx->pipe_framebuffer.height = fb->height; + + for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + struct pipe_surface *cb = i < fb->nr_cbufs ? 
fb->cbufs[i] : NULL; + + /* check if changing cbuf */ + if (ctx->pipe_framebuffer.cbufs[i] == cb) continue; + + if (cb && (i != 0)) { + printf("XXX: Multiple render targets not supported before t7xx!\n"); + assert(0); + } + + /* assign new */ + pipe_surface_reference(&ctx->pipe_framebuffer.cbufs[i], cb); + + if (!cb) + continue; + + ctx->vt_framebuffer = panfrost_emit_fbd(ctx); + panfrost_attach_vt_framebuffer(ctx); + panfrost_new_frag_framebuffer(ctx); + panfrost_set_scissor(ctx); + + struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture); + bool is_scanout = panfrost_is_scanout(ctx); + + if (!is_scanout && !tex->bo->has_afbc) { + /* The blob is aggressive about enabling AFBC. As such, + * it's pretty much necessary to use it here, since we + * have no traces of non-compressed FBO. */ + + panfrost_enable_afbc(ctx, tex, false); + } + + if (!is_scanout && !tex->bo->has_checksum) { + /* Enable transaction elimination if we can */ + panfrost_enable_checksum(ctx, tex); + } + } + + { + struct pipe_surface *zb = fb->zsbuf; + + if (ctx->pipe_framebuffer.zsbuf != zb) { + pipe_surface_reference(&ctx->pipe_framebuffer.zsbuf, zb); + + if (zb) { + /* FBO has depth */ + + ctx->vt_framebuffer = panfrost_emit_fbd(ctx); + panfrost_attach_vt_framebuffer(ctx); + panfrost_new_frag_framebuffer(ctx); + panfrost_set_scissor(ctx); + + struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture); + + if (!tex->bo->has_afbc && !panfrost_is_scanout(ctx)) + panfrost_enable_afbc(ctx, tex, true); + } + } + } + + /* Force a clear XXX wrong? 
*/ + if (ctx->last_clear.color) + panfrost_clear(&ctx->base, ctx->last_clear.buffers, ctx->last_clear.color, ctx->last_clear.depth, ctx->last_clear.stencil); + +#if 0 + /* Don't consider the buffer dirty */ + ctx->dirty &= ~PAN_DIRTY_CLEAR; +#endif +} + +static void * +panfrost_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state); + so->base = *blend; + + /* TODO: The following features are not yet implemented */ + assert(!blend->logicop_enable); + assert(!blend->alpha_to_coverage); + assert(!blend->alpha_to_one); + + /* Compile the blend state, first as fixed-function if we can */ + + if (panfrost_make_fixed_blend_mode(&blend->rt[0], &so->equation, blend->rt[0].colormask, &ctx->blend_color)) + return so; + + /* If we can't, compile a blend shader instead */ + + panfrost_make_blend_shader(ctx, so, &ctx->blend_color); + + return so; +} + +static void +panfrost_bind_blend_state(struct pipe_context *pipe, + void *cso) +{ + struct panfrost_context *ctx = pan_context(pipe); + struct pipe_blend_state *blend = (struct pipe_blend_state *) cso; + struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso; + ctx->blend = pblend; + + if (!blend) + return; + + SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither); + + /* TODO: Attach color */ + + /* Shader itself is not dirty, but the shader core is */ + ctx->dirty |= PAN_DIRTY_FS; +} + +static void +panfrost_delete_blend_state(struct pipe_context *pipe, + void *blend) +{ + printf("Deleting blend state may leak blend shader\n"); + free(blend); +} + +static void +panfrost_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct panfrost_context *ctx = pan_context(pipe); + + /* If blend_color is we're unbinding, so ctx->blend_color is now undefined -> nothing to do */ + + if (blend_color) { 
+ ctx->blend_color = *blend_color; + + /* The blend mode depends on the blend constant color, due to the + * fixed/programmable split. So, we're forced to regenerate the blend + * equation */ + + /* TODO: Attach color */ + } +} + +static void * +panfrost_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + +static void +panfrost_bind_depth_stencil_state(struct pipe_context *pipe, + void *cso) +{ + struct panfrost_context *ctx = pan_context(pipe); + struct pipe_depth_stencil_alpha_state *depth_stencil = cso; + ctx->depth_stencil = depth_stencil; + + if (!depth_stencil) + return; + + /* Alpha does not exist in the hardware (it's not in ES3), so it's + * emulated in the fragment shader */ + + if (depth_stencil->alpha.enabled) { + /* We need to trigger a new shader (maybe) */ + ctx->base.bind_fs_state(&ctx->base, ctx->fs); + } + + /* Stencil state */ + SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); /* XXX: which one? */ + + panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front); + ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask; + + panfrost_make_stencil_state(&depth_stencil->stencil[1], &ctx->fragment_shader_core.stencil_back); + ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[1].writemask; + + /* Depth state (TODO: Refactor) */ + SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled); + + int func = depth_stencil->depth.enabled ? 
depth_stencil->depth.func : PIPE_FUNC_ALWAYS; + + ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK; + ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func)); + + /* Bounds test not implemented */ + assert(!depth_stencil->depth.bounds_test); + + ctx->dirty |= PAN_DIRTY_FS; +} + +static void +panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ + free( depth ); +} + +static void +panfrost_set_sample_mask(struct pipe_context *pipe, + unsigned sample_mask) +{ +} + +static void +panfrost_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + //struct panfrost_context *panfrost = pan_context(pipe); +} + +static void +panfrost_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *viewports) +{ + struct panfrost_context *ctx = pan_context(pipe); + + assert(start_slot == 0); + assert(num_viewports == 1); + + ctx->pipe_viewport = *viewports; + +#if 0 + /* TODO: What if not centered? 
*/ + float w = abs(viewports->scale[0]) * 2.0; + float h = abs(viewports->scale[1]) * 2.0; + + ctx->viewport.viewport1[0] = MALI_POSITIVE((int) w); + ctx->viewport.viewport1[1] = MALI_POSITIVE((int) h); +#endif +} + +static void +panfrost_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissors) +{ + struct panfrost_context *ctx = pan_context(pipe); + + assert(start_slot == 0); + assert(num_scissors == 1); + + ctx->scissor = *scissors; + + panfrost_set_scissor(ctx); +} + +static void +panfrost_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + //struct panfrost_context *panfrost = pan_context(pipe); +} + +static void +panfrost_set_active_query_state(struct pipe_context *pipe, + boolean enable) +{ + //struct panfrost_context *panfrost = pan_context(pipe); +} + +static void +panfrost_destroy(struct pipe_context *pipe) +{ + struct panfrost_context *panfrost = pan_context(pipe); + + if (panfrost->blitter) + util_blitter_destroy(panfrost->blitter); +} + +static struct pipe_query * +panfrost_create_query(struct pipe_context *pipe, + unsigned type, + unsigned index) +{ + struct panfrost_query *q = CALLOC_STRUCT(panfrost_query); + + q->type = type; + q->index = index; + + return (struct pipe_query *) q; +} + +static void +panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q) +{ + FREE(q); +} + +static boolean +panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_query *query = (struct panfrost_query *) q; + + switch (query->type) { + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + { + /* Allocate a word for the query results to be stored */ + query->transfer = panfrost_allocate_chunk(ctx, sizeof(unsigned), HEAP_DESCRIPTOR); + + ctx->occlusion_query = query; + + break; + } + + default: + fprintf(stderr, 
"Skipping query %d\n", query->type); + break; + } + + return true; +} + +static bool +panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct panfrost_context *ctx = pan_context(pipe); + ctx->occlusion_query = NULL; + return true; +} + +static boolean +panfrost_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + union pipe_query_result *vresult) +{ + /* STUB */ + struct panfrost_query *query = (struct panfrost_query *) q; + + /* We need to flush out the jobs to actually run the counter, TODO + * check wait, TODO wallpaper after if needed */ + + panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); + + switch (query->type) { + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { + /* Read back the query results */ + unsigned *result = (unsigned *) query->transfer.cpu; + unsigned passed = *result; + + vresult->b = !!passed; + break; + } + default: + fprintf(stderr, "Skipped query get %d\n", query->type); + break; + } + + return true; +} + +static void +panfrost_setup_hardware(struct panfrost_context *ctx) +{ + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = pan_screen(gallium->screen); + + for (int i = 0; i < ARRAY_SIZE(ctx->transient_pools); ++i) { + /* Allocate the beginning of the transient pool */ + int entry_size = (1 << 22); /* 4MB */ + + ctx->transient_pools[i].entry_size = entry_size; + ctx->transient_pools[i].entry_count = 1; + + ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT); + } + + screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0); + screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, 0, 0, 0); + screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0); + screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_GROWABLE, 1, 128); + 
screen->driver->allocate_slab(screen, &ctx->misc_0, 128, false, PAN_ALLOCATE_GROWABLE, 1, 128); + +} + +/* New context creation, which also does hardware initialisation since I don't + * know the better way to structure this :smirk: */ + +struct pipe_context * +panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) +{ + struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context); + memset(ctx, 0, sizeof(*ctx)); + struct pipe_context *gallium = (struct pipe_context *) ctx; + + gallium->screen = screen; + + gallium->destroy = panfrost_destroy; + + gallium->set_framebuffer_state = panfrost_set_framebuffer_state; + + gallium->flush = panfrost_flush; + gallium->clear = panfrost_clear; + gallium->draw_vbo = panfrost_draw_vbo; + + gallium->set_vertex_buffers = panfrost_set_vertex_buffers; + gallium->set_constant_buffer = panfrost_set_constant_buffer; + + gallium->set_stencil_ref = panfrost_set_stencil_ref; + + gallium->create_sampler_view = panfrost_create_sampler_view; + gallium->set_sampler_views = panfrost_set_sampler_views; + gallium->sampler_view_destroy = panfrost_sampler_view_destroy; + + gallium->create_rasterizer_state = panfrost_create_rasterizer_state; + gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state; + gallium->delete_rasterizer_state = panfrost_generic_cso_delete; + + gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state; + gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; + gallium->delete_vertex_elements_state = panfrost_delete_vertex_elements_state; + + gallium->create_fs_state = panfrost_create_shader_state; + gallium->delete_fs_state = panfrost_delete_shader_state; + gallium->bind_fs_state = panfrost_bind_fs_state; + + gallium->create_vs_state = panfrost_create_shader_state; + gallium->delete_vs_state = panfrost_delete_shader_state; + gallium->bind_vs_state = panfrost_bind_vs_state; + + gallium->create_sampler_state = panfrost_create_sampler_state; + 
gallium->delete_sampler_state = panfrost_generic_cso_delete; + gallium->bind_sampler_states = panfrost_bind_sampler_states; + + gallium->create_blend_state = panfrost_create_blend_state; + gallium->bind_blend_state = panfrost_bind_blend_state; + gallium->delete_blend_state = panfrost_delete_blend_state; + + gallium->set_blend_color = panfrost_set_blend_color; + + gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; + gallium->bind_depth_stencil_alpha_state = panfrost_bind_depth_stencil_state; + gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state; + + gallium->set_sample_mask = panfrost_set_sample_mask; + + gallium->set_clip_state = panfrost_set_clip_state; + gallium->set_viewport_states = panfrost_set_viewport_states; + gallium->set_scissor_states = panfrost_set_scissor_states; + gallium->set_polygon_stipple = panfrost_set_polygon_stipple; + gallium->set_active_query_state = panfrost_set_active_query_state; + + gallium->create_query = panfrost_create_query; + gallium->destroy_query = panfrost_destroy_query; + gallium->begin_query = panfrost_begin_query; + gallium->end_query = panfrost_end_query; + gallium->get_query_result = panfrost_get_query_result; + + panfrost_resource_context_init(gallium); + + panfrost_setup_hardware(ctx); + + /* XXX: leaks */ + gallium->stream_uploader = u_upload_create_default(gallium); + gallium->const_uploader = gallium->stream_uploader; + assert(gallium->stream_uploader); + + ctx->primconvert = util_primconvert_create(gallium, + (1 << PIPE_PRIM_QUADS) - 1); + assert(ctx->primconvert); + + ctx->blitter = util_blitter_create(gallium); + assert(ctx->blitter); + + /* Prepare for render! 
*/ + + /* TODO: XXX */ + ctx->vt_framebuffer = panfrost_emit_fbd(ctx); + + panfrost_emit_vertex_payload(ctx); + panfrost_emit_tiler_payload(ctx); + panfrost_invalidate_frame(ctx); + panfrost_viewport(ctx, 0.0, 1.0, 0, 0, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); + panfrost_default_shader_backend(ctx); + panfrost_generate_space_filler_indices(); + + return gallium; +} diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index f1378583360..bda8155aac6 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -356,6 +356,9 @@ panfrost_flush( struct pipe_fence_handle **fence, unsigned flags); +mali_ptr +panfrost_fragment_job(struct panfrost_context *ctx); + void panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *meta, const char *src, int type, struct panfrost_shader_state *state); diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c new file mode 100644 index 00000000000..cb01cce634f --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_drm.c @@ -0,0 +1,42 @@ +/* + * © Copyright 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include "util/u_memory.h" + +#include "pan_screen.h" +#include "pan_drm.h" + +struct panfrost_drm { + struct panfrost_driver base; + int fd; +}; + +struct panfrost_driver * +panfrost_create_drm_driver(int fd) +{ + struct panfrost_drm *driver = CALLOC_STRUCT(panfrost_drm); + + driver->fd = fd; + + return &driver->base; +} diff --git a/src/gallium/drivers/panfrost/pan_drm.h b/src/gallium/drivers/panfrost/pan_drm.h new file mode 100644 index 00000000000..e94907aa983 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_drm.h @@ -0,0 +1,32 @@ +/* + * © Copyright 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __PAN_DRM_H__ +#define __PAN_DRM_H__ + +#include "pan_screen.h" + +struct panfrost_driver *panfrost_create_drm_driver(int fd); + +#endif /* __PAN_DRM_H__ */ diff --git a/src/gallium/drivers/panfrost/pan_format.c b/src/gallium/drivers/panfrost/pan_format.c new file mode 100644 index 00000000000..b81206412ae --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_format.c @@ -0,0 +1,220 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include "pan_format.h" + +/* From panwrap/panwrap-decoder, but we don't want to bring in all those headers */ +char *panwrap_format_name(enum mali_format format); + +/* Construct a default swizzle based on the number of components */ + +static unsigned +panfrost_translate_swizzle(enum pipe_swizzle s) +{ + switch (s) { + case PIPE_SWIZZLE_X: + return MALI_CHANNEL_RED; + + case PIPE_SWIZZLE_Y: + return MALI_CHANNEL_GREEN; + + case PIPE_SWIZZLE_Z: + return MALI_CHANNEL_BLUE; + + case PIPE_SWIZZLE_W: + return MALI_CHANNEL_ALPHA; + + case PIPE_SWIZZLE_0: + case PIPE_SWIZZLE_NONE: + return MALI_CHANNEL_ZERO; + + case PIPE_SWIZZLE_1: + return MALI_CHANNEL_ONE; + + default: + assert(0); + return 0; + } +} + +/* Translate a Gallium swizzle quad to a 12-bit Mali swizzle code */ + +unsigned +panfrost_translate_swizzle_4(const unsigned char swizzle[4]) +{ + unsigned out = 0; + + for (unsigned i = 0; i < 4; ++i) { + unsigned translated = panfrost_translate_swizzle(swizzle[i]); + out |= (translated << (3*i)); + } + + return out; +} + +unsigned +panfrost_get_default_swizzle(unsigned components) +{ + unsigned char default_swizzles[4][4] = { + {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1}, + {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1}, + {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_1}, + {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W}, + }; + + assert(components >= 1 && components <= 4); + return panfrost_translate_swizzle_4(default_swizzles[components - 1]); +} + +static unsigned +panfrost_translate_channel_width(unsigned size) +{ + switch (size) { + case 8: + return MALI_CHANNEL_8; + case 16: + return MALI_CHANNEL_16; + case 32: + return MALI_CHANNEL_32; + default: + fprintf(stderr, "Unknown width %d\n", size); + assert(0); + return 0; + } +} + +static unsigned +panfrost_translate_channel_type(unsigned type, unsigned size, bool norm) { + switch (type) { + case UTIL_FORMAT_TYPE_UNSIGNED: + 
return norm ? MALI_FORMAT_UNORM : MALI_FORMAT_UINT; + + case UTIL_FORMAT_TYPE_SIGNED: + return norm ? MALI_FORMAT_SNORM : MALI_FORMAT_SINT; + + case UTIL_FORMAT_TYPE_FLOAT: + if (size == 16) { + /* With FLOAT, fp16 */ + return MALI_FORMAT_SINT; + } else if (size == 32) { + /* With FLOAT< fp32 */ + return MALI_FORMAT_UNORM; + } else { + assert(0); + return 0; + } + + default: + assert(0); + return 0; + } +} + +/* Constructs a mali_format satisfying the specified Gallium format + * description */ + +enum mali_format +panfrost_find_format(const struct util_format_description *desc) +{ + /* Find first non-VOID channel */ + struct util_format_channel_description chan = desc->channel[0]; + + for (unsigned c = 0; c < 4; ++c) { + if (desc->channel[c].type == UTIL_FORMAT_TYPE_VOID) + continue; + + chan = desc->channel[c]; + break; + } + + /* Check for special formats */ + switch (desc->format) { + case PIPE_FORMAT_YV12: + case PIPE_FORMAT_YV16: + case PIPE_FORMAT_IYUV: + case PIPE_FORMAT_NV21: + fprintf(stderr, "YUV format type %s (%d) is not yet supported, but it's probably close to NV12!\n", desc->name, desc->format); + assert(0); + break; + + case PIPE_FORMAT_NV12: + return MALI_NV12; + + case PIPE_FORMAT_R10G10B10X2_UNORM: + case PIPE_FORMAT_B10G10R10X2_UNORM: + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + return MALI_RGB10_A2_UNORM; + + case PIPE_FORMAT_R10G10B10X2_SNORM: + case PIPE_FORMAT_R10G10B10A2_SNORM: + case PIPE_FORMAT_B10G10R10A2_SNORM: + return MALI_RGB10_A2_SNORM; + + case PIPE_FORMAT_R10G10B10A2_UINT: + case PIPE_FORMAT_B10G10R10A2_UINT: + return MALI_RGB10_A2UI; + + /* TODO: ZS isn't really special case */ + case PIPE_FORMAT_Z32_UNORM: + return MALI_Z32_UNORM; + + default: + /* Fallthrough to default */ + break; + } + + /* Formats must match in channel count */ + assert(desc->nr_channels >= 1 && desc->nr_channels <= 4); + unsigned format = MALI_NR_CHANNELS(desc->nr_channels); + + switch (chan.type) { + case 
UTIL_FORMAT_TYPE_UNSIGNED: + case UTIL_FORMAT_TYPE_SIGNED: + case UTIL_FORMAT_TYPE_FIXED: + /* Channel width */ + format |= panfrost_translate_channel_width(chan.size); + + /* Channel type */ + format |= panfrost_translate_channel_type(chan.type, chan.size, chan.normalized); + break; + + case UTIL_FORMAT_TYPE_FLOAT: + /* Float formats use a special width and encode width + * with type mixed */ + + format |= MALI_CHANNEL_FLOAT; + format |= panfrost_translate_channel_type(chan.type, chan.size, chan.normalized); + break; + + default: + fprintf(stderr, "Unknown format type in %s\n", desc->name); + assert(0); + break; + } + + return (enum mali_format) format; +} + + diff --git a/src/gallium/drivers/panfrost/pan_format.h b/src/gallium/drivers/panfrost/pan_format.h new file mode 100644 index 00000000000..a44d1d80994 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_format.h @@ -0,0 +1,42 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __PAN_FORMAT_H__ +#define __PAN_FORMAT_H__ + +#include "pan_context.h" +#include "util/u_format.h" + +unsigned +panfrost_translate_swizzle_4(const unsigned char swizzle[4]); + +unsigned +panfrost_get_default_swizzle(unsigned components); + +enum mali_format +panfrost_find_format(const struct util_format_description *desc); + +#endif + + diff --git a/src/gallium/drivers/panfrost/pan_pretty_print.c b/src/gallium/drivers/panfrost/pan_pretty_print.c new file mode 100644 index 00000000000..fd8ad40d407 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_pretty_print.c @@ -0,0 +1,224 @@ +/* + * © Copyright 2017-2098 The Panfrost Community + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pan_pretty_print.h" + +#include <stdio.h> +#include <string.h> +#include <assert.h> + +/* Some self-contained prettyprinting functions shared between panwrap and + * the main driver */ + +#define DEFINE_CASE(name) case MALI_## name: return "MALI_" #name +char *panwrap_format_name(enum mali_format format) +{ + static char unk_format_str[5]; + + switch (format) { + DEFINE_CASE(RGB10_A2_UNORM); + DEFINE_CASE(RGB10_A2_SNORM); + DEFINE_CASE(RGB10_A2UI); + DEFINE_CASE(RGB10_A2I); + DEFINE_CASE(NV12); + DEFINE_CASE(Z32_UNORM); + DEFINE_CASE(R32_FIXED); + DEFINE_CASE(RG32_FIXED); + DEFINE_CASE(RGB32_FIXED); + DEFINE_CASE(RGBA32_FIXED); + DEFINE_CASE(R11F_G11F_B10F); + DEFINE_CASE(VARYING_POS); + DEFINE_CASE(VARYING_DISCARD); + + DEFINE_CASE(R8_SNORM); + DEFINE_CASE(R16_SNORM); + DEFINE_CASE(R32_SNORM); + DEFINE_CASE(RG8_SNORM); + DEFINE_CASE(RG16_SNORM); + DEFINE_CASE(RG32_SNORM); + DEFINE_CASE(RGB8_SNORM); + DEFINE_CASE(RGB16_SNORM); + DEFINE_CASE(RGB32_SNORM); + DEFINE_CASE(RGBA8_SNORM); + DEFINE_CASE(RGBA16_SNORM); + DEFINE_CASE(RGBA32_SNORM); + + DEFINE_CASE(R8UI); + DEFINE_CASE(R16UI); + DEFINE_CASE(R32UI); + DEFINE_CASE(RG8UI); + DEFINE_CASE(RG16UI); + DEFINE_CASE(RG32UI); + DEFINE_CASE(RGB8UI); + DEFINE_CASE(RGB16UI); + DEFINE_CASE(RGB32UI); + DEFINE_CASE(RGBA8UI); + DEFINE_CASE(RGBA16UI); + DEFINE_CASE(RGBA32UI); + + DEFINE_CASE(R8_UNORM); + DEFINE_CASE(R16_UNORM); + DEFINE_CASE(R32_UNORM); + DEFINE_CASE(R32F); + DEFINE_CASE(RG8_UNORM); + DEFINE_CASE(RG16_UNORM); + DEFINE_CASE(RG32_UNORM); + DEFINE_CASE(RG32F); + DEFINE_CASE(RGB8_UNORM); + DEFINE_CASE(RGB16_UNORM); + DEFINE_CASE(RGB32_UNORM); + DEFINE_CASE(RGB32F); + DEFINE_CASE(RGBA8_UNORM); + DEFINE_CASE(RGBA16_UNORM); + 
DEFINE_CASE(RGBA32_UNORM); + DEFINE_CASE(RGBA32F); + + DEFINE_CASE(R8I); + DEFINE_CASE(R16I); + DEFINE_CASE(R32I); + DEFINE_CASE(RG8I); + DEFINE_CASE(R16F); + DEFINE_CASE(RG16I); + DEFINE_CASE(RG32I); + DEFINE_CASE(RG16F); + DEFINE_CASE(RGB8I); + DEFINE_CASE(RGB16I); + DEFINE_CASE(RGB32I); + DEFINE_CASE(RGB16F); + DEFINE_CASE(RGBA8I); + DEFINE_CASE(RGBA16I); + DEFINE_CASE(RGBA32I); + DEFINE_CASE(RGBA16F); + + DEFINE_CASE(RGBA4); + DEFINE_CASE(RGBA8_2); + DEFINE_CASE(RGB10_A2_2); + default: + snprintf(unk_format_str, sizeof(unk_format_str), "0x%02x", format); + return unk_format_str; + } +} + +#undef DEFINE_CASE + +/* Helper to dump fixed-function blend part for debugging */ + +static const char * +panfrost_factor_name(enum mali_dominant_factor factor) +{ + switch (factor) { + case MALI_DOMINANT_UNK0: + return "unk0"; + + case MALI_DOMINANT_ZERO: + return "zero"; + + case MALI_DOMINANT_SRC_COLOR: + return "source color"; + + case MALI_DOMINANT_DST_COLOR: + return "dest color"; + + case MALI_DOMINANT_UNK4: + return "unk4"; + + case MALI_DOMINANT_SRC_ALPHA: + return "source alpha"; + + case MALI_DOMINANT_DST_ALPHA: + return "dest alpha"; + + case MALI_DOMINANT_CONSTANT: + return "constant"; + } + + return "unreachable"; +} + +static const char * +panfrost_modifier_name(enum mali_blend_modifier mod) +{ + switch (mod) { + case MALI_BLEND_MOD_UNK0: + return "unk0"; + + case MALI_BLEND_MOD_NORMAL: + return "normal"; + + case MALI_BLEND_MOD_SOURCE_ONE: + return "source one"; + + case MALI_BLEND_MOD_DEST_ONE: + return "dest one"; + } + + return "unreachable"; +} + +static void +panfrost_print_fixed_part(const char *name, unsigned u) +{ + struct mali_blend_mode part; + memcpy(&part, &u, sizeof(part)); + + printf("%s blend mode (%X):\n", name, u); + + printf(" %s dominant:\n", + (part.dominant == MALI_BLEND_DOM_SOURCE) ? 
"source" : "destination"); + + printf(" %s\n", panfrost_factor_name(part.dominant_factor)); + + if (part.complement_dominant) + printf(" complement\n"); + + + printf(" nondominant %s\n", + (part.nondominant_mode == MALI_BLEND_NON_MIRROR) ? "mirror" : "zero"); + + + printf(" mode: %s\n", panfrost_modifier_name(part.clip_modifier)); + + if (part.negate_source) printf(" negate source\n"); + + if (part.negate_dest) printf(" negate dest\n"); + + assert(!(part.unused_0 || part.unused_1)); +} + +void +panfrost_print_blend_equation(struct mali_blend_equation eq) +{ + printf("\n"); + panfrost_print_fixed_part("RGB", eq.rgb_mode); + panfrost_print_fixed_part("Alpha", eq.alpha_mode); + + assert(!eq.zero1); + + printf("Mask: %s%s%s%s\n", + (eq.color_mask & MALI_MASK_R) ? "R" : "", + (eq.color_mask & MALI_MASK_G) ? "G" : "", + (eq.color_mask & MALI_MASK_B) ? "B" : "", + (eq.color_mask & MALI_MASK_A) ? "A" : ""); + + printf("Constant: %f\n", eq.constant); +} diff --git a/src/gallium/drivers/panfrost/pan_pretty_print.h b/src/gallium/drivers/panfrost/pan_pretty_print.h new file mode 100644 index 00000000000..a781ceaf582 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_pretty_print.h @@ -0,0 +1,32 @@ +/* + * © Copyright 2017-2098 The Panfrost Communiy + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __PAN_PRETTY_H +#define __PAN_PRETTY_H + +#include "panfrost-job.h" + +char *panwrap_format_name(enum mali_format format); +void panfrost_print_blend_equation(struct mali_blend_equation eq); + +#endif diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c new file mode 100644 index 00000000000..de20fc79099 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -0,0 +1,432 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * Copyright 2014 Broadcom + * Copyright 2018 Alyssa Rosenzweig + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <xf86drm.h> +#include <fcntl.h> +#include <drm_fourcc.h> + +#include "state_tracker/winsys_handle.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_surface.h" +#include "util/u_transfer.h" +#include "util/u_transfer_helper.h" + +#include "pan_context.h" +#include "pan_screen.h" +#include "pan_resource.h" +#include "pan_swizzle.h" + +static struct pipe_resource * +panfrost_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + struct winsys_handle *whandle, + unsigned usage) +{ + struct panfrost_screen *screen = pan_screen(pscreen); + struct panfrost_resource *rsc; + struct pipe_resource *prsc; + + assert(whandle->type == WINSYS_HANDLE_TYPE_FD); + + rsc = CALLOC_STRUCT(panfrost_resource); + if (!rsc) + return NULL; + + prsc = &rsc->base; + + *prsc = *templat; + + pipe_reference_init(&prsc->reference, 1); + prsc->screen = pscreen; + + rsc->bo = screen->driver->import_bo(screen, whandle); + + return prsc; +} + +static boolean +panfrost_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_resource *pt, + struct winsys_handle *handle, + unsigned usage) +{ + struct panfrost_screen *screen = pan_screen(pscreen); + struct panfrost_resource *rsrc = (struct panfrost_resource *) pt; + struct renderonly_scanout *scanout = rsrc->scanout; + int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format); 
+ int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */ + + handle->stride = stride; + handle->modifier = DRM_FORMAT_MOD_INVALID; + + if (handle->type == WINSYS_HANDLE_TYPE_SHARED) { + printf("Missed shared handle\n"); + return FALSE; + } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) { + if (renderonly_get_handle(scanout, handle)) { + return TRUE; + } else { + printf("Missed nonrenderonly KMS handle for resource %p with scanout %p\n", pt, scanout); + return FALSE; + } + } else if (handle->type == WINSYS_HANDLE_TYPE_FD) { + if (scanout) { + struct drm_prime_handle args = { + .handle = scanout->handle, + .flags = DRM_CLOEXEC, + }; + + int ret = drmIoctl(screen->ro->kms_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return FALSE; + + handle->handle = args.fd; + + return TRUE; + } else { + printf("Missed nonscanout FD handle\n"); + assert(0); + return FALSE; + } + } + + return FALSE; +} + +static void +panfrost_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) +{ + //fprintf(stderr, "TODO %s\n", __func__); +} + +static void +panfrost_blit(struct pipe_context *pipe, + const struct pipe_blit_info *info) +{ + /* STUB */ + printf("Skipping blit XXX\n"); + return; +} + +static struct pipe_surface * +panfrost_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) +{ + struct pipe_surface *ps = NULL; + + ps = CALLOC_STRUCT(pipe_surface); + + if (ps) { + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; + ps->format = surf_tmpl->format; + + if (pt->target != PIPE_BUFFER) { + assert(surf_tmpl->u.tex.level <= pt->last_level); + ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); + ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); + ps->u.tex.level = surf_tmpl->u.tex.level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + } else { + /* 
setting width as number of elements should get us correct renderbuffer width */ + ps->width = surf_tmpl->u.buf.last_element - surf_tmpl->u.buf.first_element + 1; + ps->height = pt->height0; + ps->u.buf.first_element = surf_tmpl->u.buf.first_element; + ps->u.buf.last_element = surf_tmpl->u.buf.last_element; + assert(ps->u.buf.first_element <= ps->u.buf.last_element); + assert(ps->u.buf.last_element < ps->width); + } + } + + return ps; +} + +static void +panfrost_surface_destroy(struct pipe_context *pipe, + struct pipe_surface *surf) +{ + assert(surf->texture); + pipe_resource_reference(&surf->texture, NULL); + free(surf); +} + +/* TODO: Proper resource tracking depends on, well, proper resources. This + * section will be woefully incomplete until we can sort out a proper DRM + * driver. */ + +static struct pipe_resource * +panfrost_resource_create(struct pipe_screen *screen, + const struct pipe_resource *template) +{ + struct panfrost_resource *so = CALLOC_STRUCT(panfrost_resource); + struct panfrost_screen *pscreen = (struct panfrost_screen *) screen; + + so->base = *template; + so->base.screen = screen; + + pipe_reference_init(&so->base.reference, 1); + + /* Make sure we're familiar */ + switch (template->target) { + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_RECT: + break; + default: + fprintf(stderr, "Unknown texture target %d\n", template->target); + assert(0); + } + + if ((template->bind & PIPE_BIND_RENDER_TARGET) || (template->bind & PIPE_BIND_DEPTH_STENCIL)) { + if (template->bind & PIPE_BIND_DISPLAY_TARGET || + template->bind & PIPE_BIND_SCANOUT || + template->bind & PIPE_BIND_SHARED) { + struct pipe_resource scanout_templat = *template; + struct renderonly_scanout *scanout; + struct winsys_handle handle; + + /* TODO: align width0 and height0? 
*/ + + scanout = renderonly_scanout_for_resource(&scanout_templat, + pscreen->ro, &handle); + if (!scanout) + return NULL; + + assert(handle.type == WINSYS_HANDLE_TYPE_FD); + /* TODO: handle modifiers? */ + so = pan_resource(screen->resource_from_handle(screen, template, + &handle, + PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)); + close(handle.handle); + if (!so) + return NULL; + + so->scanout = scanout; + pscreen->display_target = so; + } else { + so->bo = pscreen->driver->create_bo(pscreen, template); + } + } else { + so->bo = pscreen->driver->create_bo(pscreen, template); + } + + printf("Created resource %p with scanout %p\n", so, so->scanout); + + return (struct pipe_resource *)so; +} + +static void +panfrost_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *pt) +{ + struct panfrost_screen *pscreen = panfrost_screen(screen); + struct panfrost_resource *rsrc = (struct panfrost_resource *) pt; + + if (rsrc->scanout) + renderonly_scanout_destroy(rsrc->scanout, pscreen->ro); + + if (rsrc->bo) + pscreen->driver->destroy_bo(pscreen, rsrc->bo); + + FREE(rsrc); +} + +static void * +panfrost_transfer_map(struct pipe_context *pctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, /* a combination of PIPE_TRANSFER_x */ + const struct pipe_box *box, + struct pipe_transfer **out_transfer) +{ + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_screen *screen = panfrost_screen(pctx->screen); + int bytes_per_pixel = util_format_get_blocksize(resource->format); + int stride = bytes_per_pixel * resource->width0; /* TODO: Alignment? 
*/ + uint8_t *cpu; + + struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer); + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + transfer->stride = stride; + assert(!transfer->box.z); + + pipe_resource_reference(&transfer->resource, resource); + + *out_transfer = transfer; + + if (resource->bind & PIPE_BIND_DISPLAY_TARGET || + resource->bind & PIPE_BIND_SCANOUT || + resource->bind & PIPE_BIND_SHARED) { + /* Mipmapped readpixels?! */ + assert(level == 0); + + /* Force a flush -- kill the pipeline */ + panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME); + } + + cpu = screen->driver->map_bo(ctx, transfer); + if (cpu == NULL) + return NULL; + + return cpu + transfer->box.x * bytes_per_pixel + transfer->box.y * stride; +} + +static void +panfrost_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct panfrost_context *ctx = pan_context(pctx); + struct panfrost_screen *screen = pan_screen(pctx->screen); + + screen->driver->unmap_bo(ctx, transfer); + + /* Dereference the resource */ + pipe_resource_reference(&transfer->resource, NULL); + + /* Transfer itself is CALLOCed at the moment */ + free(transfer); +} + + +static struct pb_slab * +panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index) +{ + struct panfrost_screen *screen = (struct panfrost_screen *) priv; + struct panfrost_memory *mem = CALLOC_STRUCT(panfrost_memory); + + size_t slab_size = (1 << (MAX_SLAB_ENTRY_SIZE + 1)); + + mem->slab.num_entries = slab_size / entry_size; + mem->slab.num_free = mem->slab.num_entries; + + LIST_INITHEAD(&mem->slab.free); + for (unsigned i = 0; i < mem->slab.num_entries; ++i) { + /* Create a slab entry */ + struct panfrost_memory_entry *entry = CALLOC_STRUCT(panfrost_memory_entry); + entry->offset = entry_size * i; + + entry->base.slab = &mem->slab; + entry->base.group_index = group_index; + + LIST_ADDTAIL(&entry->base.head, &mem->slab.free); + } + + /* Actually allocate the memory 
from kernel-space. Mapped, same_va, no + * special flags */ + + screen->driver->allocate_slab(screen, mem, slab_size / 4096, true, 0, 0, 0); + + return &mem->slab; +} + +static bool +panfrost_slab_can_reclaim(void *priv, struct pb_slab_entry *entry) +{ + struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry; + return p_entry->freed; +} + +static void +panfrost_slab_free(void *priv, struct pb_slab *slab) +{ + /* STUB */ + //struct panfrost_memory *mem = (struct panfrost_memory *) slab; + printf("stub: Tried to free slab\n"); +} + +static void +panfrost_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) +{ + //fprintf(stderr, "TODO %s\n", __func__); +} + +static const struct u_transfer_vtbl transfer_vtbl = { + .resource_create = panfrost_resource_create, + .resource_destroy = panfrost_resource_destroy, + .transfer_map = panfrost_transfer_map, + .transfer_unmap = panfrost_transfer_unmap, + .transfer_flush_region = u_default_transfer_flush_region, + //.get_internal_format = panfrost_resource_get_internal_format, + //.set_stencil = panfrost_resource_set_stencil, + //.get_stencil = panfrost_resource_get_stencil, +}; + +void +panfrost_resource_screen_init(struct panfrost_screen *pscreen) +{ + //pscreen->base.resource_create_with_modifiers = + // panfrost_resource_create_with_modifiers; + pscreen->base.resource_create = u_transfer_helper_resource_create; + pscreen->base.resource_destroy = u_transfer_helper_resource_destroy; + pscreen->base.resource_from_handle = panfrost_resource_from_handle; + pscreen->base.resource_get_handle = panfrost_resource_get_handle; + pscreen->base.transfer_helper = u_transfer_helper_create(&transfer_vtbl, + true, true, + true, true); + + pb_slabs_init(&pscreen->slabs, + MIN_SLAB_ENTRY_SIZE, + MAX_SLAB_ENTRY_SIZE, + + 3, /* Number of heaps */ + + pscreen, + + panfrost_slab_can_reclaim, + panfrost_slab_alloc, + panfrost_slab_free); +} + +void +panfrost_resource_context_init(struct pipe_context 
*pctx) +{ + pctx->transfer_map = u_transfer_helper_transfer_map; + pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; + pctx->transfer_unmap = u_transfer_helper_transfer_unmap; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->create_surface = panfrost_create_surface; + pctx->surface_destroy = panfrost_surface_destroy; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = panfrost_blit; + //pctx->generate_mipmap = panfrost_generate_mipmap; + pctx->flush_resource = panfrost_flush_resource; + pctx->invalidate_resource = panfrost_invalidate_resource; + pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; +} diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 0fe90db0b0a..d2ea5a692f5 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -48,6 +48,9 @@ #include "pan_public.h" #include "pan_context.h" +#include "midgard/midgard_compile.h" + +#include "pan_drm.h" static const char * panfrost_get_name(struct pipe_screen *screen) @@ -513,7 +516,7 @@ panfrost_screen_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir, enum pipe_shader_type shader) { - return NULL; + return &midgard_nir_options; } struct pipe_screen * @@ -533,6 +536,18 @@ panfrost_create_screen(int fd, struct renderonly *ro, bool is_drm) } } + if (is_drm) { + screen->driver = panfrost_create_drm_driver(fd); + } else { + fprintf(stderr, "Legacy (non-DRM) drivers are not supported in upstream Mesa\n"); + return NULL; + } + +#ifdef DUMP_PERFORMANCE_COUNTERS + screen->driver->allocate_slab(screen, &screen->perf_counters, 64, true, 0, 0, 0); + screen->driver->enable_counters(screen); +#endif + screen->base.destroy = panfrost_destroy_screen; screen->base.get_name = panfrost_get_name; @@ -543,7 +558,7 @@ 
panfrost_create_screen(int fd, struct renderonly *ro, bool is_drm) screen->base.get_paramf = panfrost_get_paramf; screen->base.get_timestamp = panfrost_get_timestamp; screen->base.is_format_supported = panfrost_is_format_supported; - //screen->base.context_create = panfrost_create_context; + screen->base.context_create = panfrost_create_context; screen->base.flush_frontbuffer = panfrost_flush_frontbuffer; screen->base.get_compiler_options = panfrost_screen_get_compiler_options; screen->base.fence_reference = panfrost_fence_reference; @@ -552,6 +567,7 @@ panfrost_create_screen(int fd, struct renderonly *ro, bool is_drm) screen->last_fragment_id = -1; screen->last_fragment_flushed = true; - fprintf(stderr, "stub: Upstream panfrost (use downstream fork)\n"); - return NULL; + panfrost_resource_screen_init(screen); + + return &screen->base; } diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index 07e2ad399f0..4c8fe8dd720 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -42,6 +42,10 @@ struct panfrost_screen; //#define DUMP_PERFORMANCE_COUNTERS +/* Flags for allocated memory */ +#define PAN_ALLOCATE_EXECUTE (1 << 0) +#define PAN_ALLOCATE_GROWABLE (1 << 1) + struct panfrost_driver { struct panfrost_bo * (*create_bo) (struct panfrost_screen *screen, const struct pipe_resource *template); struct panfrost_bo * (*import_bo) (struct panfrost_screen *screen, struct winsys_handle *whandle); @@ -49,7 +53,7 @@ struct panfrost_driver { void (*unmap_bo) (struct panfrost_context *ctx, struct pipe_transfer *transfer); void (*destroy_bo) (struct panfrost_screen *screen, struct panfrost_bo *bo); - void (*submit_job) (struct panfrost_context *ctx, mali_ptr addr, int nr_atoms); + int (*submit_vs_fs_job) (struct panfrost_context *ctx, bool has_draws); void (*force_flush_fragment) (struct panfrost_context *ctx); void (*allocate_slab) (struct panfrost_screen *screen, struct 
panfrost_memory *mem, diff --git a/src/gallium/drivers/panfrost/pan_swizzle.c b/src/gallium/drivers/panfrost/pan_swizzle.c new file mode 100644 index 00000000000..c021843846d --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_swizzle.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2012-2013 Luc Verhaegen <[email protected]> + * Copyright (c) 2018 Alyssa Rosenzweig <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include "pan_swizzle.h" +#include "pan_allocate.h" + +/* Space a group of 4-bits out. For instance, 0x7 -- that is, 0b111 -- would + * become 0b10101 */ + +static inline int +space_bits_4(int i) +{ + return ((i & 0x8) << 3) | + ((i & 0x4) << 2) | + ((i & 0x2) << 1) | + ((i & 0x1) << 0); +} + +/* Generate lookup table for the space filler curve. Note this is a 1:1 + * mapping, just with bits twiddled around. 
*/ + +uint32_t space_filler[16][16]; +uint32_t space_filler_packed4[16][4]; + +void +panfrost_generate_space_filler_indices() +{ + for (int y = 0; y < 16; ++y) { + for (int x = 0; x < 16; ++x) { + space_filler[y][x] = + space_bits_4(y ^ x) | (space_bits_4(y) << 1); + } + + for (int q = 0; q < 4; ++q) { + space_filler_packed4[y][q] = + (space_filler[y][(q * 4) + 0] << 0) | + (space_filler[y][(q * 4) + 1] << 8) | + (space_filler[y][(q * 4) + 2] << 16) | + (space_filler[y][(q * 4) + 3] << 24); + } + } +} + +static void +swizzle_bpp1_align16(int width, int height, int source_stride, int block_pitch, + const uint8_t *pixels, + uint8_t *ldest) +{ + for (int y = 0; y < height; ++y) { + { + int block_y = y & ~(0x0f); + int rem_y = y & 0x0f; + uint8_t *block_start_s = ldest + (block_y * block_pitch); + const uint8_t *source_start = pixels + (y * source_stride); + const uint8_t *source_end = source_start + width; + + /* Operate on blocks of 16 pixels to minimise bookkeeping */ + + for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { + const uint32_t *src_32 = (const uint32_t *) source_start; + + for (int q = 0; q < 4; ++q) { + uint32_t src = src_32[q]; + uint32_t spaced = space_filler_packed4[rem_y][q]; + uint16_t *bs = (uint16_t *) block_start_s; + + int spacedA = (spaced >> 0) & 0xFF; + int spacedB = (spaced >> 16) & 0xFF; + + bs[spacedA >> 1] = (src >> 0) & 0xFFFF; + bs[spacedB >> 1] = (src >> 16) & 0xFFFF; + } + } + } + + ++y; + + { + int block_y = y & ~(0x0f); + int rem_y = y & 0x0f; + uint8_t *block_start_s = ldest + (block_y * block_pitch); + const uint8_t *source_start = pixels + (y * source_stride); + const uint8_t *source_end = source_start + width; + + /* Operate on blocks of 16 pixels to minimise bookkeeping */ + + for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { + const uint32_t *src_32 = (const uint32_t *) source_start; + + for (int q = 0; q < 4; ++q) { + uint32_t src = src_32[q]; + uint32_t spaced = 
space_filler_packed4[rem_y][q]; + + block_start_s[(spaced >> 0) & 0xFF] = (src >> 0) & 0xFF; + block_start_s[(spaced >> 8) & 0xFF] = (src >> 8) & 0xFF; + + block_start_s[(spaced >> 16) & 0xFF] = (src >> 16) & 0xFF; + block_start_s[(spaced >> 24) & 0xFF] = (src >> 24) & 0xFF; + } + } + } + + } +} + +static void +swizzle_bpp4_align16(int width, int height, int source_stride, int block_pitch, + const uint32_t *pixels, + uint32_t *ldest) +{ + for (int y = 0; y < height; ++y) { + int block_y = y & ~(0x0f); + int rem_y = y & 0x0f; + uint32_t *block_start_s = ldest + (block_y * block_pitch); + const uint32_t *source_start = pixels + (y * source_stride); + const uint32_t *source_end = source_start + width; + + /* Operate on blocks of 16 pixels to minimise bookkeeping */ + + for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) { + for (int j = 0; j < 16; ++j) + block_start_s[space_filler[rem_y][j]] = source_start[j]; + } + } +} + +void +panfrost_texture_swizzle(int width, int height, int bytes_per_pixel, int source_stride, + const uint8_t *pixels, + uint8_t *ldest) +{ + /* Calculate maximum size, overestimating a bit */ + int block_pitch = ALIGN(width, 16) >> 4; + + /* Use fast path if available */ + if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) { + swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest); + return; + } else if (bytes_per_pixel == 1 /* && (ALIGN(width, 16) == width) */) { + swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest); + return; + } + + /* Otherwise, default back on generic path */ + + for (int y = 0; y < height; ++y) { + int block_y = y >> 4; + int rem_y = y & 0x0F; + int block_start_s = block_y * block_pitch * 256; + int source_start = y * source_stride; + + for (int x = 0; x < width; ++x) { + int block_x_s = (x >> 4) * 256; + int rem_x = x & 0x0F; + + int index = 
space_filler[rem_y][rem_x]; + const uint8_t *source = &pixels[source_start + bytes_per_pixel * x]; + uint8_t *dest = ldest + bytes_per_pixel * (block_start_s + block_x_s + index); + + for (int b = 0; b < bytes_per_pixel; ++b) + dest[b] = source[b]; + } + } +} + + +unsigned +panfrost_swizzled_size(int width, int height, int bytes_per_pixel) +{ + /* Calculate maximum size, overestimating a bit */ + int block_pitch = ALIGN(width, 16) >> 4; + unsigned sz = bytes_per_pixel * 256 * ((height >> 4) + 1) * block_pitch; + + return sz; +} + +#if 0 +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#define TW 1920 +#define TH 1080 +void +main() +{ + panfrost_generate_space_filler_indices(); + + uint8_t in[TW * TH * 4]; + + for (int i = 0; i < TW * TH * 4; ++i) in[i] = i; + + uint8_t *out = malloc(TW * TH * 4 * 2); + + for (int i = 0; i < 60; ++i) { + //swizzle_bpp4_align16(TW, TH, TW*4, TW>>4, (uint32_t *) in, (uint32_t *) out); + //panfrost_texture_swizzle_bpp4(TW, TH, TW*4, (uint32_t *) in, (uint32_t *) out); + //panfrost_texture_swizzle(TW, TH, 4, TW*4, (uint32_t *) in, (uint32_t *) out); + + int block_pitch = ALIGN(TW, 16) >> 4; + swizzle_bpp1_align16(TW, TH, TW, (block_pitch * 256 >> 4), in, (uint8_t *) out); + } + +#if 0 + uint8_t *reference = malloc(TW * TH * 4 * 2); + panfrost_texture_swizzle(TW, TH, 1, TW, (uint8_t *) in, (uint8_t *) reference); + + if (memcmp(reference, out, TW * TH * 4)) printf("XXX\n"); + +#endif + printf("ref %X\n", out[0]); +} +#endif diff --git a/src/gallium/drivers/panfrost/pan_swizzle.h b/src/gallium/drivers/panfrost/pan_swizzle.h new file mode 100644 index 00000000000..d375cfe53a5 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_swizzle.h @@ -0,0 +1,41 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __TEXSWZ_H__ +#define __TEXSWZ_H__ + +#include <stdint.h> + +void +panfrost_generate_space_filler_indices(void); + +void +panfrost_texture_swizzle(int width, int height, int bytes_per_pixel, int source_stride, + const uint8_t *pixels, + uint8_t *ldest); + +unsigned +panfrost_swizzled_size(int width, int height, int bytes_per_pixel); + +#endif diff --git a/src/gallium/drivers/panfrost/pan_wallpaper.c b/src/gallium/drivers/panfrost/pan_wallpaper.c new file mode 100644 index 00000000000..735524a49c9 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_wallpaper.c @@ -0,0 +1,275 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include "pan_wallpaper.h" +#include "pan_context.h" +#include "pan_screen.h" +//#include "include/panfrost-job.h" +#include "midgard/midgard_compile.h" +#include "compiler/nir/nir_builder.h" + +/* Creates the special-purpose fragment shader for wallpapering. A + * pseudo-vertex shader sets us up for a fullscreen quad render, with a texture + * coordinate varying */ + +static nir_shader * +panfrost_build_wallpaper_program() +{ + nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL); + nir_function *fn = nir_function_create(shader, "main"); + nir_function_impl *impl = nir_function_impl_create(fn); + + /* Create the variables */ + + nir_variable *c_texcoord = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_TexCoord"); + nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_FragColor"); + + c_texcoord->data.location = VARYING_SLOT_VAR0; + c_out->data.location = FRAG_RESULT_COLOR; + + /* Setup nir_builder */ + + nir_builder _b; + nir_builder *b = &_b; + nir_builder_init(b, impl); + b->cursor = nir_before_block(nir_start_block(impl)); + + /* Setup inputs */ + + nir_ssa_def *s_src = nir_load_var(b, c_texcoord); + + /* Build the passthrough 
texture shader */ + + nir_tex_instr *tx = nir_tex_instr_create(shader, 1); + tx->op = nir_texop_tex; + tx->texture_index = tx->sampler_index = 0; + tx->sampler_dim = GLSL_SAMPLER_DIM_2D; + tx->dest_type = nir_type_float; + + nir_src src = nir_src_for_ssa(s_src); + nir_src_copy(&tx->src[0].src, &src, tx); + tx->src[0].src_type = nir_tex_src_coord; + + nir_ssa_dest_init(&tx->instr, &tx->dest, nir_tex_instr_dest_size(tx), 32, NULL); + nir_builder_instr_insert(b, &tx->instr); + + nir_ssa_def *texel = &tx->dest.ssa; + + nir_store_var(b, c_out, texel, 0xFF); + + nir_print_shader(shader, stdout); + + return shader; +} + +/* Creates the CSO corresponding to the wallpaper program */ + +static struct panfrost_shader_variants * +panfrost_create_wallpaper_program(struct pipe_context *pctx) +{ + nir_shader *built_nir_shader = panfrost_build_wallpaper_program(); + + struct pipe_shader_state so = { + .type = PIPE_SHADER_IR_NIR, + .ir = { + .nir = built_nir_shader + } + }; + + return pctx->create_fs_state(pctx, &so); +} + +static struct panfrost_shader_variants *wallpaper_program = NULL; +static struct panfrost_shader_variants *wallpaper_saved_program = NULL; + +static void +panfrost_enable_wallpaper_program(struct pipe_context *pctx) +{ + struct panfrost_context *ctx = pan_context(pctx); + + if (!wallpaper_program) { + wallpaper_program = panfrost_create_wallpaper_program(pctx); + } + + /* Push the shader state */ + wallpaper_saved_program = ctx->fs; + + /* Bind the program */ + pctx->bind_fs_state(pctx, wallpaper_program); +} + +static void +panfrost_disable_wallpaper_program(struct pipe_context *pctx) +{ + /* Pop off the shader state */ + pctx->bind_fs_state(pctx, wallpaper_saved_program); +} + +/* Essentially, we insert a fullscreen textured quad, reading from the + * previous frame's framebuffer */ + +void +panfrost_draw_wallpaper(struct pipe_context *pipe) +{ + /* Disable wallpapering for now, but still exercise the shader generation to minimise bit rot */ + + 
panfrost_enable_wallpaper_program(pipe); + panfrost_disable_wallpaper_program(pipe); + + return; + +#if 0 + struct panfrost_context *ctx = pan_context(pipe); + + /* Setup payload for elided quad. TODO: Refactor draw_vbo so this can + * be a little more DRY */ + + ctx->payload_tiler.draw_start = 0; + ctx->payload_tiler.prefix.draw_mode = MALI_TRIANGLE_STRIP; + ctx->vertex_count = 4; + ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(4); + ctx->payload_tiler.prefix.unknown_draw &= ~(0x3000 | 0x18000); + ctx->payload_tiler.prefix.unknown_draw |= 0x18000; + ctx->payload_tiler.prefix.negative_start = 0; + ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(4); + ctx->payload_tiler.prefix.unknown_draw &= ~MALI_DRAW_INDEXED_UINT32; + ctx->payload_tiler.prefix.indices = (uintptr_t) NULL; + + /* Setup the wallpapering program. We need to build the program via + * NIR. */ + + panfrost_enable_wallpaper_program(pipe); + + /* Setup the texture/sampler pair */ + + struct pipe_sampler_view tmpl = { + .target = PIPE_TEXTURE_2D, + .swizzle_r = PIPE_SWIZZLE_X, + .swizzle_g = PIPE_SWIZZLE_Y, + .swizzle_b = PIPE_SWIZZLE_Z, + .swizzle_a = PIPE_SWIZZLE_W + }; + + struct pipe_sampler_state state = { + .min_mip_filter = PIPE_TEX_MIPFILTER_NONE, + .min_img_filter = PIPE_TEX_MIPFILTER_LINEAR, + .mag_img_filter = PIPE_TEX_MIPFILTER_LINEAR, + .wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE, + .wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE, + .wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE, + .normalized_coords = 1 + }; + + struct pipe_resource *rsrc = panfrost_screen(pipe->screen)->display_target; + struct pipe_sampler_state *sampler_state = pipe->create_sampler_state(pipe, &state); + struct pipe_sampler_view *sampler_view = pipe->create_sampler_view(pipe, rsrc, &tmpl); + + /* Bind texture/sampler. 
TODO: push/pop */ + pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &sampler_state); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &sampler_view); + + panfrost_emit_for_draw(ctx, false); + + /* Elision occurs by essentially precomputing the results of the + * implied vertex shader. Insert these results for fullscreen. The + * first two channels are ~screenspace coordinates, whereas the latter + * two are fixed 0.0/1.0 after perspective division. See the vertex + * shader epilogue for more context */ + + float implied_position_varying[] = { + /* The following is correct for scissored clears whose scissor deals with cutoff appropriately */ + +// -1.0, -1.0, 0.0, 1.0, +// -1.0, 65535.0, 0.0, 1.0, +// 65536.0, 1.0, 0.0, 1.0, +// 65536.0, 65536.0, 0.0, 1.0 + + /* The following output is correct for a fullscreen quad with screen size 2048x1600 */ + 0.0, 0.0, 0.0, 1.0, + 0.0, 1600.0, 0.0, 1.0, + 2048.0, 0.0, 0.0, 1.0, + 2048.0, 1280.0, 0.0, 1.0, + }; + + ctx->payload_tiler.postfix.position_varying = panfrost_upload_transient(ctx, implied_position_varying, sizeof(implied_position_varying)); + + /* Similarly, setup the texture coordinate varying, hardcoded to match + * the corners of the screen */ + + float texture_coordinates[] = { + 0.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 0.0, + 1.0, 1.0, 0.0, 0.0 + }; + + union mali_attr varyings[1] = { + { + .elements = panfrost_upload_transient(ctx, texture_coordinates, sizeof(texture_coordinates)) | 1, + .stride = sizeof(float) * 4, + .size = sizeof(texture_coordinates) + } + }; + + ctx->payload_tiler.postfix.varyings = panfrost_upload_transient(ctx, varyings, sizeof(varyings)); + + struct mali_attr_meta varying_meta[1] = { + { + .type = MALI_ATYPE_FLOAT, + .nr_components = MALI_POSITIVE(4), + .not_normalised = 1, + .unknown1 = /*0x2c22 - nr_comp=2*/ 0x2a22, + .unknown2 = 0x1 + } + }; + + mali_ptr saved_varying_meta = ctx->payload_tiler.postfix.varying_meta; + 
ctx->payload_tiler.postfix.varying_meta = panfrost_upload_transient(ctx, varying_meta, sizeof(varying_meta)); + + /* Emit the tiler job */ + struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, true); + struct mali_job_descriptor_header *jd = (struct mali_job_descriptor_header *) tiler.cpu; + ctx->u_tiler_jobs[ctx->tiler_job_count] = jd; + ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu; + ctx->draw_count++; + + /* Okay, so we have the tiler job emitted. Since we set elided_tiler + * mode, no dependencies will be set automatically. We don't actually + * want any dependencies, since we go first and we don't need a vertex + * first. That said, we do need the first tiler job to depend on us. + * Its second dep slot will be free (see the panfrost_vertex_tiler_job + * dependency setting algorithm), so fill us in with that + */ + + if (ctx->tiler_job_count > 1) { + ctx->u_tiler_jobs[0]->job_dependency_index_2 = jd->job_index; + } + + printf("Wallpaper boop\n"); + + /* Cleanup */ + panfrost_disable_wallpaper_program(pipe); + ctx->payload_tiler.postfix.varying_meta = saved_varying_meta; +#endif +} diff --git a/src/gallium/drivers/panfrost/pan_wallpaper.h b/src/gallium/drivers/panfrost/pan_wallpaper.h new file mode 100644 index 00000000000..301e45e7227 --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_wallpaper.h @@ -0,0 +1,33 @@ +/* + * © Copyright 2018 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies 
or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __PAN_WALLPAPER_H +#define __PAN_WALLPAPER_H + +#include "pipe/p_state.h" + +void +panfrost_draw_wallpaper(struct pipe_context *pipe); + +#endif |