summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Alyssa Rosenzweig <[email protected]> 2019-05-19 23:20:34 +0000
committer: Alyssa Rosenzweig <[email protected]> 2019-05-19 23:37:45 +0000
commit: 1155446c198f43fcfc7afcb01917f5b3517081c2 (patch)
tree: d45a72c703ea5ea5b5af4d4ffcf4728d45ab63ec /src
parent: 9cd8cd26de8c15750dff0268ae5085e5077216b1 (diff)
panfrost/midgard: Split up midgard_compile.c (RA)
This commit moves the register allocator out of midgard_compile.c and into its own midgard_ra.c file. In doing so, a number of dependencies are identified and moved into their own files in turn. midgard_compile.c is still fairly monolithic, but this should help. Code churn, but no functional changes should be introduced by this commit. Signed-off-by: Alyssa Rosenzweig <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/panfrost/meson.build 10
-rw-r--r-- src/gallium/drivers/panfrost/midgard/compiler.h 359
-rw-r--r-- src/gallium/drivers/panfrost/midgard/disassemble.c 1
-rw-r--r-- src/gallium/drivers/panfrost/midgard/helpers.h 144
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard.h 50
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard_compile.c 746
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard_liveness.c 92
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard_ops.c 188
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard_ops.h 53
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard_print.c 124
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard_ra.c 310
11 files changed, 1149 insertions, 928 deletions
diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build
index 93640a29c4c..075afa05cd9 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -27,6 +27,11 @@ files_panfrost = files(
'pan_resource.h',
'midgard/midgard_compile.c',
+ 'midgard/midgard_print.c',
+ 'midgard/midgard_ra.c',
+ 'midgard/midgard_liveness.c',
+ 'midgard/midgard_ops.c',
+
'midgard/nir_lower_blend.c',
'midgard/cppwrap.cpp',
'midgard/disassemble.c',
@@ -97,6 +102,10 @@ driver_panfrost = declare_dependency(
files_midgard = files(
'midgard/midgard_compile.c',
+ 'midgard/midgard_print.c',
+ 'midgard/midgard_ra.c',
+ 'midgard/midgard_liveness.c',
+ 'midgard/midgard_ops.c',
'midgard/cppwrap.cpp',
'midgard/disassemble.c',
'midgard/cmdline.c',
@@ -153,6 +162,7 @@ files_pandecode = files(
'pan_pretty_print.c',
'midgard/disassemble.c',
+ 'midgard/midgard_ops.c',
'bifrost/disassemble.c',
)
diff --git a/src/gallium/drivers/panfrost/midgard/compiler.h b/src/gallium/drivers/panfrost/midgard/compiler.h
new file mode 100644
index 00000000000..48c6db542a5
--- /dev/null
+++ b/src/gallium/drivers/panfrost/midgard/compiler.h
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2019 Alyssa Rosenzweig <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MDG_COMPILER_H
+#define _MDG_COMPILER_H
+
+#include "midgard.h"
+#include "helpers.h"
+#include "midgard_compile.h"
+
+#include "util/hash_table.h"
+#include "util/u_dynarray.h"
+#include "util/set.h"
+#include "util/list.h"
+
+#include "main/mtypes.h"
+#include "compiler/nir_types.h"
+#include "compiler/nir/nir.h"
+
+/* Forward declare */
+struct midgard_block;
+
+/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
+ * the hardware), hence why that must be zero. TARGET_DISCARD signals this
+ * instruction is actually a discard op. */
+
+#define TARGET_GOTO 0
+#define TARGET_BREAK 1
+#define TARGET_CONTINUE 2
+#define TARGET_DISCARD 3
+
+typedef struct midgard_branch {
+ /* If conditional, the condition is specified in r31.w */
+ bool conditional;
+
+ /* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */
+ bool invert_conditional;
+
+ /* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of the TARGET_* constants */
+ unsigned target_type;
+
+ /* The actual target */
+ union {
+ int target_block;
+ int target_break;
+ int target_continue;
+ };
+} midgard_branch;
+
+/* Instruction arguments represented as block-local SSA indices, rather than
+ * registers. Negative values mean unused. */
+
+typedef struct {
+ int src0;
+ int src1;
+ int dest;
+
+ /* src1 is -not- SSA but instead a 16-bit inline constant to be smudged
+ * in. Only valid for ALU ops. */
+ bool inline_constant;
+} ssa_args;
+
+/* Generic in-memory data type representing a single logical instruction, rather
+ * than a single instruction group. This is the preferred form for code gen.
+ * Multiple midgard_instructions will later be combined during scheduling,
+ * though this is not represented in this structure. Its format bridges
+ * the low-level binary representation with the higher level semantic meaning.
+ *
+ * Notably, it allows registers to be specified as block local SSA, for code
+ * emitted before the register allocation pass.
+ */
+
+typedef struct midgard_instruction {
+ /* Must be first for casting */
+ struct list_head link;
+
+ unsigned type; /* ALU, load/store, texture */
+
+ /* If the register allocator has not run yet... */
+ ssa_args ssa_args;
+
+ /* Special fields for an ALU instruction */
+ midgard_reg_info registers;
+
+ /* I.e. (1 << alu_bit) */
+ int unit;
+
+ /* When emitting bundle, should this instruction have a break forced
+ * before it? Used for r31 writes which are valid only within a single
+ * bundle and *need* to happen as early as possible... this is a hack,
+ * TODO remove when we have a scheduler */
+ bool precede_break;
+
+ bool has_constants;
+ float constants[4];
+ uint16_t inline_constant;
+ bool has_blend_constant;
+
+ bool compact_branch;
+ bool writeout;
+ bool prepacked_branch;
+
+ union {
+ midgard_load_store_word load_store;
+ midgard_vector_alu alu;
+ midgard_texture_word texture;
+ midgard_branch_extended branch_extended;
+ uint16_t br_compact;
+
+ /* General branch, rather than packed br_compact. Higher level
+ * than the other components */
+ midgard_branch branch;
+ };
+} midgard_instruction;
+
+typedef struct midgard_block {
+ /* Link to next block. Must be first for mir_get_block */
+ struct list_head link;
+
+ /* List of midgard_instructions emitted for the current block */
+ struct list_head instructions;
+
+ bool is_scheduled;
+
+ /* List of midgard_bundles emitted (after the scheduler has run) */
+ struct util_dynarray bundles;
+
+ /* Number of quadwords _actually_ emitted, as determined after scheduling */
+ unsigned quadword_count;
+
+ /* Successors: always one forward (the block after us), maybe
+ * one backwards (for a backward branch). No need for a second
+ * forward, since graph traversal would get there eventually
+ * anyway */
+ struct midgard_block *successors[2];
+ unsigned nr_successors;
+
+ /* The successor pointers form a graph, and in the case of
+ * complex control flow, this graph has cycles. To aid
+ * traversal during liveness analysis, we have a visited?
+ * boolean for passes to use as they see fit, provided they
+ * clean up later */
+ bool visited;
+} midgard_block;
+
+typedef struct midgard_bundle {
+ /* Tag for the overall bundle */
+ int tag;
+
+ /* Instructions contained by the bundle */
+ int instruction_count;
+ midgard_instruction instructions[5];
+
+ /* Bundle-wide ALU configuration */
+ int padding;
+ int control;
+ bool has_embedded_constants;
+ float constants[4];
+ bool has_blend_constant;
+
+ uint16_t register_words[8];
+ int register_words_count;
+
+ uint64_t body_words[8];
+ size_t body_size[8];
+ int body_words_count;
+} midgard_bundle;
+
+typedef struct compiler_context {
+ nir_shader *nir;
+ gl_shader_stage stage;
+
+ /* Is internally a blend shader? Depends on stage == FRAGMENT */
+ bool is_blend;
+
+ /* Tracking for blend constant patching */
+ int blend_constant_offset;
+
+ /* Current NIR function */
+ nir_function *func;
+
+ /* Unordered list of midgard_blocks */
+ int block_count;
+ struct list_head blocks;
+
+ midgard_block *initial_block;
+ midgard_block *previous_source_block;
+ midgard_block *final_block;
+
+ /* Block currently being emitted; new instructions are appended to its list */
+ midgard_block *current_block;
+
+ /* The current "depth" of the loop, for disambiguating breaks/continues
+ * when using nested loops */
+ int current_loop_depth;
+
+ /* Constants which have been loaded, for later inlining */
+ struct hash_table_u64 *ssa_constants;
+
+ /* SSA indices to be outputted to corresponding varying offset */
+ struct hash_table_u64 *ssa_varyings;
+
+ /* SSA values / registers which have been aliased. Naively, these
+ * demand a fmov output; instead, we alias them in a later pass to
+ * avoid the wasted op.
+ *
+ * A note on encoding: to avoid dynamic memory management here, rather
+ * than mapping to a pointer, we map to the source index; the key
+ * itself is just the destination index. */
+
+ struct hash_table_u64 *ssa_to_alias;
+ struct set *leftover_ssa_to_alias;
+
+ /* Actual SSA-to-register for RA */
+ struct hash_table_u64 *ssa_to_register;
+
+ /* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
+ struct hash_table_u64 *hash_to_temp;
+ int temp_count;
+ int max_hash;
+
+ /* Just the count of the max register used. Higher count => higher
+ * register pressure */
+ int work_registers;
+
+ /* Used for cont/last hinting. Increase when a tex op is added.
+ * Decrease when a tex op is removed. */
+ int texture_op_count;
+
+ /* Mapping of texture register -> SSA index for unaliasing */
+ int texture_index[2];
+
+ /* If any path hits a discard instruction */
+ bool can_discard;
+
+ /* The number of uniforms allowable for the fast path */
+ int uniform_cutoff;
+
+ /* Count of instructions emitted from NIR overall, across all blocks */
+ int instruction_count;
+
+ /* Alpha ref value passed in */
+ float alpha_ref;
+
+ /* The index corresponding to the fragment output */
+ unsigned fragment_output;
+
+ /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
+ unsigned sysvals[MAX_SYSVAL_COUNT];
+ unsigned sysval_count;
+ struct hash_table_u64 *sysval_to_id;
+} compiler_context;
+
+/* Helpers for manipulating the above structures (forming the driver IR) */
+
+/* Append instruction to end of current block */
+
+static inline midgard_instruction *
+mir_upload_ins(struct midgard_instruction ins)
+{
+ midgard_instruction *heap = malloc(sizeof(ins));
+ memcpy(heap, &ins, sizeof(ins));
+ return heap;
+}
+
+static inline void
+emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
+{
+ list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions);
+}
+
+static inline void
+mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins)
+{
+ list_addtail(&(mir_upload_ins(ins))->link, &tag->link);
+}
+
+static inline void
+mir_remove_instruction(struct midgard_instruction *ins)
+{
+ list_del(&ins->link);
+}
+
+static inline midgard_instruction*
+mir_prev_op(struct midgard_instruction *ins)
+{
+ return list_last_entry(&(ins->link), midgard_instruction, link);
+}
+
+static inline midgard_instruction*
+mir_next_op(struct midgard_instruction *ins)
+{
+ return list_first_entry(&(ins->link), midgard_instruction, link);
+}
+
+#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &ctx->blocks, link)
+#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &ctx->blocks, link)
+
+#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &ctx->current_block->instructions, link)
+#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &ctx->current_block->instructions, link)
+#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &block->instructions, link)
+#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &block->instructions, link)
+#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &block->instructions, link)
+#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &block->instructions, link)
+#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &block->instructions, link)
+
+
+static inline midgard_instruction *
+mir_last_in_block(struct midgard_block *block)
+{
+ return list_last_entry(&block->instructions, struct midgard_instruction, link);
+}
+
+static inline midgard_block *
+mir_get_block(compiler_context *ctx, int idx)
+{
+ struct list_head *lst = &ctx->blocks;
+
+ while ((idx--) + 1)
+ lst = lst->next;
+
+ return (struct midgard_block *) lst;
+}
+
+/* MIR printing */
+
+void mir_print_instruction(midgard_instruction *ins);
+void mir_print_block(midgard_block *block);
+void mir_print_shader(compiler_context *ctx);
+
+/* Register allocation */
+
+struct ra_graph;
+
+struct ra_graph* allocate_registers(compiler_context *ctx);
+void install_registers(compiler_context *ctx, struct ra_graph *g);
+bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src);
+
+#endif
diff --git a/src/gallium/drivers/panfrost/midgard/disassemble.c b/src/gallium/drivers/panfrost/midgard/disassemble.c
index a9e443fa67c..c467e94fc29 100644
--- a/src/gallium/drivers/panfrost/midgard/disassemble.c
+++ b/src/gallium/drivers/panfrost/midgard/disassemble.c
@@ -31,6 +31,7 @@
#include <string.h>
#include "midgard.h"
#include "midgard-parse.h"
+#include "midgard_ops.h"
#include "disassemble.h"
#include "helpers.h"
#include "util/half_float.h"
diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h b/src/gallium/drivers/panfrost/midgard/helpers.h
index f32a683233a..9d287259a8a 100644
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@@ -1,7 +1,4 @@
-/* Author(s):
- * Alyssa Rosenzweig
- *
- * Copyright (c) 2018 Alyssa Rosenzweig ([email protected])
+/* Copyright (c) 2018-2019 Alyssa Rosenzweig ([email protected])
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,6 +19,9 @@
* THE SOFTWARE.
*/
+#ifndef __MDG_HELPERS_H
+#define __MDG_HELPERS_H
+
#define OP_IS_STORE_VARY(op) (\
op == midgard_op_st_vary_16 || \
op == midgard_op_st_vary_32 \
@@ -150,140 +150,12 @@
#define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD)
#define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT)
-/* Table of mapping opcodes to accompanying properties relevant to
- * scheduling/emission/etc */
-
-static struct {
+struct mir_op_props {
const char *name;
unsigned props;
-} alu_opcode_props[256] = {
- [midgard_alu_op_fadd] = {"fadd", UNITS_ADD | OP_COMMUTES},
- [midgard_alu_op_fmul] = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
- [midgard_alu_op_fmin] = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
- [midgard_alu_op_fmax] = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
- [midgard_alu_op_imin] = {"imin", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_imax] = {"imax", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_umin] = {"umin", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_umax] = {"umax", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_fmov] = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
- [midgard_alu_op_fround] = {"fround", UNITS_ADD},
- [midgard_alu_op_froundeven] = {"froundeven", UNITS_ADD},
- [midgard_alu_op_ftrunc] = {"ftrunc", UNITS_ADD},
- [midgard_alu_op_ffloor] = {"ffloor", UNITS_ADD},
- [midgard_alu_op_fceil] = {"fceil", UNITS_ADD},
- [midgard_alu_op_ffma] = {"ffma", UNIT_VLUT},
-
- /* Though they output a scalar, they need to run on a vector unit
- * since they process vectors */
- [midgard_alu_op_fdot3] = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
- [midgard_alu_op_fdot3r] = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
- [midgard_alu_op_fdot4] = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
-
- /* Incredibly, iadd can run on vmul, etc */
- [midgard_alu_op_iadd] = {"iadd", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_iabs] = {"iabs", UNITS_ADD},
- [midgard_alu_op_isub] = {"isub", UNITS_MOST},
- [midgard_alu_op_imul] = {"imul", UNITS_MUL | OP_COMMUTES},
- [midgard_alu_op_imov] = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},
-
- /* For vector comparisons, use ball etc */
- [midgard_alu_op_feq] = {"feq", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_fne] = {"fne", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_fle] = {"fle", UNITS_MOST},
- [midgard_alu_op_flt] = {"flt", UNITS_MOST},
- [midgard_alu_op_ieq] = {"ieq", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_ine] = {"ine", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_ilt] = {"ilt", UNITS_MOST},
- [midgard_alu_op_ile] = {"ile", UNITS_MOST},
- [midgard_alu_op_ult] = {"ult", UNITS_MOST},
- [midgard_alu_op_ule] = {"ule", UNITS_MOST},
-
- [midgard_alu_op_icsel] = {"icsel", UNITS_ADD},
- [midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD},
- [midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD},
- [midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL},
-
- [midgard_alu_op_frcp] = {"frcp", UNIT_VLUT},
- [midgard_alu_op_frsqrt] = {"frsqrt", UNIT_VLUT},
- [midgard_alu_op_fsqrt] = {"fsqrt", UNIT_VLUT},
- [midgard_alu_op_fpow_pt1] = {"fpow_pt1", UNIT_VLUT},
- [midgard_alu_op_fexp2] = {"fexp2", UNIT_VLUT},
- [midgard_alu_op_flog2] = {"flog2", UNIT_VLUT},
-
- [midgard_alu_op_f2i] = {"f2i", UNITS_ADD | OP_TYPE_CONVERT},
- [midgard_alu_op_f2u] = {"f2u", UNITS_ADD | OP_TYPE_CONVERT},
- [midgard_alu_op_f2u8] = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT},
- [midgard_alu_op_i2f] = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
- [midgard_alu_op_u2f] = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},
-
- [midgard_alu_op_fsin] = {"fsin", UNIT_VLUT},
- [midgard_alu_op_fcos] = {"fcos", UNIT_VLUT},
-
- /* XXX: Test case where it's right on smul but not sadd */
- [midgard_alu_op_iand] = {"iand", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_iandnot] = {"iandnot", UNITS_MOST},
-
- [midgard_alu_op_ior] = {"ior", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_iornot] = {"iornot", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_inor] = {"inor", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_ixor] = {"ixor", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_inxor] = {"inxor", UNITS_MOST | OP_COMMUTES},
- [midgard_alu_op_iclz] = {"iclz", UNITS_ADD},
- [midgard_alu_op_ibitcount8] = {"ibitcount8", UNITS_ADD},
- [midgard_alu_op_inand] = {"inand", UNITS_MOST},
- [midgard_alu_op_ishl] = {"ishl", UNITS_ADD},
- [midgard_alu_op_iasr] = {"iasr", UNITS_ADD},
- [midgard_alu_op_ilsr] = {"ilsr", UNITS_ADD},
-
- [midgard_alu_op_fball_eq] = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
- [midgard_alu_op_fbany_neq] = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
- [midgard_alu_op_iball_eq] = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
- [midgard_alu_op_iball_neq] = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
- [midgard_alu_op_ibany_eq] = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
- [midgard_alu_op_ibany_neq] = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},
-
- /* These instructions are not yet emitted by the compiler, so
- * don't speculate about units yet */
- [midgard_alu_op_ishladd] = {"ishladd", 0},
-
- [midgard_alu_op_uball_lt] = {"uball_lt", 0},
- [midgard_alu_op_uball_lte] = {"uball_lte", 0},
- [midgard_alu_op_iball_lt] = {"iball_lt", 0},
- [midgard_alu_op_iball_lte] = {"iball_lte", 0},
- [midgard_alu_op_ubany_lt] = {"ubany_lt", 0},
- [midgard_alu_op_ubany_lte] = {"ubany_lte", 0},
- [midgard_alu_op_ibany_lt] = {"ibany_lt", 0},
- [midgard_alu_op_ibany_lte] = {"ibany_lte", 0},
-
- [midgard_alu_op_freduce] = {"freduce", 0},
- [midgard_alu_op_bball_eq] = {"bball_eq", 0 | OP_COMMUTES},
- [midgard_alu_op_bbany_neq] = {"bball_eq", 0 | OP_COMMUTES},
- [midgard_alu_op_fatan2_pt1] = {"fatan2_pt1", 0},
- [midgard_alu_op_fatan_pt2] = {"fatan_pt2", 0},
};
-/* Is this opcode that of an integer (regardless of signedness)? Instruction
- * names authoritatively determine types */
-
-static inline bool
-midgard_is_integer_op(int op)
-{
- const char *name = alu_opcode_props[op].name;
-
- if (!name)
- return false;
-
- return (name[0] == 'i') || (name[0] == 'u');
-}
-
-/* Does this opcode *write* an integer? Same as is_integer_op, unless it's a
- * conversion between int<->float in which case we do the opposite */
-
-static inline bool
-midgard_is_integer_out_op(int op)
-{
- bool is_int = midgard_is_integer_op(op);
- bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT;
+/* This file is common, so don't define the tables themselves. #include
+ * midgard_ops.h if you need that, or edit midgard_ops.c directly */
- return is_int ^ is_conversion;
-}
+#endif
diff --git a/src/gallium/drivers/panfrost/midgard/midgard.h b/src/gallium/drivers/panfrost/midgard/midgard.h
index d5d6c12f78c..f5cd59cbfb1 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard.h
@@ -536,54 +536,4 @@ __attribute__((__packed__))
}
midgard_texture_word;
-static char *load_store_opcode_names[256] = {
- [midgard_op_st_cubemap_coords] = "st_cubemap_coords",
- [midgard_op_ld_global_id] = "ld_global_id",
-
- [midgard_op_atomic_add] = "atomic_add",
- [midgard_op_atomic_and] = "atomic_and",
- [midgard_op_atomic_or] = "atomic_or",
- [midgard_op_atomic_xor] = "atomic_xor",
- [midgard_op_atomic_imin] = "atomic_imin",
- [midgard_op_atomic_umin] = "atomic_umin",
- [midgard_op_atomic_imax] = "atomic_imax",
- [midgard_op_atomic_umax] = "atomic_umax",
- [midgard_op_atomic_xchg] = "atomic_xchg",
-
- [midgard_op_ld_char] = "ld_char",
- [midgard_op_ld_char2] = "ld_char2",
- [midgard_op_ld_short] = "ld_short",
- [midgard_op_ld_char4] = "ld_char4",
- [midgard_op_ld_short4] = "ld_short4",
- [midgard_op_ld_int4] = "ld_int4",
-
- [midgard_op_ld_attr_32] = "ld_attr_32",
- [midgard_op_ld_attr_16] = "ld_attr_16",
- [midgard_op_ld_attr_32i] = "ld_attr_32i",
-
- [midgard_op_ld_vary_32] = "ld_vary_32",
- [midgard_op_ld_vary_16] = "ld_vary_16",
- [midgard_op_ld_vary_32i] = "ld_vary_32i",
-
- [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
-
- [midgard_op_ld_uniform_16] = "ld_uniform_16",
- [midgard_op_ld_uniform_32] = "ld_uniform_32",
- [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
-
- [midgard_op_st_char] = "st_char",
- [midgard_op_st_char2] = "st_char2",
- [midgard_op_st_char4] = "st_char4",
- [midgard_op_st_short4] = "st_short4",
- [midgard_op_st_int4] = "st_int4",
-
- [midgard_op_st_vary_32] = "st_vary_32",
- [midgard_op_st_vary_16] = "st_vary_16",
- [midgard_op_st_vary_32i] = "st_vary_32i",
-
- [midgard_op_st_image_f] = "st_image_f",
- [midgard_op_st_image_ui] = "st_image_ui",
- [midgard_op_st_image_i] = "st_image_i",
-};
-
#endif
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index fa74b3f9046..640e4a5fb86 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2018 Alyssa Rosenzweig <[email protected]>
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -36,7 +36,6 @@
#include "main/imports.h"
#include "compiler/nir/nir_builder.h"
#include "util/half_float.h"
-#include "util/register_allocate.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/list.h"
@@ -45,7 +44,9 @@
#include "midgard.h"
#include "midgard_nir.h"
#include "midgard_compile.h"
+#include "midgard_ops.h"
#include "helpers.h"
+#include "compiler.h"
#include "disassemble.h"
@@ -64,138 +65,12 @@ int midgard_debug = 0;
fprintf(stderr, "%s:%d: "fmt, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
-/* Instruction arguments represented as block-local SSA indices, rather than
- * registers. Negative values mean unused. */
-
-typedef struct {
- int src0;
- int src1;
- int dest;
-
- /* src1 is -not- SSA but instead a 16-bit inline constant to be smudged
- * in. Only valid for ALU ops. */
- bool inline_constant;
-} ssa_args;
-
-/* Forward declare so midgard_branch can reference */
-struct midgard_block;
-
-/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
- * the hardware), hence why that must be zero. TARGET_DISCARD signals this
- * instruction is actually a discard op. */
-
-#define TARGET_GOTO 0
-#define TARGET_BREAK 1
-#define TARGET_CONTINUE 2
-#define TARGET_DISCARD 3
-
-typedef struct midgard_branch {
- /* If conditional, the condition is specified in r31.w */
- bool conditional;
-
- /* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */
- bool invert_conditional;
-
- /* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of TARGET_ */
- unsigned target_type;
-
- /* The actual target */
- union {
- int target_block;
- int target_break;
- int target_continue;
- };
-} midgard_branch;
-
static bool
midgard_is_branch_unit(unsigned unit)
{
return (unit == ALU_ENAB_BRANCH) || (unit == ALU_ENAB_BR_COMPACT);
}
-/* Generic in-memory data type repesenting a single logical instruction, rather
- * than a single instruction group. This is the preferred form for code gen.
- * Multiple midgard_insturctions will later be combined during scheduling,
- * though this is not represented in this structure. Its format bridges
- * the low-level binary representation with the higher level semantic meaning.
- *
- * Notably, it allows registers to be specified as block local SSA, for code
- * emitted before the register allocation pass.
- */
-
-typedef struct midgard_instruction {
- /* Must be first for casting */
- struct list_head link;
-
- unsigned type; /* ALU, load/store, texture */
-
- /* If the register allocator has not run yet... */
- ssa_args ssa_args;
-
- /* Special fields for an ALU instruction */
- midgard_reg_info registers;
-
- /* I.e. (1 << alu_bit) */
- int unit;
-
- /* When emitting bundle, should this instruction have a break forced
- * before it? Used for r31 writes which are valid only within a single
- * bundle and *need* to happen as early as possible... this is a hack,
- * TODO remove when we have a scheduler */
- bool precede_break;
-
- bool has_constants;
- float constants[4];
- uint16_t inline_constant;
- bool has_blend_constant;
-
- bool compact_branch;
- bool writeout;
- bool prepacked_branch;
-
- union {
- midgard_load_store_word load_store;
- midgard_vector_alu alu;
- midgard_texture_word texture;
- midgard_branch_extended branch_extended;
- uint16_t br_compact;
-
- /* General branch, rather than packed br_compact. Higher level
- * than the other components */
- midgard_branch branch;
- };
-} midgard_instruction;
-
-typedef struct midgard_block {
- /* Link to next block. Must be first for mir_get_block */
- struct list_head link;
-
- /* List of midgard_instructions emitted for the current block */
- struct list_head instructions;
-
- bool is_scheduled;
-
- /* List of midgard_bundles emitted (after the scheduler has run) */
- struct util_dynarray bundles;
-
- /* Number of quadwords _actually_ emitted, as determined after scheduling */
- unsigned quadword_count;
-
- /* Successors: always one forward (the block after us), maybe
- * one backwards (for a backward branch). No need for a second
- * forward, since graph traversal would get there eventually
- * anyway */
- struct midgard_block *successors[2];
- unsigned nr_successors;
-
- /* The successors pointer form a graph, and in the case of
- * complex control flow, this graph has a cycles. To aid
- * traversal during liveness analysis, we have a visited?
- * boolean for passes to use as they see fit, provided they
- * clean up later */
- bool visited;
-} midgard_block;
-
static void
midgard_block_add_successor(midgard_block *block, midgard_block *successor)
{
@@ -404,267 +279,6 @@ midgard_create_branch_extended( midgard_condition cond,
return branch;
}
-typedef struct midgard_bundle {
- /* Tag for the overall bundle */
- int tag;
-
- /* Instructions contained by the bundle */
- int instruction_count;
- midgard_instruction instructions[5];
-
- /* Bundle-wide ALU configuration */
- int padding;
- int control;
- bool has_embedded_constants;
- float constants[4];
- bool has_blend_constant;
-
- uint16_t register_words[8];
- int register_words_count;
-
- uint64_t body_words[8];
- size_t body_size[8];
- int body_words_count;
-} midgard_bundle;
-
-typedef struct compiler_context {
- nir_shader *nir;
- gl_shader_stage stage;
-
- /* Is internally a blend shader? Depends on stage == FRAGMENT */
- bool is_blend;
-
- /* Tracking for blend constant patching */
- int blend_constant_offset;
-
- /* Current NIR function */
- nir_function *func;
-
- /* Unordered list of midgard_blocks */
- int block_count;
- struct list_head blocks;
-
- midgard_block *initial_block;
- midgard_block *previous_source_block;
- midgard_block *final_block;
-
- /* List of midgard_instructions emitted for the current block */
- midgard_block *current_block;
-
- /* The current "depth" of the loop, for disambiguating breaks/continues
- * when using nested loops */
- int current_loop_depth;
-
- /* Constants which have been loaded, for later inlining */
- struct hash_table_u64 *ssa_constants;
-
- /* SSA indices to be outputted to corresponding varying offset */
- struct hash_table_u64 *ssa_varyings;
-
- /* SSA values / registers which have been aliased. Naively, these
- * demand a fmov output; instead, we alias them in a later pass to
- * avoid the wasted op.
- *
- * A note on encoding: to avoid dynamic memory management here, rather
- * than ampping to a pointer, we map to the source index; the key
- * itself is just the destination index. */
-
- struct hash_table_u64 *ssa_to_alias;
- struct set *leftover_ssa_to_alias;
-
- /* Actual SSA-to-register for RA */
- struct hash_table_u64 *ssa_to_register;
-
- /* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
- struct hash_table_u64 *hash_to_temp;
- int temp_count;
- int max_hash;
-
- /* Just the count of the max register used. Higher count => higher
- * register pressure */
- int work_registers;
-
- /* Used for cont/last hinting. Increase when a tex op is added.
- * Decrease when a tex op is removed. */
- int texture_op_count;
-
- /* Mapping of texture register -> SSA index for unaliasing */
- int texture_index[2];
-
- /* If any path hits a discard instruction */
- bool can_discard;
-
- /* The number of uniforms allowable for the fast path */
- int uniform_cutoff;
-
- /* Count of instructions emitted from NIR overall, across all blocks */
- int instruction_count;
-
- /* Alpha ref value passed in */
- float alpha_ref;
-
- /* The index corresponding to the fragment output */
- unsigned fragment_output;
-
- /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
- unsigned sysvals[MAX_SYSVAL_COUNT];
- unsigned sysval_count;
- struct hash_table_u64 *sysval_to_id;
-} compiler_context;
-
-/* Append instruction to end of current block */
-
-static midgard_instruction *
-mir_upload_ins(struct midgard_instruction ins)
-{
- midgard_instruction *heap = malloc(sizeof(ins));
- memcpy(heap, &ins, sizeof(ins));
- return heap;
-}
-
-static void
-emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
-{
- list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions);
-}
-
-static void
-mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins)
-{
- list_addtail(&(mir_upload_ins(ins))->link, &tag->link);
-}
-
-static void
-mir_remove_instruction(struct midgard_instruction *ins)
-{
- list_del(&ins->link);
-}
-
-static midgard_instruction*
-mir_prev_op(struct midgard_instruction *ins)
-{
- return list_last_entry(&(ins->link), midgard_instruction, link);
-}
-
-static midgard_instruction*
-mir_next_op(struct midgard_instruction *ins)
-{
- return list_first_entry(&(ins->link), midgard_instruction, link);
-}
-
-#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &ctx->blocks, link)
-#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &ctx->blocks, link)
-
-#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &ctx->current_block->instructions, link)
-#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &ctx->current_block->instructions, link)
-#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &block->instructions, link)
-#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &block->instructions, link)
-#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &block->instructions, link)
-#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &block->instructions, link)
-#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &block->instructions, link)
-
-
-static midgard_instruction *
-mir_last_in_block(struct midgard_block *block)
-{
- return list_last_entry(&block->instructions, struct midgard_instruction, link);
-}
-
-static midgard_block *
-mir_get_block(compiler_context *ctx, int idx)
-{
- struct list_head *lst = &ctx->blocks;
-
- while ((idx--) + 1)
- lst = lst->next;
-
- return (struct midgard_block *) lst;
-}
-
-/* Pretty printer for internal Midgard IR */
-
-static void
-print_mir_source(int source)
-{
- if (source >= SSA_FIXED_MINIMUM) {
- /* Specific register */
- int reg = SSA_REG_FROM_FIXED(source);
-
- /* TODO: Moving threshold */
- if (reg > 16 && reg < 24)
- printf("u%d", 23 - reg);
- else
- printf("r%d", reg);
- } else {
- printf("%d", source);
- }
-}
-
-static void
-print_mir_instruction(midgard_instruction *ins)
-{
- printf("\t");
-
- switch (ins->type) {
- case TAG_ALU_4: {
- midgard_alu_op op = ins->alu.op;
- const char *name = alu_opcode_props[op].name;
-
- if (ins->unit)
- printf("%d.", ins->unit);
-
- printf("%s", name ? name : "??");
- break;
- }
-
- case TAG_LOAD_STORE_4: {
- midgard_load_store_op op = ins->load_store.op;
- const char *name = load_store_opcode_names[op];
-
- assert(name);
- printf("%s", name);
- break;
- }
-
- case TAG_TEXTURE_4: {
- printf("texture");
- break;
- }
-
- default:
- assert(0);
- }
-
- ssa_args *args = &ins->ssa_args;
-
- printf(" %d, ", args->dest);
-
- print_mir_source(args->src0);
- printf(", ");
-
- if (args->inline_constant)
- printf("#%d", ins->inline_constant);
- else
- print_mir_source(args->src1);
-
- if (ins->has_constants)
- printf(" <%f, %f, %f, %f>", ins->constants[0], ins->constants[1], ins->constants[2], ins->constants[3]);
-
- printf("\n");
-}
-
-static void
-print_mir_block(midgard_block *block)
-{
- printf("{\n");
-
- mir_foreach_instr_in_block(block, ins) {
- print_mir_instruction(ins);
- }
-
- printf("}\n");
-}
-
static void
attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constants, int name)
{
@@ -976,26 +590,6 @@ effective_writemask(midgard_vector_alu *alu)
}
static unsigned
-find_or_allocate_temp(compiler_context *ctx, unsigned hash)
-{
- if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM))
- return hash;
-
- unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
-
- if (temp)
- return temp - 1;
-
- /* If no temp is find, allocate one */
- temp = ctx->temp_count++;
- ctx->max_hash = MAX2(ctx->max_hash, hash);
-
- _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
-
- return temp;
-}
-
-static unsigned
nir_src_index(compiler_context *ctx, nir_src *src)
{
if (src->is_ssa)
@@ -1983,338 +1577,6 @@ emit_instr(compiler_context *ctx, struct nir_instr *instr)
}
}
-/* Determine the actual hardware from the index based on the RA results or special values */
-
-static int
-dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
-{
- if (reg >= SSA_FIXED_MINIMUM)
- return SSA_REG_FROM_FIXED(reg);
-
- if (reg >= 0) {
- assert(reg < maxreg);
- assert(g);
- int r = ra_get_node_reg(g, reg);
- ctx->work_registers = MAX2(ctx->work_registers, r);
- return r;
- }
-
- switch (reg) {
- /* fmov style unused */
- case SSA_UNUSED_0:
- return REGISTER_UNUSED;
-
- /* lut style unused */
- case SSA_UNUSED_1:
- return REGISTER_UNUSED;
-
- default:
- DBG("Unknown SSA register alias %d\n", reg);
- assert(0);
- return 31;
- }
-}
-
-static unsigned int
-midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
-{
- /* Choose the first available register to minimise reported register pressure */
-
- for (int i = 0; i < 16; ++i) {
- if (BITSET_TEST(regs, i)) {
- return i;
- }
- }
-
- assert(0);
- return 0;
-}
-
-static bool
-midgard_is_live_in_instr(midgard_instruction *ins, int src)
-{
- if (ins->ssa_args.src0 == src) return true;
- if (ins->ssa_args.src1 == src) return true;
-
- return false;
-}
-
-/* Determine if a variable is live in the successors of a block */
-static bool
-is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
-{
- for (unsigned i = 0; i < bl->nr_successors; ++i) {
- midgard_block *succ = bl->successors[i];
-
- /* If we already visited, the value we're seeking
- * isn't down this path (or we would have short
- * circuited */
-
- if (succ->visited) continue;
-
- /* Otherwise (it's visited *now*), check the block */
-
- succ->visited = true;
-
- mir_foreach_instr_in_block(succ, ins) {
- if (midgard_is_live_in_instr(ins, src))
- return true;
- }
-
- /* ...and also, check *its* successors */
- if (is_live_after_successors(ctx, succ, src))
- return true;
-
- }
-
- /* Welp. We're really not live. */
-
- return false;
-}
-
-static bool
-is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src)
-{
- /* Check the rest of the block for liveness */
-
- mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) {
- if (midgard_is_live_in_instr(ins, src))
- return true;
- }
-
- /* Check the rest of the blocks for liveness recursively */
-
- bool succ = is_live_after_successors(ctx, block, src);
-
- mir_foreach_block(ctx, block) {
- block->visited = false;
- }
-
- return succ;
-}
-
-/* Once registers have been decided via register allocation
- * (allocate_registers), we need to rewrite the MIR to use registers instead of
- * SSA */
-
-static void
-install_registers(compiler_context *ctx, struct ra_graph *g)
-{
- mir_foreach_block(ctx, block) {
- mir_foreach_instr_in_block(block, ins) {
- if (ins->compact_branch) continue;
-
- ssa_args args = ins->ssa_args;
-
- switch (ins->type) {
- case TAG_ALU_4:
- ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);
-
- ins->registers.src2_imm = args.inline_constant;
-
- if (args.inline_constant) {
- /* Encode inline 16-bit constant as a vector by default */
-
- ins->registers.src2_reg = ins->inline_constant >> 11;
-
- int lower_11 = ins->inline_constant & ((1 << 12) - 1);
-
- uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
- ins->alu.src2 = imm << 2;
- } else {
- ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
- }
-
- ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);
-
- break;
-
- case TAG_LOAD_STORE_4: {
- if (OP_IS_STORE_VARY(ins->load_store.op)) {
- /* TODO: use ssa_args for st_vary */
- ins->load_store.reg = 0;
- } else {
- bool has_dest = args.dest >= 0;
- int ssa_arg = has_dest ? args.dest : args.src0;
-
- ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
- }
-
- break;
- }
-
- default:
- break;
- }
- }
- }
-
-}
-
-/* This routine performs the actual register allocation. It should be succeeded
- * by install_registers */
-
-static struct ra_graph *
-allocate_registers(compiler_context *ctx)
-{
- /* First, initialize the RA */
- struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);
-
- /* Create a primary (general purpose) class, as well as special purpose
- * pipeline register classes */
-
- int primary_class = ra_alloc_reg_class(regs);
- int varying_class = ra_alloc_reg_class(regs);
-
- /* Add the full set of work registers */
- int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
- for (int i = 0; i < work_count; ++i)
- ra_class_add_reg(regs, primary_class, i);
-
- /* Add special registers */
- ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
- ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);
-
- /* We're done setting up */
- ra_set_finalize(regs, NULL);
-
- /* Transform the MIR into squeezed index form */
- mir_foreach_block(ctx, block) {
- mir_foreach_instr_in_block(block, ins) {
- if (ins->compact_branch) continue;
-
- ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
- ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
- ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
- }
- if (midgard_debug & MIDGARD_DBG_SHADERS)
- print_mir_block(block);
- }
-
- /* No register allocation to do with no SSA */
-
- if (!ctx->temp_count)
- return NULL;
-
- /* Let's actually do register allocation */
- int nodes = ctx->temp_count;
- struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
-
- /* Set everything to the work register class, unless it has somewhere
- * special to go */
-
- mir_foreach_block(ctx, block) {
- mir_foreach_instr_in_block(block, ins) {
- if (ins->compact_branch) continue;
-
- if (ins->ssa_args.dest < 0) continue;
-
- if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
-
- int class = primary_class;
-
- ra_set_node_class(g, ins->ssa_args.dest, class);
- }
- }
-
- for (int index = 0; index <= ctx->max_hash; ++index) {
- unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1);
-
- if (temp) {
- unsigned reg = temp - 1;
- int t = find_or_allocate_temp(ctx, index);
- ra_set_node_reg(g, t, reg);
- }
- }
-
- /* Determine liveness */
-
- int *live_start = malloc(nodes * sizeof(int));
- int *live_end = malloc(nodes * sizeof(int));
-
- /* Initialize as non-existent */
-
- for (int i = 0; i < nodes; ++i) {
- live_start[i] = live_end[i] = -1;
- }
-
- int d = 0;
-
- mir_foreach_block(ctx, block) {
- mir_foreach_instr_in_block(block, ins) {
- if (ins->compact_branch) continue;
-
- /* Dest is < 0 for st_vary instructions, which break
- * the usual SSA conventions. Liveness analysis doesn't
- * make sense on these instructions, so skip them to
- * avoid memory corruption */
-
- if (ins->ssa_args.dest < 0) continue;
-
- if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
- /* If this destination is not yet live, it is now since we just wrote it */
-
- int dest = ins->ssa_args.dest;
-
- if (live_start[dest] == -1)
- live_start[dest] = d;
- }
-
- /* Since we just used a source, the source might be
- * dead now. Scan the rest of the block for
- * invocations, and if there are none, the source dies
- * */
-
- int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
-
- for (int src = 0; src < 2; ++src) {
- int s = sources[src];
-
- if (s < 0) continue;
-
- if (s >= SSA_FIXED_MINIMUM) continue;
-
- if (!is_live_after(ctx, block, ins, s)) {
- live_end[s] = d;
- }
- }
-
- ++d;
- }
- }
-
- /* If a node still hasn't been killed, kill it now */
-
- for (int i = 0; i < nodes; ++i) {
- /* live_start == -1 most likely indicates a pinned output */
-
- if (live_end[i] == -1)
- live_end[i] = d;
- }
-
- /* Setup interference between nodes that are live at the same time */
-
- for (int i = 0; i < nodes; ++i) {
- for (int j = i + 1; j < nodes; ++j) {
- if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
- ra_add_node_interference(g, i, j);
- }
- }
-
- ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);
-
- if (!ra_allocate(g)) {
- DBG("Error allocating registers\n");
- assert(0);
- }
-
- /* Cleanup */
- free(live_start);
- free(live_end);
-
- return g;
-}
-
/* Midgard IR only knows vector ALU types, but we sometimes need to actually
* use scalar ALU instructions, for functional or performance reasons. To do
* this, we just demote vector ALU payloads to scalar. */
@@ -3247,7 +2509,7 @@ midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block)
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
if (midgard_is_pinned(ctx, ins->ssa_args.dest)) continue;
- if (is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
+ if (mir_is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
mir_remove_instruction(ins);
progress = true;
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_liveness.c b/src/gallium/drivers/panfrost/midgard/midgard_liveness.c
new file mode 100644
index 00000000000..ab36583ef39
--- /dev/null
+++ b/src/gallium/drivers/panfrost/midgard/midgard_liveness.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* mir_is_live_after performs liveness analysis on the MIR, used primarily
+ * as part of register allocation. TODO: Algorithmic improvements for
+ * compiler performance (this is the worst algorithm possible -- see
+ * backlog with Connor on IRC) */
+
+#include "compiler.h"
+
+/* True when either source slot (src0 or src1) of `ins` refers to `src` */
+static bool
+midgard_is_live_in_instr(midgard_instruction *ins, int src)
+{
+        bool reads_src0 = (ins->ssa_args.src0 == src);
+        bool reads_src1 = (ins->ssa_args.src1 == src);
+        return reads_src0 || reads_src1;
+}
+
+/* Depth-first search of the blocks reachable from `bl` for a read of `src`.
+ * Blocks are flagged as visited along the way so that cycles terminate; the
+ * caller is expected to clear the flags afterwards */
+static bool
+is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
+{
+        for (unsigned s = 0; s < bl->nr_successors; ++s) {
+                midgard_block *next = bl->successors[s];
+
+                /* A block that is already flagged has been searched (or is
+                 * being searched further up the recursion), so there is
+                 * nothing new to find down this path */
+                if (next->visited)
+                        continue;
+
+                /* Flag before descending so cycles cannot loop forever */
+                next->visited = true;
+
+                /* A read anywhere in the block itself keeps `src` alive... */
+                mir_foreach_instr_in_block(next, use) {
+                        if (midgard_is_live_in_instr(use, src))
+                                return true;
+                }
+
+                /* ...as does a read in anything reachable from it */
+                if (is_live_after_successors(ctx, next, src))
+                        return true;
+        }
+
+        /* Nothing reachable from here reads `src` */
+        return false;
+}
+
+/* Is `src` read after `start`, either later in `block` or in any block
+ * reachable from it? Leaves every block's visited flag cleared on return */
+bool
+mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src)
+{
+        /* A read later in the same block settles it immediately */
+        mir_foreach_instr_in_block_from(block, later, mir_next_op(start)) {
+                if (midgard_is_live_in_instr(later, src))
+                        return true;
+        }
+
+        /* Otherwise, search everything reachable through the successors */
+        bool live_downstream = is_live_after_successors(ctx, block, src);
+
+        /* That search flags blocks as visited; reset them for the next query */
+        mir_foreach_block(ctx, blk)
+                blk->visited = false;
+
+        return live_downstream;
+}
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ops.c b/src/gallium/drivers/panfrost/midgard/midgard_ops.c
new file mode 100644
index 00000000000..cffa3c20fdf
--- /dev/null
+++ b/src/gallium/drivers/panfrost/midgard/midgard_ops.c
@@ -0,0 +1,188 @@
+/* Copyright (c) 2018-2019 Alyssa Rosenzweig ([email protected])
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "midgard.h"
+
+/* Include the definitions of the macros and such */
+
+#define MIDGARD_OPS_TABLE
+#include "helpers.h"
+#undef MIDGARD_OPS_TABLE
+
+/* Maps each ALU opcode to its properties: the mnemonic, plus a mask of the
+ * units it may run on and any OP_ and QUIRK_ flags. Shared by the disassembler
+ * and the compiler, and defined once in this .c file to avoid duplication */
+
+struct mir_op_props alu_opcode_props[256] = {
+        [midgard_alu_op_fadd] = {"fadd", UNITS_ADD | OP_COMMUTES},
+        [midgard_alu_op_fmul] = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
+        [midgard_alu_op_fmin] = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
+        [midgard_alu_op_fmax] = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
+        [midgard_alu_op_imin] = {"imin", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_imax] = {"imax", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_umin] = {"umin", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_umax] = {"umax", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_fmov] = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
+        [midgard_alu_op_fround] = {"fround", UNITS_ADD},
+        [midgard_alu_op_froundeven] = {"froundeven", UNITS_ADD},
+        [midgard_alu_op_ftrunc] = {"ftrunc", UNITS_ADD},
+        [midgard_alu_op_ffloor] = {"ffloor", UNITS_ADD},
+        [midgard_alu_op_fceil] = {"fceil", UNITS_ADD},
+        [midgard_alu_op_ffma] = {"ffma", UNIT_VLUT},
+
+        /* Though they output a scalar, they need to run on a vector unit
+         * since they process vectors */
+        [midgard_alu_op_fdot3] = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
+        [midgard_alu_op_fdot3r] = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
+        [midgard_alu_op_fdot4] = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
+
+        /* Incredibly, iadd can run on vmul, etc */
+        [midgard_alu_op_iadd] = {"iadd", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_iabs] = {"iabs", UNITS_ADD},
+        [midgard_alu_op_isub] = {"isub", UNITS_MOST},
+        [midgard_alu_op_imul] = {"imul", UNITS_MUL | OP_COMMUTES},
+        [midgard_alu_op_imov] = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},
+
+        /* For vector comparisons, use ball etc */
+        [midgard_alu_op_feq] = {"feq", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_fne] = {"fne", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_fle] = {"fle", UNITS_MOST},
+        [midgard_alu_op_flt] = {"flt", UNITS_MOST},
+        [midgard_alu_op_ieq] = {"ieq", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_ine] = {"ine", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_ilt] = {"ilt", UNITS_MOST},
+        [midgard_alu_op_ile] = {"ile", UNITS_MOST},
+        [midgard_alu_op_ult] = {"ult", UNITS_MOST},
+        [midgard_alu_op_ule] = {"ule", UNITS_MOST},
+
+        [midgard_alu_op_icsel] = {"icsel", UNITS_ADD},
+        [midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD},
+        [midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD},
+        [midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL},
+
+        [midgard_alu_op_frcp] = {"frcp", UNIT_VLUT},
+        [midgard_alu_op_frsqrt] = {"frsqrt", UNIT_VLUT},
+        [midgard_alu_op_fsqrt] = {"fsqrt", UNIT_VLUT},
+        [midgard_alu_op_fpow_pt1] = {"fpow_pt1", UNIT_VLUT},
+        [midgard_alu_op_fexp2] = {"fexp2", UNIT_VLUT},
+        [midgard_alu_op_flog2] = {"flog2", UNIT_VLUT},
+
+        [midgard_alu_op_f2i] = {"f2i", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_f2u] = {"f2u", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_f2u8] = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_i2f] = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_u2f] = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},
+
+        [midgard_alu_op_fsin] = {"fsin", UNIT_VLUT},
+        [midgard_alu_op_fcos] = {"fcos", UNIT_VLUT},
+
+        /* XXX: Test case where it's right on smul but not sadd */
+        [midgard_alu_op_iand] = {"iand", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_iandnot] = {"iandnot", UNITS_MOST},
+
+        [midgard_alu_op_ior] = {"ior", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_iornot] = {"iornot", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_inor] = {"inor", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_ixor] = {"ixor", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_inxor] = {"inxor", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_iclz] = {"iclz", UNITS_ADD},
+        [midgard_alu_op_ibitcount8] = {"ibitcount8", UNITS_ADD},
+        [midgard_alu_op_inand] = {"inand", UNITS_MOST},
+        [midgard_alu_op_ishl] = {"ishl", UNITS_ADD},
+        [midgard_alu_op_iasr] = {"iasr", UNITS_ADD},
+        [midgard_alu_op_ilsr] = {"ilsr", UNITS_ADD},
+
+        [midgard_alu_op_fball_eq] = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_fbany_neq] = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_iball_eq] = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_iball_neq] = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_ibany_eq] = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_ibany_neq] = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},
+
+        /* These instructions are not yet emitted by the compiler, so
+         * don't speculate about units yet */
+        [midgard_alu_op_ishladd] = {"ishladd", 0},
+
+        [midgard_alu_op_uball_lt] = {"uball_lt", 0},
+        [midgard_alu_op_uball_lte] = {"uball_lte", 0},
+        [midgard_alu_op_iball_lt] = {"iball_lt", 0},
+        [midgard_alu_op_iball_lte] = {"iball_lte", 0},
+        [midgard_alu_op_ubany_lt] = {"ubany_lt", 0},
+        [midgard_alu_op_ubany_lte] = {"ubany_lte", 0},
+        [midgard_alu_op_ibany_lt] = {"ibany_lt", 0},
+        [midgard_alu_op_ibany_lte] = {"ibany_lte", 0},
+
+        [midgard_alu_op_freduce] = {"freduce", 0},
+        [midgard_alu_op_bball_eq] = {"bball_eq", 0 | OP_COMMUTES},
+        [midgard_alu_op_bbany_neq] = {"bbany_neq", 0 | OP_COMMUTES},
+        [midgard_alu_op_fatan2_pt1] = {"fatan2_pt1", 0},
+        [midgard_alu_op_fatan_pt2] = {"fatan_pt2", 0},
+};
+/* Load/store opcode -> mnemonic; opcodes not listed here are left NULL */
+const char *load_store_opcode_names[256] = {
+ [midgard_op_st_cubemap_coords] = "st_cubemap_coords",
+ [midgard_op_ld_global_id] = "ld_global_id",
+ /* Atomics */
+ [midgard_op_atomic_add] = "atomic_add",
+ [midgard_op_atomic_and] = "atomic_and",
+ [midgard_op_atomic_or] = "atomic_or",
+ [midgard_op_atomic_xor] = "atomic_xor",
+ [midgard_op_atomic_imin] = "atomic_imin",
+ [midgard_op_atomic_umin] = "atomic_umin",
+ [midgard_op_atomic_imax] = "atomic_imax",
+ [midgard_op_atomic_umax] = "atomic_umax",
+ [midgard_op_atomic_xchg] = "atomic_xchg",
+ /* char/short/int loads */
+ [midgard_op_ld_char] = "ld_char",
+ [midgard_op_ld_char2] = "ld_char2",
+ [midgard_op_ld_short] = "ld_short",
+ [midgard_op_ld_char4] = "ld_char4",
+ [midgard_op_ld_short4] = "ld_short4",
+ [midgard_op_ld_int4] = "ld_int4",
+ /* Attribute loads */
+ [midgard_op_ld_attr_32] = "ld_attr_32",
+ [midgard_op_ld_attr_16] = "ld_attr_16",
+ [midgard_op_ld_attr_32i] = "ld_attr_32i",
+ /* Varying loads */
+ [midgard_op_ld_vary_32] = "ld_vary_32",
+ [midgard_op_ld_vary_16] = "ld_vary_16",
+ [midgard_op_ld_vary_32i] = "ld_vary_32i",
+ /* Color buffer load (16 variant) */
+ [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
+ /* Uniform loads, followed by ld_color_buffer_8 */
+ [midgard_op_ld_uniform_16] = "ld_uniform_16",
+ [midgard_op_ld_uniform_32] = "ld_uniform_32",
+ [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
+ /* char/short/int stores */
+ [midgard_op_st_char] = "st_char",
+ [midgard_op_st_char2] = "st_char2",
+ [midgard_op_st_char4] = "st_char4",
+ [midgard_op_st_short4] = "st_short4",
+ [midgard_op_st_int4] = "st_int4",
+ /* Varying stores */
+ [midgard_op_st_vary_32] = "st_vary_32",
+ [midgard_op_st_vary_16] = "st_vary_16",
+ [midgard_op_st_vary_32i] = "st_vary_32i",
+ /* Image stores */
+ [midgard_op_st_image_f] = "st_image_f",
+ [midgard_op_st_image_ui] = "st_image_ui",
+ [midgard_op_st_image_i] = "st_image_i",
+};
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ops.h b/src/gallium/drivers/panfrost/midgard/midgard_ops.h
new file mode 100644
index 00000000000..8b363529aa9
--- /dev/null
+++ b/src/gallium/drivers/panfrost/midgard/midgard_ops.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2018-2019 Alyssa Rosenzweig ([email protected])
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "helpers.h"
+
+/* Forward declare */
+
+extern struct mir_op_props alu_opcode_props[256];
+extern const char *load_store_opcode_names[256];
+
+/* Reports whether `op` is an integer opcode, signed or unsigned alike. The
+ * mnemonic is the source of truth: integer names begin with 'i' or 'u' */
+
+static inline bool
+midgard_is_integer_op(int op)
+{
+        const char *mnemonic = alu_opcode_props[op].name;
+
+        /* Opcodes without a table entry have no name and are not integer */
+        char lead = mnemonic ? mnemonic[0] : '\0';
+
+        return (lead == 'i') || (lead == 'u');
+}
+
+/* Reports whether `op` *produces* an integer. This matches
+ * midgard_is_integer_op except for int<->float conversions, which flip it */
+
+static inline bool
+midgard_is_integer_out_op(int op)
+{
+        bool converts = (alu_opcode_props[op].props & OP_TYPE_CONVERT) != 0;
+        bool int_named = midgard_is_integer_op(op);
+
+        return converts != int_named;
+}
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_print.c b/src/gallium/drivers/panfrost/midgard/midgard_print.c
new file mode 100644
index 00000000000..348650ecf30
--- /dev/null
+++ b/src/gallium/drivers/panfrost/midgard/midgard_print.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+#include "helpers.h"
+#include "midgard_ops.h"
+
+/* Pretty printer for Midgard IR, for use debugging compiler-internal
+ * passes like register allocation. The output superficially resembles
+ * Midgard assembly, with the exception that unit information and such is
+ * (normally) omitted, and generic indices are usually used instead of
+ * registers */
+
+/* Prints one MIR source operand. An index at or above SSA_FIXED_MINIMUM
+ * names a specific register: it is printed as "uN" (N = 23 - reg) when the
+ * register number lies strictly between 16 and 24, and as "rN" otherwise.
+ * Any other index is printed as a bare number. */
+
+static void
+mir_print_source(int source)
+{
+        /* Generic index: nothing to decode */
+        if (source < SSA_FIXED_MINIMUM) {
+                printf("%d", source);
+                return;
+        }
+
+        /* Specific register */
+        int fixed = SSA_REG_FROM_FIXED(source);
+
+        /* TODO: Moving threshold */
+        if (fixed <= 16 || fixed >= 24)
+                printf("r%d", fixed);
+        else
+                printf("u%d", 23 - fixed);
+}
+
+/* Prints a single MIR instruction on its own tab-indented line: the opcode
+ * name (prefixed with "<unit>." for ALU instructions whose unit is non-zero),
+ * the destination index, the first source, then either the inline constant
+ * or the second source, and finally the four embedded constants if the
+ * instruction carries any.
+ *
+ * Only ALU, load/store and texture instructions are expected; any other tag
+ * trips an assertion. */
+
+void
+mir_print_instruction(midgard_instruction *ins)
+{
+        printf("\t");
+
+        switch (ins->type) {
+        case TAG_ALU_4: {
+                midgard_alu_op op = ins->alu.op;
+                const char *name = alu_opcode_props[op].name;
+
+                if (ins->unit)
+                        printf("%d.", ins->unit);
+
+                printf("%s", name ? name : "??");
+                break;
+        }
+
+        case TAG_LOAD_STORE_4: {
+                midgard_load_store_op op = ins->load_store.op;
+                const char *name = load_store_opcode_names[op];
+
+                /* An unnamed load/store op is unexpected, so flag it in
+                 * debug builds. When asserts are compiled out, fall back to
+                 * "??" like the ALU case rather than passing a null pointer
+                 * to %s, which is undefined behaviour */
+                assert(name);
+                printf("%s", name ? name : "??");
+                break;
+        }
+
+        case TAG_TEXTURE_4: {
+                printf("texture");
+                break;
+        }
+
+        default:
+                assert(0);
+        }
+
+        ssa_args *args = &ins->ssa_args;
+
+        printf(" %d, ", args->dest);
+
+        mir_print_source(args->src0);
+        printf(", ");
+
+        /* The second operand is either an inline constant or a source */
+        if (args->inline_constant)
+                printf("#%d", ins->inline_constant);
+        else
+                mir_print_source(args->src1);
+
+        if (ins->has_constants)
+                printf(" <%f, %f, %f, %f>", ins->constants[0], ins->constants[1], ins->constants[2], ins->constants[3]);
+
+        printf("\n");
+}
+
+/* Dumps the MIR of a single block: its instructions, one per line, enclosed
+ * in a pair of braces. */
+
+void
+mir_print_block(midgard_block *block)
+{
+        fputs("{\n", stdout);
+
+        mir_foreach_instr_in_block(block, ins)
+                mir_print_instruction(ins);
+
+        fputs("}\n", stdout);
+}
+
+/* Dumps the MIR of an entire shader by printing every block it contains. */
+
+void
+mir_print_shader(compiler_context *ctx)
+{
+        mir_foreach_block(ctx, block)
+                mir_print_block(block);
+}
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ra.c b/src/gallium/drivers/panfrost/midgard/midgard_ra.c
new file mode 100644
index 00000000000..594cafe45ae
--- /dev/null
+++ b/src/gallium/drivers/panfrost/midgard/midgard_ra.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+#include "util/register_allocate.h"
+
+/* When we're 'squeezing down' the values in the IR, we maintain a hash table
+ * (ctx->hash_to_temp) from the original index to a compact temp index.
+ *
+ * Returns the temp already assigned to `hash`, or assigns the next free one
+ * (ctx->temp_count) the first time an index is seen, also recording the
+ * largest index squeezed so far in ctx->max_hash. Indices at or above
+ * SSA_FIXED_MINIMUM are returned untouched.
+ *
+ * Keys and values are both stored offset by +1; for values this means a zero
+ * result from the lookup can only mean "not present". */
+
+static unsigned
+find_or_allocate_temp(compiler_context *ctx, unsigned hash)
+{
+        /* There is no separate test for negative indices: `hash` is unsigned,
+         * so a negative int passed by a caller arrives here as a very large
+         * value. NOTE(review): presumably such values always exceed
+         * SSA_FIXED_MINIMUM and are passed through by this check -- confirm
+         * against its definition */
+        if (hash >= SSA_FIXED_MINIMUM)
+                return hash;
+
+        unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
+
+        if (temp)
+                return temp - 1;
+
+        /* If no temp is found, allocate one */
+        temp = ctx->temp_count++;
+        ctx->max_hash = MAX2(ctx->max_hash, hash);
+
+        _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
+
+        return temp;
+}
+
+/* Register selection callback handed to the allocator. Trivial policy for
+ * now: pick the lowest-numbered available register, which minimises the
+ * reported register pressure. Only registers 0-15 are considered; the graph
+ * and user data arguments are not used. */
+
+static unsigned int
+midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
+{
+        int candidate = 0;
+
+        /* Skip over registers that are not available */
+        while (candidate < 16 && !BITSET_TEST(regs, candidate))
+                ++candidate;
+
+        if (candidate < 16)
+                return candidate;
+
+        /* None of the sixteen candidates was available */
+        assert(0);
+        return 0;
+}
+
+/* Determine the actual hardware register for an index, based on the RA
+ * results or special values: fixed indices decode directly, non-negative
+ * (squeezed) indices take the register assigned to that node in the graph,
+ * and the SSA_UNUSED_* placeholders map to REGISTER_UNUSED. The largest
+ * allocated register seen is recorded in ctx->work_registers. */
+
+static int
+dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
+{
+        /* Fixed register: no lookup required */
+        if (reg >= SSA_FIXED_MINIMUM)
+                return SSA_REG_FROM_FIXED(reg);
+
+        /* Squeezed index: ask the solved graph */
+        if (reg >= 0) {
+                assert(reg < maxreg);
+                assert(g);
+
+                int allocated = ra_get_node_reg(g, reg);
+                ctx->work_registers = MAX2(ctx->work_registers, allocated);
+                return allocated;
+        }
+
+        /* Negative values are special aliases */
+        if (reg == SSA_UNUSED_0 || reg == SSA_UNUSED_1)
+                return REGISTER_UNUSED;
+
+        unreachable("Unknown SSA register alias");
+}
+
+/* This routine performs the actual register allocation. It should be succeeded
+ * by install_registers.
+ *
+ * Every non-branch instruction first has its SSA indices rewritten into
+ * squeezed temp indices. An interference graph with one node per temp is then
+ * built from linear live ranges (first write to last use, counted in
+ * instructions across the whole shader) and handed to the allocator.
+ *
+ * Returns the solved graph, or NULL when the shader has no temps at all. */
+
+struct ra_graph *
+allocate_registers(compiler_context *ctx)
+{
+        /* Transform the MIR into squeezed index form */
+        mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block(block, ins) {
+                        if (ins->compact_branch) continue;
+
+                        ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
+                        ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
+                        ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
+                }
+        }
+
+        /* No register allocation to do with no SSA. Bail out before the
+         * register set exists: it is created with a NULL memory context, so
+         * nothing would release it if we returned after allocating it */
+
+        if (!ctx->temp_count)
+                return NULL;
+
+        /* Initialize the RA */
+        struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);
+
+        /* Create a primary (general purpose) class, as well as special purpose
+         * pipeline register classes */
+
+        int primary_class = ra_alloc_reg_class(regs);
+        int varying_class = ra_alloc_reg_class(regs);
+
+        /* Add the full set of work registers. Fewer than 16 are offered once
+         * uniform_cutoff exceeds 8 */
+        int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
+        for (int i = 0; i < work_count; ++i)
+                ra_class_add_reg(regs, primary_class, i);
+
+        /* Add special registers. NOTE(review): no node below is ever placed
+         * in varying_class -- confirm whether that is intentional */
+        ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
+        ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);
+
+        /* We're done setting up */
+        ra_set_finalize(regs, NULL);
+
+        /* Let's actually do register allocation */
+        int nodes = ctx->temp_count;
+        struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
+
+        /* Set everything to the work register class, unless it has somewhere
+         * special to go */
+
+        mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block(block, ins) {
+                        if (ins->compact_branch) continue;
+
+                        if (ins->ssa_args.dest < 0) continue;
+
+                        if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
+
+                        int class = primary_class;
+
+                        ra_set_node_class(g, ins->ssa_args.dest, class);
+                }
+        }
+
+        /* Pin every index that has an entry in ssa_to_register to the
+         * register recorded there (entries are stored offset by +1).
+         * NOTE(review): if find_or_allocate_temp had to create a fresh temp
+         * here, its index would be >= nodes -- confirm that every pinned
+         * index has already been squeezed above */
+
+        for (int index = 0; index <= ctx->max_hash; ++index) {
+                unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1);
+
+                if (temp) {
+                        unsigned reg = temp - 1;
+                        int t = find_or_allocate_temp(ctx, index);
+                        ra_set_node_reg(g, t, reg);
+                }
+        }
+
+        /* Determine liveness */
+
+        int *live_start = malloc(nodes * sizeof(int));
+        int *live_end = malloc(nodes * sizeof(int));
+
+        /* Initialize as non-existent */
+
+        for (int i = 0; i < nodes; ++i) {
+                live_start[i] = live_end[i] = -1;
+        }
+
+        /* Running instruction counter, used as the time axis */
+        int d = 0;
+
+        mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block(block, ins) {
+                        if (ins->compact_branch) continue;
+
+                        /* Dest is < 0 for st_vary instructions, which break
+                         * the usual SSA conventions. Liveness analysis doesn't
+                         * make sense on these instructions, so skip them to
+                         * avoid memory corruption */
+
+                        if (ins->ssa_args.dest < 0) continue;
+
+                        if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
+                                /* If this destination is not yet live, it is now since we just wrote it */
+
+                                int dest = ins->ssa_args.dest;
+
+                                if (live_start[dest] == -1)
+                                        live_start[dest] = d;
+                        }
+
+                        /* Since we just used a source, the source might be
+                         * dead now. Scan the rest of the block for
+                         * invocations, and if there are none, the source dies
+                         * */
+
+                        int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
+
+                        for (int src = 0; src < 2; ++src) {
+                                int s = sources[src];
+
+                                if (s < 0) continue;
+
+                                if (s >= SSA_FIXED_MINIMUM) continue;
+
+                                if (!mir_is_live_after(ctx, block, ins, s)) {
+                                        live_end[s] = d;
+                                }
+                        }
+
+                        ++d;
+                }
+        }
+
+        /* If a node still hasn't been killed, kill it now */
+
+        for (int i = 0; i < nodes; ++i) {
+                /* live_start == -1 most likely indicates a pinned output */
+
+                if (live_end[i] == -1)
+                        live_end[i] = d;
+        }
+
+        /* Setup interference between nodes that are live at the same time,
+         * i.e. whose [start, end) ranges overlap */
+
+        for (int i = 0; i < nodes; ++i) {
+                for (int j = i + 1; j < nodes; ++j) {
+                        if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
+                                ra_add_node_interference(g, i, j);
+                }
+        }
+
+        ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);
+
+        if (!ra_allocate(g)) {
+                unreachable("Error allocating registers\n");
+        }
+
+        /* Cleanup */
+        free(live_start);
+        free(live_end);
+
+        return g;
+}
+
+/* Second half of register allocation: once allocate_registers has solved
+ * the graph, rewrite the MIR so that each non-branch instruction's register
+ * fields hold hardware registers instead of SSA indices. Only ALU and
+ * load/store instructions are touched. */
+
+void
+install_registers(compiler_context *ctx, struct ra_graph *g)
+{
+        mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block(block, ins) {
+                        if (ins->compact_branch) continue;
+
+                        ssa_args args = ins->ssa_args;
+
+                        if (ins->type == TAG_ALU_4) {
+                                ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);
+                                ins->registers.src2_imm = args.inline_constant;
+
+                                if (!args.inline_constant) {
+                                        ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
+                                } else {
+                                        /* Encode inline 16-bit constant as a vector by default:
+                                         * the bits from 11 upwards travel in src2_reg, the low
+                                         * bits are shuffled into alu.src2 */
+
+                                        ins->registers.src2_reg = ins->inline_constant >> 11;
+
+                                        /* The mask keeps 12 bits, but only bits 0-10
+                                         * survive the shuffle below */
+                                        int low_bits = ins->inline_constant & ((1 << 12) - 1);
+
+                                        uint16_t imm = ((low_bits >> 8) & 0x7) | ((low_bits & 0xFF) << 3);
+                                        ins->alu.src2 = imm << 2;
+                                }
+
+                                ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);
+                        } else if (ins->type == TAG_LOAD_STORE_4) {
+                                if (OP_IS_STORE_VARY(ins->load_store.op)) {
+                                        /* TODO: use ssa_args for st_vary */
+                                        ins->load_store.reg = 0;
+                                } else {
+                                        /* With a destination the register is what gets
+                                         * written; otherwise it is taken from src0 */
+                                        int ssa_arg = (args.dest >= 0) ? args.dest : args.src0;
+
+                                        ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
+                                }
+                        }
+                }
+        }
+}