author     Eric Anholt <[email protected]>   2017-02-03 10:24:14 -0800
committer  Eric Anholt <[email protected]>   2017-10-10 11:42:04 -0700
commit     ade416d02369cc0942d53ad3cce601d66344f9c3 (patch)
tree       f9824bfb74dc9ef33b78dc67295768fdd07a513a /src/broadcom/compiler/vir_to_qpu.c
parent     f71364f29787d0f822474ed970c5f28bede49abb (diff)
broadcom: Add VC5 NIR compiler.
This is a pretty straightforward fork of VC4's NIR compiler to VC5. The
condition codes, registers, and I/O have all changed, making the backend
hard to share, though their heritage is still recognizable.

v2: Move to src/broadcom/compiler to match intel's layout, rename more
    "vc5" to "v3d", rename QIR to VIR ("V3D IR") to avoid symbol
    conflicts with vc4, use new v3d_debug header, add compiler init/free
    functions, do texture swizzling in NIR to allow optimization.
Diffstat (limited to 'src/broadcom/compiler/vir_to_qpu.c')
-rw-r--r--   src/broadcom/compiler/vir_to_qpu.c   359
1 file changed, 359 insertions(+), 0 deletions(-)
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
new file mode 100644
index 00000000000..78bcea1e302
--- /dev/null
+++ b/src/broadcom/compiler/vir_to_qpu.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/v3d_compiler.h"
+#include "qpu/qpu_instr.h"
+#include "qpu/qpu_disasm.h"
+
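+/* A QPU register operand: when .magic is set, .index is an
+ * enum v3d_qpu_waddr (accumulators, NOP, VPM, TLB, ...); otherwise it is
+ * a physical register file index.
+ */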
+static inline struct qpu_reg
+qpu_reg(int index)
+{
+ struct qpu_reg reg = {
+ .magic = false,
+ .index = index,
+ };
+ return reg;
+}
+
+static inline struct qpu_reg
+qpu_magic(enum v3d_qpu_waddr waddr)
+{
+ struct qpu_reg reg = {
+ .magic = true,
+ .index = waddr,
+ };
+ return reg;
+}
+
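+/* The accumulators r0-r5 are addressed as consecutive magic waddrs
+ * starting at V3D_QPU_WADDR_R0.
+ */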
+static inline struct qpu_reg
+qpu_acc(int acc)
+{
+ return qpu_magic(V3D_QPU_WADDR_R0 + acc);
+}
+
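+/* Returns an ALU instruction with both the add and mul pipes set to NOP,
+ * writing to the NOP magic register.
+ */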
+struct v3d_qpu_instr
+v3d_qpu_nop(void)
+{
+ struct v3d_qpu_instr instr = {
+ .type = V3D_QPU_INSTR_TYPE_ALU,
+ .alu = {
+ .add = {
+ .op = V3D_QPU_A_NOP,
+ .waddr = V3D_QPU_WADDR_NOP,
+ .magic_write = true,
+ },
+ .mul = {
+ .op = V3D_QPU_M_NOP,
+ .waddr = V3D_QPU_WADDR_NOP,
+ .magic_write = true,
+ },
+ }
+ };
+
+ return instr;
+}
+
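+/* Returns a VIR-level ALU NOP with null destination and sources, used as
+ * a carrier for QPU signal bits.
+ */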
+static struct qinst *
+vir_nop(void)
+{
+ struct qreg undef = { QFILE_NULL, 0 };
+ struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
+
+ return qinst;
+}
+
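+/* Allocates a fresh NOP and links it into the instruction list just
+ * before inst.
+ */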
+static struct qinst *
+new_qpu_nop_before(struct qinst *inst)
+{
+ struct qinst *q = vir_nop();
+
+ list_addtail(&q->link, &inst->link);
+
+ return q;
+}
+
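+/* Inserts a NOP carrying the ldunif signal before inst, loading that
+ * instruction's uniform; the value is then read back as r5 (qpu_acc(5)).
+ */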
+static void
+new_ldunif_instr(struct qinst *inst, int i)
+{
+ struct qinst *ldunif = new_qpu_nop_before(inst);
+
+ ldunif->qpu.sig.ldunif = true;
+ assert(inst->src[i].file == QFILE_UNIF);
+ ldunif->uniform = inst->src[i].index;
+}
+
+/**
+ * Encodes the src register (an accumulator or a register-file entry) into
+ * the mux field for the operand, allocating raddr A or B for
+ * register-file reads as needed.
+ */
+static void
+set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
+{
+ if (src.magic) {
+ assert(src.index >= V3D_QPU_WADDR_R0 &&
+ src.index <= V3D_QPU_WADDR_R5);
+ *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
+ return;
+ }
+
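+ /* Register-file reads go through raddr A or B.  Use A if no mux
+ * has claimed it yet, reuse it if it already holds this index, and
+ * otherwise fall back to B (asserting that B is either free or
+ * already holds the same index).
+ */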
+ if (instr->alu.add.a != V3D_QPU_MUX_A &&
+ instr->alu.add.b != V3D_QPU_MUX_A &&
+ instr->alu.mul.a != V3D_QPU_MUX_A &&
+ instr->alu.mul.b != V3D_QPU_MUX_A) {
+ instr->raddr_a = src.index;
+ *mux = V3D_QPU_MUX_A;
+ } else {
+ if (instr->raddr_a == src.index) {
+ *mux = V3D_QPU_MUX_A;
+ } else {
+ assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
+ instr->alu.add.b == V3D_QPU_MUX_B &&
+ instr->alu.mul.a == V3D_QPU_MUX_B &&
+ instr->alu.mul.b == V3D_QPU_MUX_B) ||
+ src.index == instr->raddr_b);
+
+ instr->raddr_b = src.index;
+ *mux = V3D_QPU_MUX_B;
+ }
+ }
+}
+
+static void
+v3d_generate_code_block(struct v3d_compile *c,
+ struct qblock *block,
+ struct qpu_reg *temp_registers)
+{
+ int last_vpm_read_index = -1;
+
+ vir_for_each_inst(qinst, block) {
+#if 0
+ fprintf(stderr, "translating qinst to qpu: ");
+ vir_dump_inst(c, qinst);
+ fprintf(stderr, "\n");
+#endif
+
+ struct qinst *temp;
+
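+ /* Instructions with an implicit uniform consume a slot in the
+ * uniform stream without needing an explicit ldunif.
+ */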
+ if (vir_has_implicit_uniform(qinst)) {
+ int src = vir_get_implicit_uniform_src(qinst);
+ assert(qinst->src[src].file == QFILE_UNIF);
+ qinst->uniform = qinst->src[src].index;
+ c->num_uniforms++;
+ }
+
+ int nsrc = vir_get_non_sideband_nsrc(qinst);
+ struct qpu_reg src[ARRAY_SIZE(qinst->src)];
+ bool emitted_ldunif = false;
+ for (int i = 0; i < nsrc; i++) {
+ int index = qinst->src[i].index;
+ switch (qinst->src[i].file) {
+ case QFILE_REG:
+ src[i] = qpu_reg(qinst->src[i].index);
+ break;
+ case QFILE_MAGIC:
+ src[i] = qpu_magic(qinst->src[i].index);
+ break;
+ case QFILE_NULL:
+ case QFILE_LOAD_IMM:
+ src[i] = qpu_acc(0);
+ break;
+ case QFILE_TEMP:
+ src[i] = temp_registers[index];
+ break;
+ case QFILE_UNIF:
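+ /* Emit at most one ldunif per instruction; every
+ * uniform source of this instruction then reads the
+ * same value back from r5.
+ */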
+ if (!emitted_ldunif) {
+ new_ldunif_instr(qinst, i);
+ c->num_uniforms++;
+ emitted_ldunif = true;
+ }
+
+ src[i] = qpu_acc(5);
+ break;
+ case QFILE_VARY:
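+ /* The ldvary result is read back as r3 here. */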
+ temp = new_qpu_nop_before(qinst);
+ temp->qpu.sig.ldvary = true;
+
+ src[i] = qpu_acc(3);
+ break;
+ case QFILE_SMALL_IMM:
+ abort(); /* XXX */
+#if 0
+ src[i].mux = QPU_MUX_SMALL_IMM;
+ src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
+ /* This should only have returned a valid
+ * small immediate field, not ~0 for failure.
+ */
+ assert(src[i].addr <= 47);
+#endif
+ break;
+
+ case QFILE_VPM:
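+ /* VPM reads must happen in non-decreasing address
+ * order, which the assert below checks.
+ */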
+ assert((int)qinst->src[i].index >=
+ last_vpm_read_index);
+ (void)last_vpm_read_index;
+ last_vpm_read_index = qinst->src[i].index;
+
+ temp = new_qpu_nop_before(qinst);
+ temp->qpu.sig.ldvpm = true;
+
+ src[i] = qpu_acc(3);
+ break;
+
+ case QFILE_TLB:
+ case QFILE_TLBU:
+ unreachable("bad vir src file");
+ }
+ }
+
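+ /* Translate the VIR destination file into a QPU write address. */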
+ struct qpu_reg dst;
+ switch (qinst->dst.file) {
+ case QFILE_NULL:
+ dst = qpu_magic(V3D_QPU_WADDR_NOP);
+ break;
+
+ case QFILE_REG:
+ dst = qpu_reg(qinst->dst.index);
+ break;
+
+ case QFILE_MAGIC:
+ dst = qpu_magic(qinst->dst.index);
+ break;
+
+ case QFILE_TEMP:
+ dst = temp_registers[qinst->dst.index];
+ break;
+
+ case QFILE_VPM:
+ dst = qpu_magic(V3D_QPU_WADDR_VPM);
+ break;
+
+ case QFILE_TLB:
+ dst = qpu_magic(V3D_QPU_WADDR_TLB);
+ break;
+
+ case QFILE_TLBU:
+ dst = qpu_magic(V3D_QPU_WADDR_TLBU);
+ break;
+
+ case QFILE_VARY:
+ case QFILE_UNIF:
+ case QFILE_SMALL_IMM:
+ case QFILE_LOAD_IMM:
+ assert(!"not reached");
+ break;
+ }
+
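+ /* VIR uses at most one of the two ALU pipes per instruction, so
+ * route the sources and destination into the add pipe if it has
+ * an op, and into the mul pipe otherwise.
+ */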
+ if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
+ if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
+ assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
+ if (nsrc >= 1) {
+ set_src(&qinst->qpu,
+ &qinst->qpu.alu.add.a, src[0]);
+ }
+ if (nsrc >= 2) {
+ set_src(&qinst->qpu,
+ &qinst->qpu.alu.add.b, src[1]);
+ }
+
+ qinst->qpu.alu.add.waddr = dst.index;
+ qinst->qpu.alu.add.magic_write = dst.magic;
+ } else {
+ if (nsrc >= 1) {
+ set_src(&qinst->qpu,
+ &qinst->qpu.alu.mul.a, src[0]);
+ }
+ if (nsrc >= 2) {
+ set_src(&qinst->qpu,
+ &qinst->qpu.alu.mul.b, src[1]);
+ }
+
+ qinst->qpu.alu.mul.waddr = dst.index;
+ qinst->qpu.alu.mul.magic_write = dst.magic;
+ }
+ } else {
+ assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
+ }
+ }
+}
+
+
+static void
+v3d_dump_qpu(struct v3d_compile *c)
+{
+ fprintf(stderr, "%s prog %d/%d QPU:\n",
+ vir_get_stage_name(c),
+ c->program_id, c->variant_id);
+
+ for (int i = 0; i < c->qpu_inst_count; i++) {
+ const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
+ fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
+ }
+ fprintf(stderr, "\n");
+}
+
+void
+v3d_vir_to_qpu(struct v3d_compile *c)
+{
+ struct qpu_reg *temp_registers = v3d_register_allocate(c);
+ struct qblock *end_block = list_last_entry(&c->blocks,
+ struct qblock, link);
+
+ /* Reset the uniform count to the number of uniforms the generated
+ * QPU code will actually load.
+ */
+ c->num_uniforms = 0;
+
+ vir_for_each_block(block, c)
+ v3d_generate_code_block(c, block, temp_registers);
+
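+ /* Terminate the program with a NOP carrying the thrsw signal. */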
+ struct qinst *thrsw = vir_nop();
+ list_addtail(&thrsw->link, &end_block->instructions);
+ thrsw->qpu.sig.thrsw = true;
+
+ uint32_t cycles = v3d_qpu_schedule_instructions(c);
+
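+ /* Pack each scheduled instruction into its 64-bit QPU encoding. */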
+ c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
+ int i = 0;
+ vir_for_each_inst_inorder(inst, c) {
+ bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
+ &c->qpu_insts[i++]);
+ assert(ok); (void) ok;
+ }
+ assert(i == c->qpu_inst_count);
+
+ if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
+ vir_get_stage_name(c),
+ c->program_id, c->variant_id,
+ cycles);
+ }
+
+ if (V3D_DEBUG & (V3D_DEBUG_QPU |
+ v3d_debug_flag_for_shader_stage(c->s->stage))) {
+ v3d_dump_qpu(c);
+ }
+
+ qpu_validate(c);
+
+ free(temp_registers);
+}