summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChia-I Wu <[email protected]>2012-12-13 05:48:28 +0800
committerChia-I Wu <[email protected]>2013-04-26 16:20:52 +0800
commit7118ff8bb02046bb2f440e2a5c48d9a41bb057b1 (patch)
treefacfe5b880db82818b95521da5edc5803de749be
parent0fa2d0e98aec1e864f7edc225eb9cda449051733 (diff)
ilo: add a toy shader compiler
This is a simple shader compiler that performs almost zero optimizations. The generated code is usually much larger compared to that generated by i965. The generated code also requires many more registers. Function-wise, it lacks register spilling and does not support most TGSI indirections. Other than those, it works alright.
-rw-r--r--src/gallium/drivers/ilo/Makefile.sources9
-rw-r--r--src/gallium/drivers/ilo/shader/toy_compiler.c556
-rw-r--r--src/gallium/drivers/ilo/shader/toy_compiler.h473
-rw-r--r--src/gallium/drivers/ilo/shader/toy_compiler_asm.c750
-rw-r--r--src/gallium/drivers/ilo/shader/toy_compiler_disasm.c1385
-rw-r--r--src/gallium/drivers/ilo/shader/toy_compiler_reg.h800
-rw-r--r--src/gallium/drivers/ilo/shader/toy_helpers.h289
-rw-r--r--src/gallium/drivers/ilo/shader/toy_legalize.c632
-rw-r--r--src/gallium/drivers/ilo/shader/toy_legalize.h52
-rw-r--r--src/gallium/drivers/ilo/shader/toy_legalize_ra.c628
-rw-r--r--src/gallium/drivers/ilo/shader/toy_optimize.c71
-rw-r--r--src/gallium/drivers/ilo/shader/toy_optimize.h36
-rw-r--r--src/gallium/drivers/ilo/shader/toy_tgsi.c2736
-rw-r--r--src/gallium/drivers/ilo/shader/toy_tgsi.h253
14 files changed, 8669 insertions, 1 deletions
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index 448f1d19736..c19801e7281 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -16,4 +16,11 @@ C_SOURCES := \
ilo_screen.c \
ilo_shader.c \
ilo_state.c \
- ilo_video.c
+ ilo_video.c \
+ shader/toy_compiler.c \
+ shader/toy_compiler_asm.c \
+ shader/toy_compiler_disasm.c \
+ shader/toy_legalize.c \
+ shader/toy_legalize_ra.c \
+ shader/toy_optimize.c \
+ shader/toy_tgsi.c
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler.c b/src/gallium/drivers/ilo/shader/toy_compiler.c
new file mode 100644
index 00000000000..73b03e62594
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_compiler.c
@@ -0,0 +1,556 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "toy_compiler.h"
+
+/**
+ * Dump an operand.
+ *
+ * Prints, in order: the register name (or the immediate value), the region
+ * parameter for non-immediate operands, and a ":<type>" suffix.
+ *
+ * \param tc compiler; only tc->rect_linear_width is read here
+ * \param file register file of the operand
+ * \param type data type of the operand
+ * \param rect region selector, expanded to vert_stride/width/horz_stride
+ * \param indirect true for a register-indirect operand (only honored for
+ * TOY_FILE_GRF)
+ * \param indirect_subreg address subregister location; it is divided by the
+ * size of a UW to form the printed a0 subregister index
+ * \param val32 register location for register files, raw 32-bit value for
+ * immediates
+ * \param is_dst true for a destination, which prints only <horz_stride>
+ */
+static void
+tc_dump_operand(struct toy_compiler *tc,
+ enum toy_file file, enum toy_type type, enum toy_rect rect,
+ bool indirect, unsigned indirect_subreg, uint32_t val32,
+ bool is_dst)
+{
+ /* ARF and IMM never use this table; they are printed by dedicated cases */
+ static const char *toy_file_names[TOY_FILE_COUNT] = {
+ [TOY_FILE_VRF] = "v",
+ [TOY_FILE_ARF] = "NOT USED",
+ [TOY_FILE_GRF] = "r",
+ [TOY_FILE_MRF] = "m",
+ [TOY_FILE_IMM] = "NOT USED",
+ };
+ const char *name = toy_file_names[file];
+ int reg, subreg;
+
+ /*
+ * For registers, val32 is a location: split it into a register number and
+ * a subregister index counted in units of the operand type. Both stay
+ * unset for immediates, which never read them.
+ */
+ if (file != TOY_FILE_IMM) {
+ reg = val32 / TOY_REG_WIDTH;
+ subreg = (val32 % TOY_REG_WIDTH) / toy_type_size(type);
+ }
+
+ switch (file) {
+ case TOY_FILE_GRF:
+ /* register-indirect GRF: print "r[a0.N+offset]" instead of "rN.M" */
+ if (indirect) {
+ const int addr_subreg = indirect_subreg / toy_type_size(TOY_TYPE_UW);
+
+ ilo_printf("%s[a0.%d", name, addr_subreg);
+ if (val32)
+ ilo_printf("%+d", (int) val32);
+ ilo_printf("]");
+ break;
+ }
+ /* fall through */
+ case TOY_FILE_VRF:
+ case TOY_FILE_MRF:
+ /* the subregister is only printed when it is non-zero */
+ ilo_printf("%s%d", name, reg);
+ if (subreg)
+ ilo_printf(".%d", subreg);
+ break;
+ case TOY_FILE_ARF:
+ /* there is no default case: an unrecognized ARF register prints nothing */
+ switch (reg) {
+ case BRW_ARF_NULL:
+ ilo_printf("null");
+ break;
+ case BRW_ARF_ADDRESS:
+ ilo_printf("a0.%d", subreg);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ case BRW_ARF_ACCUMULATOR + 1:
+ /* the low bit of reg selects between the two accumulators */
+ ilo_printf("acc%d.%d", (reg & 1), subreg);
+ break;
+ case BRW_ARF_FLAG:
+ ilo_printf("f0.%d", subreg);
+ break;
+ case BRW_ARF_STATE:
+ ilo_printf("sr0.%d", subreg);
+ break;
+ case BRW_ARF_CONTROL:
+ ilo_printf("cr0.%d", subreg);
+ break;
+ case BRW_ARF_NOTIFICATION_COUNT:
+ case BRW_ARF_NOTIFICATION_COUNT + 1:
+ /* the low bit of reg selects between the two notification registers */
+ ilo_printf("n%d.%d", (reg & 1), subreg);
+ break;
+ case BRW_ARF_IP:
+ ilo_printf("ip");
+ break;
+ }
+ break;
+ case TOY_FILE_IMM:
+ /* val32 holds the raw bits of the immediate; interpret them per type */
+ switch (type) {
+ case TOY_TYPE_F:
+ {
+ union fi fi = { .ui = val32 };
+ ilo_printf("%f", fi.f);
+ }
+ break;
+ case TOY_TYPE_D:
+ ilo_printf("%d", (int32_t) val32);
+ break;
+ case TOY_TYPE_UD:
+ ilo_printf("%u", val32);
+ break;
+ case TOY_TYPE_W:
+ /* word immediates only use the low 16 bits */
+ ilo_printf("%d", (int16_t) (val32 & 0xffff));
+ break;
+ case TOY_TYPE_UW:
+ ilo_printf("%u", val32 & 0xffff);
+ break;
+ case TOY_TYPE_V:
+ ilo_printf("0x%08x", val32);
+ break;
+ default:
+ assert(!"unknown imm type");
+ break;
+ }
+ break;
+ default:
+ assert(!"unexpected file");
+ break;
+ }
+
+ /* dump the region parameter */
+ if (file != TOY_FILE_IMM) {
+ int vert_stride, width, horz_stride;
+
+ switch (rect) {
+ case TOY_RECT_LINEAR:
+ /* the linear region depends on the compiler's current linear width */
+ vert_stride = tc->rect_linear_width;
+ width = tc->rect_linear_width;
+ horz_stride = 1;
+ break;
+ case TOY_RECT_041:
+ vert_stride = 0;
+ width = 4;
+ horz_stride = 1;
+ break;
+ case TOY_RECT_010:
+ vert_stride = 0;
+ width = 1;
+ horz_stride = 0;
+ break;
+ case TOY_RECT_220:
+ vert_stride = 2;
+ width = 2;
+ horz_stride = 0;
+ break;
+ case TOY_RECT_440:
+ vert_stride = 4;
+ width = 4;
+ horz_stride = 0;
+ break;
+ case TOY_RECT_240:
+ vert_stride = 2;
+ width = 4;
+ horz_stride = 0;
+ break;
+ default:
+ /* in builds without asserts, an unknown rect is dumped as all zeros */
+ assert(!"unknown rect parameter");
+ vert_stride = 0;
+ width = 0;
+ horz_stride = 0;
+ break;
+ }
+
+ /* destinations only show the horizontal stride */
+ if (is_dst)
+ ilo_printf("<%d>", horz_stride);
+ else
+ ilo_printf("<%d;%d,%d>", vert_stride, width, horz_stride);
+ }
+
+ /* the type suffix is printed for every operand, immediates included */
+ switch (type) {
+ case TOY_TYPE_F:
+ ilo_printf(":f");
+ break;
+ case TOY_TYPE_D:
+ ilo_printf(":d");
+ break;
+ case TOY_TYPE_UD:
+ ilo_printf(":ud");
+ break;
+ case TOY_TYPE_W:
+ ilo_printf(":w");
+ break;
+ case TOY_TYPE_UW:
+ ilo_printf(":uw");
+ break;
+ case TOY_TYPE_V:
+ ilo_printf(":v");
+ break;
+ default:
+ assert(!"unexpected type");
+ break;
+ }
+}
+
+/**
+ * Print a source operand.
+ *
+ * The operand is decorated with a leading "-" when negated and wrapped in
+ * "|...|" when the absolute value is taken.  The four swizzle channels are
+ * appended when tsrc_is_swizzled() reports that the source is swizzled.
+ */
+static void
+tc_dump_src(struct toy_compiler *tc, struct toy_src src)
+{
+   static const char channel_names[] = "xyzw";
+   const bool wrap_abs = src.absolute;
+
+   if (src.negate)
+      ilo_printf("-");
+   if (wrap_abs)
+      ilo_printf("|");
+
+   /* sources always print the full <vert;width,horz> region */
+   tc_dump_operand(tc, src.file, src.type, src.rect,
+                   src.indirect, src.indirect_subreg, src.val32, false);
+
+   if (tsrc_is_swizzled(src)) {
+      const char x = channel_names[src.swizzle_x];
+      const char y = channel_names[src.swizzle_y];
+      const char z = channel_names[src.swizzle_z];
+      const char w = channel_names[src.swizzle_w];
+
+      ilo_printf(".%c%c%c%c", x, y, z, w);
+   }
+
+   /* close the absolute-value bars opened above */
+   if (wrap_abs)
+      ilo_printf("|");
+}
+
+/**
+ * Print a destination operand, followed by its writemask unless all four
+ * channels are enabled.
+ */
+static void
+tc_dump_dst(struct toy_compiler *tc, struct toy_dst dst)
+{
+   const unsigned mask = dst.writemask;
+
+   /* destinations print only the horizontal stride of the region */
+   tc_dump_operand(tc, dst.file, dst.type, dst.rect,
+                   dst.indirect, dst.indirect_subreg, dst.val32, true);
+
+   /* a full writemask is implied and not printed */
+   if (mask == TOY_WRITEMASK_XYZW)
+      return;
+
+   ilo_printf(".");
+   if (mask & TOY_WRITEMASK_X)
+      ilo_printf("x");
+   if (mask & TOY_WRITEMASK_Y)
+      ilo_printf("y");
+   if (mask & TOY_WRITEMASK_Z)
+      ilo_printf("z");
+   if (mask & TOY_WRITEMASK_W)
+      ilo_printf("w");
+}
+
+/**
+ * Return the printable mnemonic of an opcode.
+ *
+ * Hardware opcodes (BRW_OPCODE_x) map to their bare mnemonic.  Toy opcodes
+ * are prefixed with the group they belong to ("tgsi.", "math.", "urb.",
+ * "gs." or "fs.").
+ *
+ * \param opcode a BRW_OPCODE_x or TOY_OPCODE_x value
+ * \return a static string; "unk" when the opcode is not recognized
+ */
+static const char *
+get_opcode_name(unsigned opcode)
+{
+   switch (opcode) {
+   case BRW_OPCODE_MOV: return "mov";
+   case BRW_OPCODE_SEL: return "sel";
+   case BRW_OPCODE_NOT: return "not";
+   case BRW_OPCODE_AND: return "and";
+   case BRW_OPCODE_OR: return "or";
+   case BRW_OPCODE_XOR: return "xor";
+   case BRW_OPCODE_SHR: return "shr";
+   case BRW_OPCODE_SHL: return "shl";
+   case BRW_OPCODE_RSR: return "rsr";
+   case BRW_OPCODE_RSL: return "rsl";
+   case BRW_OPCODE_ASR: return "asr";
+   case BRW_OPCODE_CMP: return "cmp";
+   case BRW_OPCODE_CMPN: return "cmpn";
+   case BRW_OPCODE_JMPI: return "jmpi";
+   case BRW_OPCODE_IF: return "if";
+   case BRW_OPCODE_IFF: return "iff";
+   case BRW_OPCODE_ELSE: return "else";
+   case BRW_OPCODE_ENDIF: return "endif";
+   case BRW_OPCODE_DO: return "do";
+   case BRW_OPCODE_WHILE: return "while";
+   case BRW_OPCODE_BREAK: return "break";
+   case BRW_OPCODE_CONTINUE: return "continue";
+   case BRW_OPCODE_HALT: return "halt";
+   case BRW_OPCODE_MSAVE: return "msave";
+   case BRW_OPCODE_MRESTORE: return "mrestore";
+   case BRW_OPCODE_PUSH: return "push";
+   case BRW_OPCODE_POP: return "pop";
+   case BRW_OPCODE_WAIT: return "wait";
+   case BRW_OPCODE_SEND: return "send";
+   case BRW_OPCODE_SENDC: return "sendc";
+   case BRW_OPCODE_MATH: return "math";
+   case BRW_OPCODE_ADD: return "add";
+   case BRW_OPCODE_MUL: return "mul";
+   case BRW_OPCODE_AVG: return "avg";
+   case BRW_OPCODE_FRC: return "frc";
+   case BRW_OPCODE_RNDU: return "rndu";
+   case BRW_OPCODE_RNDD: return "rndd";
+   case BRW_OPCODE_RNDE: return "rnde";
+   case BRW_OPCODE_RNDZ: return "rndz";
+   case BRW_OPCODE_MAC: return "mac";
+   case BRW_OPCODE_MACH: return "mach";
+   case BRW_OPCODE_LZD: return "lzd";
+   case BRW_OPCODE_SAD2: return "sad2";
+   case BRW_OPCODE_SADA2: return "sada2";
+   case BRW_OPCODE_DP4: return "dp4";
+   case BRW_OPCODE_DPH: return "dph";
+   case BRW_OPCODE_DP3: return "dp3";
+   case BRW_OPCODE_DP2: return "dp2";
+   case BRW_OPCODE_DPA2: return "dpa2";
+   case BRW_OPCODE_LINE: return "line";
+   case BRW_OPCODE_PLN: return "pln";
+   case BRW_OPCODE_MAD: return "mad";
+   case BRW_OPCODE_NOP: return "nop";
+   /* TGSI */
+   case TOY_OPCODE_TGSI_IN: return "tgsi.in";
+   case TOY_OPCODE_TGSI_CONST: return "tgsi.const";
+   case TOY_OPCODE_TGSI_SV: return "tgsi.sv";
+   case TOY_OPCODE_TGSI_IMM: return "tgsi.imm";
+   case TOY_OPCODE_TGSI_INDIRECT_FETCH: return "tgsi.indirect_fetch";
+   case TOY_OPCODE_TGSI_INDIRECT_STORE: return "tgsi.indirect_store";
+   case TOY_OPCODE_TGSI_TEX: return "tgsi.tex";
+   case TOY_OPCODE_TGSI_TXB: return "tgsi.txb";
+   case TOY_OPCODE_TGSI_TXD: return "tgsi.txd";
+   case TOY_OPCODE_TGSI_TXL: return "tgsi.txl";
+   case TOY_OPCODE_TGSI_TXP: return "tgsi.txp";
+   case TOY_OPCODE_TGSI_TXF: return "tgsi.txf";
+   case TOY_OPCODE_TGSI_TXQ: return "tgsi.txq";
+   case TOY_OPCODE_TGSI_TXQ_LZ: return "tgsi.txq_lz";
+   case TOY_OPCODE_TGSI_TEX2: return "tgsi.tex2";
+   case TOY_OPCODE_TGSI_TXB2: return "tgsi.txb2";
+   case TOY_OPCODE_TGSI_TXL2: return "tgsi.txl2";
+   case TOY_OPCODE_TGSI_SAMPLE: return "tgsi.sample";
+   case TOY_OPCODE_TGSI_SAMPLE_I: return "tgsi.sample_i";
+   case TOY_OPCODE_TGSI_SAMPLE_I_MS: return "tgsi.sample_i_ms";
+   case TOY_OPCODE_TGSI_SAMPLE_B: return "tgsi.sample_b";
+   case TOY_OPCODE_TGSI_SAMPLE_C: return "tgsi.sample_c";
+   case TOY_OPCODE_TGSI_SAMPLE_C_LZ: return "tgsi.sample_c_lz";
+   case TOY_OPCODE_TGSI_SAMPLE_D: return "tgsi.sample_d";
+   case TOY_OPCODE_TGSI_SAMPLE_L: return "tgsi.sample_l";
+   case TOY_OPCODE_TGSI_GATHER4: return "tgsi.gather4";
+   case TOY_OPCODE_TGSI_SVIEWINFO: return "tgsi.sviewinfo";
+   case TOY_OPCODE_TGSI_SAMPLE_POS: return "tgsi.sample_pos";
+   case TOY_OPCODE_TGSI_SAMPLE_INFO: return "tgsi.sample_info";
+   /* math */
+   case TOY_OPCODE_INV: return "math.inv";
+   case TOY_OPCODE_LOG: return "math.log";
+   case TOY_OPCODE_EXP: return "math.exp";
+   case TOY_OPCODE_SQRT: return "math.sqrt";
+   case TOY_OPCODE_RSQ: return "math.rsq";
+   case TOY_OPCODE_SIN: return "math.sin";
+   case TOY_OPCODE_COS: return "math.cos";
+   case TOY_OPCODE_FDIV: return "math.fdiv";
+   case TOY_OPCODE_POW: return "math.pow";
+   case TOY_OPCODE_INT_DIV_QUOTIENT: return "math.int_div_quotient";
+   /* spelling fixed: this used to read "math.int_div_remainer" */
+   case TOY_OPCODE_INT_DIV_REMAINDER: return "math.int_div_remainder";
+   /* urb */
+   case TOY_OPCODE_URB_WRITE: return "urb.urb_write";
+   /* gs */
+   case TOY_OPCODE_EMIT: return "gs.emit";
+   case TOY_OPCODE_ENDPRIM: return "gs.endprim";
+   /* fs */
+   case TOY_OPCODE_DDX: return "fs.ddx";
+   case TOY_OPCODE_DDY: return "fs.ddy";
+   case TOY_OPCODE_FB_WRITE: return "fs.fb_write";
+   case TOY_OPCODE_KIL: return "fs.kil";
+   default: return "unk";
+   }
+}
+
+/**
+ * Return the printable name of the cond_modifier field of an instruction.
+ *
+ * The field is decoded according to the opcode: as an SFID for SEND/SENDC,
+ * as a math function control (FC) for MATH, and as a conditional modifier
+ * for everything else.  NULL is returned only for BRW_CONDITIONAL_NONE of
+ * the latter group, meaning there is nothing to print.
+ */
+static const char *
+get_cond_modifier_name(unsigned opcode, unsigned cond_modifier)
+{
+   if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) {
+      /* SFID */
+      switch (cond_modifier) {
+      case BRW_SFID_NULL: return "Null";
+      case BRW_SFID_SAMPLER: return "Sampling Engine";
+      case BRW_SFID_MESSAGE_GATEWAY: return "Message Gateway";
+      case GEN6_SFID_DATAPORT_SAMPLER_CACHE: return "Data Port Sampler Cache";
+      case GEN6_SFID_DATAPORT_RENDER_CACHE: return "Data Port Render Cache";
+      case BRW_SFID_URB: return "URB";
+      case BRW_SFID_THREAD_SPAWNER: return "Thread Spawner";
+      case GEN6_SFID_DATAPORT_CONSTANT_CACHE: return "Constant Cache";
+      default: return "Unknown";
+      }
+   }
+
+   if (opcode == BRW_OPCODE_MATH) {
+      /* FC */
+      switch (cond_modifier) {
+      case BRW_MATH_FUNCTION_INV: return "INV";
+      case BRW_MATH_FUNCTION_LOG: return "LOG";
+      case BRW_MATH_FUNCTION_EXP: return "EXP";
+      case BRW_MATH_FUNCTION_SQRT: return "SQRT";
+      case BRW_MATH_FUNCTION_RSQ: return "RSQ";
+      case BRW_MATH_FUNCTION_SIN: return "SIN";
+      case BRW_MATH_FUNCTION_COS: return "COS";
+      case BRW_MATH_FUNCTION_FDIV: return "FDIV";
+      case BRW_MATH_FUNCTION_POW: return "POW";
+      case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: return "INT DIV (quotient)";
+      case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: return "INT DIV (remainder)";
+      default: return "UNK";
+      }
+   }
+
+   /* a regular conditional modifier */
+   switch (cond_modifier) {
+   case BRW_CONDITIONAL_NONE: return NULL;
+   case BRW_CONDITIONAL_Z: return "z";
+   case BRW_CONDITIONAL_NZ: return "nz";
+   case BRW_CONDITIONAL_G: return "g";
+   case BRW_CONDITIONAL_GE: return "ge";
+   case BRW_CONDITIONAL_L: return "l";
+   case BRW_CONDITIONAL_LE: return "le";
+   default: return "unk";
+   }
+}
+
+/**
+ * Print one instruction on its own line: the mnemonic, the ".sat" and
+ * cond_modifier suffixes, the destination, and every source up to the first
+ * null one.  A NOP prints only its mnemonic.
+ */
+static void
+tc_dump_inst(struct toy_compiler *tc, const struct toy_inst *inst)
+{
+   const char *modifier;
+   int idx;
+
+   ilo_printf(" %s", get_opcode_name(inst->opcode));
+
+   if (inst->opcode != BRW_OPCODE_NOP) {
+      if (inst->saturate)
+         ilo_printf(".sat");
+
+      /* NULL means there is no modifier to show */
+      modifier = get_cond_modifier_name(inst->opcode, inst->cond_modifier);
+      if (modifier)
+         ilo_printf(".%s", modifier);
+
+      ilo_printf(" ");
+
+      tc_dump_dst(tc, inst->dst);
+
+      /* the first null source terminates the operand list */
+      for (idx = 0;
+           idx < Elements(inst->src) && !tsrc_is_null(inst->src[idx]);
+           idx++) {
+         ilo_printf(", ");
+         tc_dump_src(tc, inst->src[idx]);
+      }
+   }
+
+   ilo_printf("\n");
+}
+
+/**
+ * Print every instruction held by the compiler, markers included.
+ *
+ * Regular instructions are labelled with a running program counter; markers
+ * are labelled "marker:" and do not advance the counter.
+ */
+void
+toy_compiler_dump(struct toy_compiler *tc)
+{
+   struct toy_inst *cur;
+   int next_pc = 0;
+
+   tc_head(tc);
+   for (cur = tc_next_no_skip(tc); cur != NULL; cur = tc_next_no_skip(tc)) {
+      if (cur->marker) {
+         /* we do not generate code for markers */
+         ilo_printf("marker:");
+      } else {
+         ilo_printf("%6d:", next_pc);
+         next_pc++;
+      }
+
+      tc_dump_inst(tc, cur);
+   }
+}
+
+/**
+ * Release everything owned by the toy compiler: each instruction on the
+ * list is returned to the slab pool, and then the pool itself is destroyed.
+ */
+void
+toy_compiler_cleanup(struct toy_compiler *tc)
+{
+   struct toy_inst *cur, *lookahead;
+
+   /* the _SAFE variant is needed because entries are freed while walking */
+   LIST_FOR_EACH_ENTRY_SAFE(cur, lookahead, &tc->instructions, list) {
+      util_slab_free(&tc->mempool, cur);
+   }
+
+   util_slab_destroy(&tc->mempool);
+}
+
+/**
+ * Set up tc->templ, the instruction that tc_add() copies to create every
+ * new instruction: a NOP with null operands and default control fields.
+ */
+static void
+tc_init_inst_templ(struct toy_compiler *tc)
+{
+   struct toy_inst *t = &tc->templ;
+   int src_idx, ofs_idx;
+
+   /* opcode and operands */
+   t->opcode = BRW_OPCODE_NOP;
+   t->dst = tdst_null();
+   for (src_idx = 0; src_idx < Elements(t->src); src_idx++)
+      t->src[src_idx] = tsrc_null();
+   for (ofs_idx = 0; ofs_idx < Elements(t->tex.offsets); ofs_idx++)
+      t->tex.offsets[ofs_idx] = tsrc_null();
+
+   /* instruction controls */
+   t->access_mode = BRW_ALIGN_1;
+   t->mask_ctrl = BRW_MASK_ENABLE;
+   t->dep_ctrl = BRW_DEPENDENCY_NORMAL;
+   t->qtr_ctrl = GEN6_COMPRESSION_1Q;
+   t->thread_ctrl = BRW_THREAD_NORMAL;
+   t->pred_ctrl = BRW_PREDICATE_NONE;
+   t->pred_inv = false;
+   t->exec_size = BRW_EXECUTE_1;
+   t->cond_modifier = BRW_CONDITIONAL_NONE;
+   t->acc_wr_ctrl = false;
+   t->saturate = false;
+
+   /* the template is a real instruction, not a marker */
+   t->marker = false;
+
+   list_inithead(&t->list);
+}
+
+/**
+ * Bring a toy compiler into its initial state: zeroed, with an empty
+ * instruction list, a ready instruction template and slab pool, and the
+ * current location at the tail.
+ */
+void
+toy_compiler_init(struct toy_compiler *tc, int gen)
+{
+   memset(tc, 0, sizeof(*tc));
+
+   tc->gen = gen;
+   tc->rect_linear_width = 1;
+   /* skip 0 so that util_hash_table_get() never returns NULL */
+   tc->next_vrf = 1;
+
+   /* one slab object per instruction */
+   util_slab_create(&tc->mempool, sizeof(struct toy_inst),
+                    64, UTIL_SLAB_SINGLETHREADED);
+
+   tc_init_inst_templ(tc);
+
+   list_inithead(&tc->instructions);
+   /* instructions are added to the tail */
+   tc_tail(tc);
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler.h b/src/gallium/drivers/ilo/shader/toy_compiler.h
new file mode 100644
index 00000000000..a6413ead066
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_compiler.h
@@ -0,0 +1,473 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef TOY_COMPILER_H
+#define TOY_COMPILER_H
+
+#include "brw_defines.h"
+
+#include "util/u_slab.h"
+#include "ilo_common.h"
+#include "toy_compiler_reg.h"
+
+/**
+ * Toy opcodes.
+ *
+ * These extend the hardware opcodes (BRW_OPCODE_x) with pseudo opcodes that
+ * are specific to the toy compiler. The first enumerator is pinned to 127
+ * so that every toy opcode that follows is numbered from 128 upwards. All
+ * values must fit in the 8-bit toy_inst::opcode field.
+ */
+enum toy_opcode {
+ /* 0..127 are reserved for BRW_OPCODE_x */
+ TOY_OPCODE_LAST_HW = 127,
+
+ /* TGSI register functions */
+ TOY_OPCODE_TGSI_IN,
+ TOY_OPCODE_TGSI_CONST,
+ TOY_OPCODE_TGSI_SV,
+ TOY_OPCODE_TGSI_IMM,
+ TOY_OPCODE_TGSI_INDIRECT_FETCH,
+ TOY_OPCODE_TGSI_INDIRECT_STORE,
+
+ /* TGSI sampling functions */
+ TOY_OPCODE_TGSI_TEX,
+ TOY_OPCODE_TGSI_TXB,
+ TOY_OPCODE_TGSI_TXD,
+ TOY_OPCODE_TGSI_TXL,
+ TOY_OPCODE_TGSI_TXP,
+ TOY_OPCODE_TGSI_TXF,
+ TOY_OPCODE_TGSI_TXQ,
+ TOY_OPCODE_TGSI_TXQ_LZ,
+ TOY_OPCODE_TGSI_TEX2,
+ TOY_OPCODE_TGSI_TXB2,
+ TOY_OPCODE_TGSI_TXL2,
+ TOY_OPCODE_TGSI_SAMPLE,
+ TOY_OPCODE_TGSI_SAMPLE_I,
+ TOY_OPCODE_TGSI_SAMPLE_I_MS,
+ TOY_OPCODE_TGSI_SAMPLE_B,
+ TOY_OPCODE_TGSI_SAMPLE_C,
+ TOY_OPCODE_TGSI_SAMPLE_C_LZ,
+ TOY_OPCODE_TGSI_SAMPLE_D,
+ TOY_OPCODE_TGSI_SAMPLE_L,
+ TOY_OPCODE_TGSI_GATHER4,
+ TOY_OPCODE_TGSI_SVIEWINFO,
+ TOY_OPCODE_TGSI_SAMPLE_POS,
+ TOY_OPCODE_TGSI_SAMPLE_INFO,
+
+ /* math functions */
+ TOY_OPCODE_INV,
+ TOY_OPCODE_LOG,
+ TOY_OPCODE_EXP,
+ TOY_OPCODE_SQRT,
+ TOY_OPCODE_RSQ,
+ TOY_OPCODE_SIN,
+ TOY_OPCODE_COS,
+ TOY_OPCODE_FDIV,
+ TOY_OPCODE_POW,
+ TOY_OPCODE_INT_DIV_QUOTIENT,
+ TOY_OPCODE_INT_DIV_REMAINDER,
+
+ /* URB functions */
+ TOY_OPCODE_URB_WRITE,
+
+ /* GS-specific functions */
+ TOY_OPCODE_EMIT,
+ TOY_OPCODE_ENDPRIM,
+
+ /* FS-specific functions */
+ TOY_OPCODE_DDX,
+ TOY_OPCODE_DDY,
+ TOY_OPCODE_FB_WRITE,
+ TOY_OPCODE_KIL,
+};
+
+/**
+ * Toy instruction.
+ *
+ * The control fields are bitfields that add up to 32 bits (pad included).
+ * They are followed by the operands, the texture state used by the sampling
+ * opcodes, and the list node that links the instruction into
+ * toy_compiler::instructions.
+ */
+struct toy_inst {
+ unsigned opcode:8; /* enum toy_opcode */
+ unsigned access_mode:1; /* BRW_ALIGN_x */
+ unsigned mask_ctrl:1; /* BRW_MASK_x */
+ unsigned dep_ctrl:2; /* BRW_DEPENDENCY_x */
+ unsigned qtr_ctrl:2; /* GEN6_COMPRESSION_x */
+ unsigned thread_ctrl:2; /* BRW_THREAD_x */
+ unsigned pred_ctrl:4; /* BRW_PREDICATE_x */
+ unsigned pred_inv:1; /* true or false */
+ unsigned exec_size:3; /* BRW_EXECUTE_x */
+ /*
+ * BRW_CONDITIONAL_x for most opcodes; the dumper decodes this field as an
+ * SFID for SEND/SENDC and as a math function for MATH
+ */
+ unsigned cond_modifier:4; /* BRW_CONDITIONAL_x */
+ unsigned acc_wr_ctrl:1; /* true or false */
+ unsigned saturate:1; /* true or false */
+
+ /* true if the instruction should be ignored for instruction iteration */
+ unsigned marker:1;
+
+ /* unused bit that rounds the bitfields up to 32 bits */
+ unsigned pad:1;
+
+ struct toy_dst dst;
+ /* unused trailing sources are null; dumping stops at the first null one */
+ struct toy_src src[5]; /* match TGSI_FULL_MAX_SRC_REGISTERS */
+
+ struct {
+ int target; /* TGSI_TEXTURE_x */
+ struct toy_src offsets[1]; /* need to be 4 when GATHER4 is supported */
+ } tex;
+
+ /* node in toy_compiler::instructions */
+ struct list_head list;
+};
+
+/**
+ * Toy compiler.
+ *
+ * Holds the instruction list being built, the iteration cursor over that
+ * list, and the allocation state for virtual registers.
+ */
+struct toy_compiler {
+ /* hardware generation, as passed to toy_compiler_init() */
+ int gen;
+
+ /* template copied by tc_add() into every new instruction */
+ struct toy_inst templ;
+ /* slab pool that all instructions are allocated from */
+ struct util_slab_mempool mempool;
+ /* head of the instruction list */
+ struct list_head instructions;
+ /*
+ * current location and the node that follows it; new instructions are
+ * linked in with list_addtail() on iter_next
+ */
+ struct list_head *iter, *iter_next;
+
+ /* this is not set until toy_compiler_legalize_for_asm() */
+ int num_instructions;
+
+ /* vert_stride and width used for TOY_RECT_LINEAR; initialized to 1 */
+ int rect_linear_width;
+ /* next VRF number handed out by tc_alloc_vrf(); initialized to 1 */
+ int next_vrf;
+
+ /* set by tc_fail(); only the reason of the first failure is kept */
+ bool fail;
+ const char *reason;
+};
+
+/**
+ * Reserve \p count consecutive VRF numbers and return the first of them.
+ */
+static inline int
+tc_alloc_vrf(struct toy_compiler *tc, int count)
+{
+   int first = tc->next_vrf;
+
+   /* the next allocation starts right after this range */
+   tc->next_vrf = first + count;
+
+   return first;
+}
+
+/**
+ * Allocate one VRF register and return it as a destination operand.
+ */
+static inline struct toy_dst
+tc_alloc_tmp(struct toy_compiler *tc)
+{
+   const int vrf = tc_alloc_vrf(tc, 1);
+
+   return tdst(TOY_FILE_VRF, vrf, 0);
+}
+
+/**
+ * Fill tmp[0..3] with four freshly allocated temporary registers, in
+ * ascending index order.
+ */
+static inline void
+tc_alloc_tmp4(struct toy_compiler *tc, struct toy_dst *tmp)
+{
+   int i;
+
+   for (i = 0; i < 4; i++)
+      tmp[i] = tc_alloc_tmp(tc);
+}
+
+/**
+ * Allocate a copy of \p inst and link it in at the current location.
+ *
+ * \return the new instruction, or NULL when the slab allocation fails
+ */
+static inline struct toy_inst *
+tc_duplicate_inst(struct toy_compiler *tc, const struct toy_inst *inst)
+{
+   struct toy_inst *copy = util_slab_alloc(&tc->mempool);
+
+   if (copy) {
+      /* the copied list node is overwritten when the copy is linked in */
+      *copy = *inst;
+      list_addtail(&copy->list, tc->iter_next);
+   }
+
+   return copy;
+}
+
+/**
+ * Unlink an instruction from wherever it is and relink it at the current
+ * location.
+ */
+static inline void
+tc_move_inst(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct list_head *node = &inst->list;
+
+   list_del(node);
+   list_addtail(node, tc->iter_next);
+}
+
+/**
+ * Unlink an instruction from the list and return it to the slab pool.
+ */
+static inline void
+tc_discard_inst(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct list_head *node = &inst->list;
+
+   list_del(node);
+   util_slab_free(&tc->mempool, inst);
+}
+
+/**
+ * Create a new instruction at the current location as a copy of the
+ * compiler's template (tc->templ).
+ */
+static inline struct toy_inst *
+tc_add(struct toy_compiler *tc)
+{
+   const struct toy_inst *templ = &tc->templ;
+
+   return tc_duplicate_inst(tc, templ);
+}
+
+/**
+ * Add an instruction with a destination and three source operands.
+ *
+ * \return the new instruction, or NULL when tc_add() fails
+ */
+static inline struct toy_inst *
+tc_add3(struct toy_compiler *tc, unsigned opcode,
+        struct toy_dst dst,
+        struct toy_src src0,
+        struct toy_src src1,
+        struct toy_src src2)
+{
+   struct toy_inst *inst = tc_add(tc);
+
+   if (inst) {
+      inst->opcode = opcode;
+      inst->dst = dst;
+      inst->src[0] = src0;
+      inst->src[1] = src1;
+      inst->src[2] = src2;
+   }
+
+   return inst;
+}
+
+/**
+ * A convenient version of tc_add() for instructions with 2 source operands.
+ * The third source is set to null.
+ *
+ * The opcode is taken as unsigned, like in tc_add0(), tc_add1() and
+ * tc_add3().
+ *
+ * \return the new instruction, or NULL when the allocation fails
+ */
+static inline struct toy_inst *
+tc_add2(struct toy_compiler *tc, unsigned opcode,
+        struct toy_dst dst,
+        struct toy_src src0,
+        struct toy_src src1)
+{
+   return tc_add3(tc, opcode, dst, src0, src1, tsrc_null());
+}
+
+/**
+ * Add an instruction with a destination and a single source operand; the
+ * second source is null.
+ */
+static inline struct toy_inst *
+tc_add1(struct toy_compiler *tc, unsigned opcode,
+        struct toy_dst dst,
+        struct toy_src src0)
+{
+   const struct toy_src no_src1 = tsrc_null();
+
+   return tc_add2(tc, opcode, dst, src0, no_src1);
+}
+
+/**
+ * Add an instruction that has neither a destination nor source operands;
+ * both are set to null.
+ */
+static inline struct toy_inst *
+tc_add0(struct toy_compiler *tc, unsigned opcode)
+{
+   const struct toy_dst no_dst = tdst_null();
+   const struct toy_src no_src = tsrc_null();
+
+   return tc_add1(tc, opcode, no_dst, no_src);
+}
+
+/*
+ * TC_ALUn(func, opcode) defines a static inline function named func that
+ * adds an instruction with the given opcode and n source operands. The
+ * generated functions return whatever tc_addn() returns, so their result is
+ * NULL when the allocation fails.
+ */
+
+/* no destination and no source: func(tc) */
+#define TC_ALU0(func, opcode) \
+static inline struct toy_inst * \
+func(struct toy_compiler *tc) \
+{ \
+ return tc_add0(tc, opcode); \
+}
+
+/* one source: func(tc, dst, src) */
+#define TC_ALU1(func, opcode) \
+static inline struct toy_inst * \
+func(struct toy_compiler *tc, \
+ struct toy_dst dst, \
+ struct toy_src src) \
+{ \
+ return tc_add1(tc, opcode, dst, src); \
+}
+
+/* two sources: func(tc, dst, src0, src1) */
+#define TC_ALU2(func, opcode) \
+static inline struct toy_inst * \
+func(struct toy_compiler *tc, \
+ struct toy_dst dst, \
+ struct toy_src src0, \
+ struct toy_src src1) \
+{ \
+ return tc_add2(tc, opcode, \
+ dst, src0, src1); \
+}
+
+/* three sources: func(tc, dst, src0, src1, src2) */
+#define TC_ALU3(func, opcode) \
+static inline struct toy_inst * \
+func(struct toy_compiler *tc, \
+ struct toy_dst dst, \
+ struct toy_src src0, \
+ struct toy_src src1, \
+ struct toy_src src2) \
+{ \
+ return tc_add3(tc, opcode, \
+ dst, src0, src1, src2); \
+}
+
+/*
+ * TC_CND2(func, opcode) defines a static inline function named func that
+ * adds a two-source instruction and sets its cond_modifier field.
+ *
+ * tc_add2() returns NULL when the instruction cannot be allocated, so the
+ * field is only written when an instruction was actually added.  In that
+ * case NULL is returned to the caller, as the other tc_add helpers do.
+ */
+#define TC_CND2(func, opcode) \
+static inline struct toy_inst * \
+func(struct toy_compiler *tc, \
+     struct toy_dst dst, \
+     struct toy_src src0, \
+     struct toy_src src1, \
+     unsigned cond_modifier) \
+{ \
+   struct toy_inst *inst; \
+   inst = tc_add2(tc, opcode, \
+                  dst, src0, src1); \
+   if (inst) \
+      inst->cond_modifier = cond_modifier; \
+   return inst; \
+}
+
+/*
+ * Instruction builders, one per supported opcode. TC_ALUn builders take n
+ * source operands. TC_CND2 builders take two sources plus a value for the
+ * cond_modifier field; note that tc_SEND is one of them, and that the dumper
+ * decodes the cond_modifier of SEND as an SFID.
+ */
+TC_ALU0(tc_NOP, BRW_OPCODE_NOP)
+TC_ALU0(tc_ELSE, BRW_OPCODE_ELSE)
+TC_ALU0(tc_ENDIF, BRW_OPCODE_ENDIF)
+TC_ALU1(tc_MOV, BRW_OPCODE_MOV)
+TC_ALU1(tc_RNDD, BRW_OPCODE_RNDD)
+TC_ALU1(tc_INV, TOY_OPCODE_INV)
+TC_ALU1(tc_FRC, BRW_OPCODE_FRC)
+TC_ALU1(tc_EXP, TOY_OPCODE_EXP)
+TC_ALU1(tc_LOG, TOY_OPCODE_LOG)
+TC_ALU2(tc_ADD, BRW_OPCODE_ADD)
+TC_ALU2(tc_MUL, BRW_OPCODE_MUL)
+TC_ALU2(tc_AND, BRW_OPCODE_AND)
+TC_ALU2(tc_OR, BRW_OPCODE_OR)
+TC_ALU2(tc_DP2, BRW_OPCODE_DP2)
+TC_ALU2(tc_DP3, BRW_OPCODE_DP3)
+TC_ALU2(tc_DP4, BRW_OPCODE_DP4)
+TC_ALU2(tc_SHL, BRW_OPCODE_SHL)
+TC_ALU2(tc_SHR, BRW_OPCODE_SHR)
+TC_ALU2(tc_POW, TOY_OPCODE_POW)
+TC_ALU3(tc_MAC, BRW_OPCODE_MAC)
+TC_CND2(tc_SEL, BRW_OPCODE_SEL)
+TC_CND2(tc_CMP, BRW_OPCODE_CMP)
+TC_CND2(tc_IF, BRW_OPCODE_IF)
+TC_CND2(tc_SEND, BRW_OPCODE_SEND)
+
+/**
+ * Convert a pointer to the list node embedded in an instruction into a
+ * pointer to the instruction itself.  \p tc is not used.
+ */
+static inline struct toy_inst *
+tc_list_to_inst(struct toy_compiler *tc, struct list_head *item)
+{
+   struct toy_inst *owner;
+
+   owner = container_of(item, (struct toy_inst *) NULL, list);
+
+   return owner;
+}
+
+/**
+ * Return the instruction at the current location, or NULL when the cursor
+ * rests on the list head rather than on an instruction.
+ */
+static inline struct toy_inst *
+tc_current(struct toy_compiler *tc)
+{
+   if (tc->iter == &tc->instructions)
+      return NULL;
+
+   return tc_list_to_inst(tc, tc->iter);
+}
+
+/**
+ * Rewind the cursor: the current location becomes the list head and the
+ * next location becomes the first entry of the list.
+ */
+static inline void
+tc_head(struct toy_compiler *tc)
+{
+   struct list_head *head = &tc->instructions;
+
+   tc->iter = head;
+   tc->iter_next = head->next;
+}
+
+/**
+ * Move the cursor to the tail: both the current and the next location are
+ * set to the list head.
+ */
+static inline void
+tc_tail(struct toy_compiler *tc)
+{
+   struct list_head *head = &tc->instructions;
+
+   tc->iter = head;
+   tc->iter_next = head;
+}
+
+/**
+ * Step the cursor to the next instruction, markers included.
+ *
+ * \return the instruction stepped onto, or NULL once the end of the list is
+ *         reached
+ */
+static inline struct toy_inst *
+tc_next_no_skip(struct toy_compiler *tc)
+{
+   struct list_head *upcoming = tc->iter_next;
+
+   /* stay at the tail so that new instructions are added there */
+   if (upcoming == &tc->instructions) {
+      tc_tail(tc);
+      return NULL;
+   }
+
+   tc->iter = upcoming;
+   tc->iter_next = upcoming->next;
+
+   return tc_list_to_inst(tc, upcoming);
+}
+
+/**
+ * Step the cursor to the next instruction that is not a marker.
+ *
+ * \return that instruction, or NULL once the end of the list is reached
+ */
+static inline struct toy_inst *
+tc_next(struct toy_compiler *tc)
+{
+   struct toy_inst *inst = tc_next_no_skip(tc);
+
+   /* keep stepping for as long as we land on markers */
+   while (inst && inst->marker)
+      inst = tc_next_no_skip(tc);
+
+   return inst;
+}
+
+/**
+ * Flag the compilation as failed and remember why.  Only the first failure
+ * is recorded; later calls leave the stored reason untouched.
+ */
+static inline void
+tc_fail(struct toy_compiler *tc, const char *reason)
+{
+   if (tc->fail)
+      return;
+
+   tc->fail = true;
+   tc->reason = reason;
+}
+
+/* Zero \p tc and set it up for adding instructions for the given gen. */
+void
+toy_compiler_init(struct toy_compiler *tc, int gen);
+
+/* Free all instructions of \p tc and destroy its instruction pool. */
+void
+toy_compiler_cleanup(struct toy_compiler *tc);
+
+/* Print the instructions of \p tc, markers included. */
+void
+toy_compiler_dump(struct toy_compiler *tc);
+
+/*
+ * NOTE(review): defined outside toy_compiler.c; presumably returns the
+ * assembled kernel and stores its size in \p size -- confirm the ownership
+ * of the returned pointer against the definition.
+ */
+void *
+toy_compiler_assemble(struct toy_compiler *tc, int *size);
+
+/*
+ * NOTE(review): defined outside toy_compiler.c; presumably prints the
+ * disassembly of a kernel of \p size bytes -- confirm against the definition.
+ */
+void
+toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size);
+
+#endif /* TOY_COMPILER_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_asm.c b/src/gallium/drivers/ilo/shader/toy_compiler_asm.c
new file mode 100644
index 00000000000..09a00dd2211
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_compiler_asm.c
@@ -0,0 +1,750 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "toy_compiler.h"
+
+/* number of low bits of an origin that hold SubRegNumInBytes */
+#define CG_REG_SHIFT 5
+/* extract RegNum from an origin by dropping the SubRegNumInBytes bits */
+#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
+
+/**
+ * Hardware-level view of one instruction.  It is filled in by
+ * codegen_prepare() and read by the translate_*() and codegen_inst*()
+ * functions below.
+ */
+struct codegen {
+ /* the instruction being generated, and its index in the kernel */
+ const struct toy_inst *inst;
+ int pc;
+
+ /* codegen_prepare() always sets this to 0 */
+ unsigned flag_sub_reg_num;
+
+ /* destination operand, with file and type in their BRW_* encodings */
+ struct codegen_dst {
+ unsigned file;
+ unsigned type;
+ bool indirect;
+ unsigned indirect_subreg;
+ unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
+
+ unsigned horz_stride;
+
+ unsigned writemask;
+ } dst;
+
+ /* source operands, with file and type in their BRW_* encodings */
+ struct codegen_src {
+ unsigned file;
+ unsigned type;
+ bool indirect;
+ unsigned indirect_subreg;
+ unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
+
+ /* region description, as BRW_VERTICAL_STRIDE_x, BRW_WIDTH_x, ... */
+ unsigned vert_stride;
+ unsigned width;
+ unsigned horz_stride;
+
+ /* one channel select for each of x, y, z, and w */
+ unsigned swizzle[4];
+ bool absolute;
+ bool negate;
+ } src[3];
+};
+
+/**
+ * Tell whether source operand \p idx is the null register, that is, whether
+ * it lives in the architecture register file at the origin of BRW_ARF_NULL.
+ */
+static bool
+src_is_null(const struct codegen *cg, int idx)
+{
+   if (cg->src[idx].file != BRW_ARCHITECTURE_REGISTER_FILE)
+      return false;
+
+   return (cg->src[idx].origin == BRW_ARF_NULL << CG_REG_SHIFT);
+}
+
+/**
+ * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
+ *
+ * The dword for source 0 additionally carries flag_sub_reg_num at bit 25.
+ *
+ * Immediates are special: the immediate value always goes to the dword of
+ * source 1.  When source 0 is the immediate, source 1 must be null, the
+ * dword for source 0 holds nothing but the flag sub-register number, and
+ * the dword for source 1 holds the value of source 0.
+ */
+static uint32_t
+translate_src(const struct codegen *cg, int idx)
+{
+ const struct codegen_src *src = &cg->src[idx];
+ uint32_t dw;
+
+ /* special treatment is needed if any of the operands is an immediate */
+ if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
+ assert(!cg->src[0].absolute && !cg->src[0].negate);
+ /* only the last src operand can be an immediate */
+ assert(src_is_null(cg, 1));
+
+ if (idx == 0)
+ return cg->flag_sub_reg_num << 25;
+ else
+ return cg->src[0].origin;
+ }
+ else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
+ assert(!cg->src[1].absolute && !cg->src[1].negate);
+ return cg->src[1].origin;
+ }
+
+ assert(src->file != BRW_IMMEDIATE_VALUE);
+
+ if (src->indirect) {
+ /* for indirect operands, origin is a signed offset that must fit in 10 bits */
+ const int offset = (int) src->origin;
+
+ assert(src->file == BRW_GENERAL_REGISTER_FILE);
+ assert(offset < 512 && offset >= -512);
+
+ if (cg->inst->access_mode == BRW_ALIGN_16) {
+ assert(src->width == BRW_WIDTH_4);
+ assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+
+ /* the lower 4 bits are reserved for the swizzle_[xy] */
+ assert(!(src->origin & 0xf));
+
+ dw = src->vert_stride << 21 |
+ src->swizzle[3] << 18 |
+ src->swizzle[2] << 16 |
+ BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+ src->negate << 14 |
+ src->absolute << 13 |
+ src->indirect_subreg << 10 |
+ (src->origin & 0x3f0) |
+ src->swizzle[1] << 2 |
+ src->swizzle[0];
+ }
+ else {
+ /* there are no swizzle bits in this mode; only the identity is accepted */
+ assert(src->swizzle[0] == TOY_SWIZZLE_X &&
+ src->swizzle[1] == TOY_SWIZZLE_Y &&
+ src->swizzle[2] == TOY_SWIZZLE_Z &&
+ src->swizzle[3] == TOY_SWIZZLE_W);
+
+ dw = src->vert_stride << 21 |
+ src->width << 18 |
+ src->horz_stride << 16 |
+ BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+ src->negate << 14 |
+ src->absolute << 13 |
+ src->indirect_subreg << 10 |
+ (src->origin & 0x3ff);
+ }
+ }
+ else {
+ /* sanity check the register number against the register file */
+ switch (src->file) {
+ case BRW_ARCHITECTURE_REGISTER_FILE:
+ break;
+ case BRW_GENERAL_REGISTER_FILE:
+ assert(CG_REG_NUM(src->origin) < 128);
+ break;
+ case BRW_MESSAGE_REGISTER_FILE:
+ assert(cg->inst->opcode == BRW_OPCODE_SEND ||
+ cg->inst->opcode == BRW_OPCODE_SENDC);
+ assert(CG_REG_NUM(src->origin) < 16);
+ break;
+ case BRW_IMMEDIATE_VALUE:
+ default:
+ assert(!"invalid src file");
+ break;
+ }
+
+ if (cg->inst->access_mode == BRW_ALIGN_16) {
+ assert(src->width == BRW_WIDTH_4);
+ assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+
+ /* the lower 4 bits are reserved for the swizzle_[xy] */
+ assert(!(src->origin & 0xf));
+
+ dw = src->vert_stride << 21 |
+ src->swizzle[3] << 18 |
+ src->swizzle[2] << 16 |
+ BRW_ADDRESS_DIRECT << 15 |
+ src->negate << 14 |
+ src->absolute << 13 |
+ src->origin |
+ src->swizzle[1] << 2 |
+ src->swizzle[0];
+ }
+ else {
+ /* there are no swizzle bits in this mode; only the identity is accepted */
+ assert(src->swizzle[0] == TOY_SWIZZLE_X &&
+ src->swizzle[1] == TOY_SWIZZLE_Y &&
+ src->swizzle[2] == TOY_SWIZZLE_Z &&
+ src->swizzle[3] == TOY_SWIZZLE_W);
+
+ dw = src->vert_stride << 21 |
+ src->width << 18 |
+ src->horz_stride << 16 |
+ BRW_ADDRESS_DIRECT << 15 |
+ src->negate << 14 |
+ src->absolute << 13 |
+ src->origin;
+ }
+ }
+
+ /* the dword of the first source also holds the flag sub-register number */
+ if (idx == 0)
+ dw |= cg->flag_sub_reg_num << 25;
+
+ return dw;
+}
+
+/**
+ * Translate the destination operand to the higher 16 bits of DW1 of the
+ * 1-src/2-src format.
+ *
+ * For the conditional branch opcodes listed below, the destination is an
+ * immediate and its lower 16 bits are returned as-is.  For any other
+ * opcode, an immediate destination trips an assertion and 0 is returned.
+ */
+static uint16_t
+translate_dst_region(const struct codegen *cg)
+{
+ const struct codegen_dst *dst = &cg->dst;
+ uint16_t dw1_region;
+
+ if (dst->file == BRW_IMMEDIATE_VALUE) {
+ /* dst is immediate (JIP) when the opcode is a conditional branch */
+ switch (cg->inst->opcode) {
+ case BRW_OPCODE_IF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_ENDIF:
+ case BRW_OPCODE_WHILE:
+ assert(dst->type == BRW_REGISTER_TYPE_W);
+ dw1_region = (dst->origin & 0xffff);
+ break;
+ default:
+ assert(!"dst cannot be immediate");
+ dw1_region = 0;
+ break;
+ }
+
+ return dw1_region;
+ }
+
+ if (dst->indirect) {
+ /* for indirect operands, origin is a signed offset that must fit in 10 bits */
+ const int offset = (int) dst->origin;
+
+ assert(dst->file == BRW_GENERAL_REGISTER_FILE);
+ assert(offset < 512 && offset >= -512);
+
+ if (cg->inst->access_mode == BRW_ALIGN_16) {
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 2, page 144:
+ *
+ * "Allthough Dst.HorzStride is a don't care for Align16, HW
+ * needs this to be programmed as 01."
+ */
+ assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+ /* the lower 4 bits are reserved for the writemask */
+ assert(!(dst->origin & 0xf));
+
+ dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+ dst->horz_stride << 13 |
+ dst->indirect_subreg << 10 |
+ (dst->origin & 0x3f0) |
+ dst->writemask;
+ }
+ else {
+ /* there are no writemask bits in this mode; all channels must be enabled */
+ assert(dst->writemask == TOY_WRITEMASK_XYZW);
+
+ dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+ dst->horz_stride << 13 |
+ dst->indirect_subreg << 10 |
+ (dst->origin & 0x3ff);
+ }
+ }
+ else {
+ /* sanity check the register number against the register file */
+ assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
+ CG_REG_NUM(dst->origin) < 128) ||
+ (dst->file == BRW_MESSAGE_REGISTER_FILE &&
+ CG_REG_NUM(dst->origin) < 16) ||
+ (dst->file == BRW_ARCHITECTURE_REGISTER_FILE));
+
+ if (cg->inst->access_mode == BRW_ALIGN_16) {
+ /* similar to the indirect case */
+ assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+ assert(!(dst->origin & 0xf));
+
+ dw1_region = BRW_ADDRESS_DIRECT << 15 |
+ dst->horz_stride << 13 |
+ dst->origin |
+ dst->writemask;
+ }
+ else {
+ /* there are no writemask bits in this mode; all channels must be enabled */
+ assert(dst->writemask == TOY_WRITEMASK_XYZW);
+
+ dw1_region = BRW_ADDRESS_DIRECT << 15 |
+ dst->horz_stride << 13 |
+ dst->origin;
+ }
+ }
+
+ return dw1_region;
+}
+
+/**
+ * Translate the destination operand to DW1 of the 1-src/2-src format.
+ *
+ * The upper 16 bits come from translate_dst_region().  The lower 16 bits
+ * hold the register file and type of the destination and of the first two
+ * source operands.
+ */
+static uint32_t
+translate_dst(const struct codegen *cg)
+{
+   /*
+    * Widen before shifting.  A uint16_t operand is promoted to (signed) int,
+    * and shifting a value that has bit 15 set (indirect addressing, or a
+    * negative 16-bit JIP) left by 16 would overflow it, which is undefined.
+    */
+   const uint32_t dw1_region = translate_dst_region(cg);
+
+   return dw1_region << 16 |
+          cg->src[1].type << 12 |
+          cg->src[1].file << 10 |
+          cg->src[0].type << 7 |
+          cg->src[0].file << 5 |
+          cg->dst.type << 2 |
+          cg->dst.file;
+}
+
+/**
+ * Translate the instruction to DW0 of the 1-src/2-src format.
+ *
+ * DW0 is assembled from the opcode and the per-instruction controls of
+ * cg->inst.  The two control bits at positions 30 and 29 are hard-wired to
+ * zero.
+ */
+static uint32_t
+translate_inst(const struct codegen *cg)
+{
+   const struct toy_inst *inst = cg->inst;
+   const bool debug_ctrl = false;
+   const bool cmpt_ctrl = false;
+   uint32_t dw0;
+
+   assert(inst->opcode < 128);
+
+   /* build the dword from the lowest field up */
+   dw0 = inst->opcode;
+   dw0 |= inst->access_mode << 8;
+   dw0 |= inst->mask_ctrl << 9;
+   dw0 |= inst->dep_ctrl << 10;
+   dw0 |= inst->qtr_ctrl << 12;
+   dw0 |= inst->thread_ctrl << 14;
+   dw0 |= inst->pred_ctrl << 16;
+   dw0 |= inst->pred_inv << 20;
+   dw0 |= inst->exec_size << 21;
+   dw0 |= inst->cond_modifier << 24;
+   dw0 |= inst->acc_wr_ctrl << 28;
+   dw0 |= cmpt_ctrl << 29;
+   dw0 |= debug_ctrl << 30;
+   dw0 |= inst->saturate << 31;
+
+   return dw0;
+}
+
+/**
+ * Emit the four dwords of an instruction in 1-src/2-src format to \p code.
+ *
+ * The format has no room for a third source, which therefore must be null.
+ */
+static void
+codegen_inst(const struct codegen *cg, uint32_t *code)
+{
+   uint32_t *dw = code;
+
+   *dw++ = translate_inst(cg);
+   *dw++ = translate_dst(cg);
+   *dw++ = translate_src(cg, 0);
+   *dw = translate_src(cg, 1);
+
+   assert(src_is_null(cg, 2));
+}
+
+/**
+ * Codegen an instruction in 3-src format.
+ *
+ * Four dwords are written to \p code.  DW0 is shared with the 1-src/2-src
+ * format.  DW1 holds the destination and the source modifiers.  DW2 and DW3
+ * hold the three sources, each encoded in 20 bits.
+ */
+static void
+codegen_inst_3src(const struct codegen *cg, uint32_t *code)
+{
+ const struct codegen_dst *dst = &cg->dst;
+ uint32_t dw0, dw1, dw_src[3];
+ int i;
+
+ dw0 = translate_inst(cg);
+
+ /*
+ * 3-src instruction restrictions
+ *
+ * - align16 with direct addressing
+ * - GRF or MRF dst
+ * - GRF src
+ * - sub_reg_num is DWORD aligned
+ * - no regioning except replication control
+ * (vert_stride == 0 && horz_stride == 0)
+ */
+ assert(cg->inst->access_mode == BRW_ALIGN_16);
+
+ assert(!dst->indirect);
+ assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
+ CG_REG_NUM(dst->origin) < 128) ||
+ (dst->file == BRW_MESSAGE_REGISTER_FILE &&
+ CG_REG_NUM(dst->origin) < 16));
+ assert(!(dst->origin & 0x3));
+ assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+
+ /* the lowest bit tells whether the destination is in the MRF */
+ dw1 = dst->origin << 19 |
+ dst->writemask << 17 |
+ cg->src[2].negate << 9 |
+ cg->src[2].absolute << 8 |
+ cg->src[1].negate << 7 |
+ cg->src[1].absolute << 6 |
+ cg->src[0].negate << 5 |
+ cg->src[0].absolute << 4 |
+ cg->flag_sub_reg_num << 1 |
+ (dst->file == BRW_MESSAGE_REGISTER_FILE);
+
+ for (i = 0; i < 3; i++) {
+ const struct codegen_src *src = &cg->src[i];
+
+ assert(!src->indirect);
+ assert(src->file == BRW_GENERAL_REGISTER_FILE &&
+ CG_REG_NUM(src->origin) < 128);
+ assert(!(src->origin & 0x3));
+
+ /* either the regular <4;4,1> region or a replicated scalar <0;4,0> */
+ assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 &&
+ src->horz_stride == BRW_HORIZONTAL_STRIDE_1) ||
+ (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
+ src->horz_stride == BRW_HORIZONTAL_STRIDE_0));
+ assert(src->width == BRW_WIDTH_4);
+
+ /* the lowest bit is set when both strides are zero */
+ dw_src[i] = src->origin << 7 |
+ src->swizzle[3] << 7 |
+ src->swizzle[2] << 5 |
+ src->swizzle[1] << 3 |
+ src->swizzle[0] << 1 |
+ (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
+ src->horz_stride == BRW_HORIZONTAL_STRIDE_0);
+
+ /* only the lower 20 bits are used */
+ assert((dw_src[i] & 0xfffff) == dw_src[i]);
+ }
+
+ code[0] = dw0;
+ code[1] = dw1;
+ /*
+ * concatenate the bits of dw_src: src0 starts at bit 0 of DW2, src1 starts
+ * at bit 21 of DW2 and spills into DW3, and src2 starts at bit 10 of DW3
+ */
+ code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
+ code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
+}
+
+/**
+ * Sanity check the region parameters of the operands.
+ *
+ * The checks are assertions only; nothing is reported through the return
+ * value.  The numbers in the comments below refer to the numbered
+ * restrictions on the cited page of the PRM.
+ */
+static void
+codegen_validate_region_restrictions(const struct codegen *cg)
+{
+ /* tables to decode the hardware encodings into plain numbers */
+ const int exec_size_map[] = {
+ [BRW_EXECUTE_1] = 1,
+ [BRW_EXECUTE_2] = 2,
+ [BRW_EXECUTE_4] = 4,
+ [BRW_EXECUTE_8] = 8,
+ [BRW_EXECUTE_16] = 16,
+ [BRW_EXECUTE_32] = 32,
+ };
+ const int width_map[] = {
+ [BRW_WIDTH_1] = 1,
+ [BRW_WIDTH_2] = 2,
+ [BRW_WIDTH_4] = 4,
+ [BRW_WIDTH_8] = 8,
+ [BRW_WIDTH_16] = 16,
+ };
+ const int horz_stride_map[] = {
+ [BRW_HORIZONTAL_STRIDE_0] = 0,
+ [BRW_HORIZONTAL_STRIDE_1] = 1,
+ [BRW_HORIZONTAL_STRIDE_2] = 2,
+ [BRW_HORIZONTAL_STRIDE_4] = 4,
+ };
+ const int vert_stride_map[] = {
+ [BRW_VERTICAL_STRIDE_0] = 0,
+ [BRW_VERTICAL_STRIDE_1] = 1,
+ [BRW_VERTICAL_STRIDE_2] = 2,
+ [BRW_VERTICAL_STRIDE_4] = 4,
+ [BRW_VERTICAL_STRIDE_8] = 8,
+ [BRW_VERTICAL_STRIDE_16] = 16,
+ [BRW_VERTICAL_STRIDE_32] = 32,
+ [BRW_VERTICAL_STRIDE_64] = 64,
+ [BRW_VERTICAL_STRIDE_128] = 128,
+ [BRW_VERTICAL_STRIDE_256] = 256,
+ [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0,
+ };
+ const int exec_size = exec_size_map[cg->inst->exec_size];
+ int i;
+
+ /* Sandy Bridge PRM, volume 4 part 2, page 94 */
+
+ /* 1. (we don't do 32 anyway) */
+ assert(exec_size <= 16);
+
+ for (i = 0; i < Elements(cg->src); i++) {
+ const int width = width_map[cg->src[i].width];
+ const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
+ const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
+
+ /* the first null source ends the checks; later sources are not looked at */
+ if (src_is_null(cg, i))
+ break;
+
+ /* 3. */
+ assert(exec_size >= width);
+
+ if (exec_size == width) {
+ /* 4. & 5. */
+ if (horz_stride)
+ assert(vert_stride == width * horz_stride);
+ }
+
+ if (width == 1) {
+ /* 6. */
+ assert(horz_stride == 0);
+
+ /* 7. */
+ if (exec_size == 1)
+ assert(vert_stride == 0);
+ }
+
+ /* 8. */
+ if (!vert_stride && !horz_stride)
+ assert(width == 1);
+ }
+
+ /* derived from 10.1.2. & 10.2. */
+ assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0);
+}
+
+/**
+ * Map a toy register file to the hardware register file.
+ *
+ * An unknown file trips an assertion and maps to the general register file.
+ */
+static unsigned
+translate_vfile(enum toy_file file)
+{
+   unsigned hw_file;
+
+   switch (file) {
+   case TOY_FILE_ARF:
+      hw_file = BRW_ARCHITECTURE_REGISTER_FILE;
+      break;
+   case TOY_FILE_GRF:
+      hw_file = BRW_GENERAL_REGISTER_FILE;
+      break;
+   case TOY_FILE_MRF:
+      hw_file = BRW_MESSAGE_REGISTER_FILE;
+      break;
+   case TOY_FILE_IMM:
+      hw_file = BRW_IMMEDIATE_VALUE;
+      break;
+   default:
+      assert(!"unhandled toy file");
+      hw_file = BRW_GENERAL_REGISTER_FILE;
+      break;
+   }
+
+   return hw_file;
+}
+
+/**
+ * Map a toy operand type to the hardware register type.
+ *
+ * An unknown type trips an assertion and maps to the float type.
+ */
+static unsigned
+translate_vtype(enum toy_type type)
+{
+   unsigned hw_type;
+
+   switch (type) {
+   case TOY_TYPE_F:
+      hw_type = BRW_REGISTER_TYPE_F;
+      break;
+   case TOY_TYPE_D:
+      hw_type = BRW_REGISTER_TYPE_D;
+      break;
+   case TOY_TYPE_UD:
+      hw_type = BRW_REGISTER_TYPE_UD;
+      break;
+   case TOY_TYPE_W:
+      hw_type = BRW_REGISTER_TYPE_W;
+      break;
+   case TOY_TYPE_UW:
+      hw_type = BRW_REGISTER_TYPE_UW;
+      break;
+   case TOY_TYPE_V:
+      hw_type = BRW_REGISTER_TYPE_V;
+      break;
+   default:
+      assert(!"unhandled toy type");
+      hw_type = BRW_REGISTER_TYPE_F;
+      break;
+   }
+
+   return hw_type;
+}
+
+/**
+ * Map a toy writemask to the hardware writemask.
+ *
+ * No conversion is needed because TOY_WRITEMASK_* are compatible with the
+ * hardware definitions; the value is only checked to fit in four bits.
+ */
+static unsigned
+translate_writemask(enum toy_writemask writemask)
+{
+   assert(writemask <= 0xf);
+
+   return (unsigned) writemask;
+}
+
+/**
+ * Map a toy swizzle to the hardware channel select.
+ *
+ * No conversion is needed because TOY_SWIZZLE_* are compatible with the
+ * hardware definitions; the value is only checked to fit in two bits.
+ */
+static unsigned
+translate_swizzle(enum toy_swizzle swizzle)
+{
+   assert(swizzle <= 3);
+
+   return (unsigned) swizzle;
+}
+
+/**
+ * Prepare for generating an instruction.
+ *
+ * Every field of \p cg is filled from \p inst: the files, types, and
+ * writemask/swizzles are translated to their hardware encodings, and the
+ * toy regions are expanded to vertical stride, width, and horizontal
+ * stride.
+ *
+ * \p pc is the index of the instruction.  \p rect_linear_width is the width
+ * used for TOY_RECT_LINEAR sources and must be 1, 2, 4, 8, or 16.
+ */
+static void
+codegen_prepare(struct codegen *cg, const struct toy_inst *inst,
+                int pc, int rect_linear_width)
+{
+   int i;
+
+   cg->inst = inst;
+   cg->pc = pc;
+
+   cg->flag_sub_reg_num = 0;
+
+   cg->dst.file = translate_vfile(inst->dst.file);
+   cg->dst.type = translate_vtype(inst->dst.type);
+   cg->dst.indirect = inst->dst.indirect;
+   cg->dst.indirect_subreg = inst->dst.indirect_subreg;
+   cg->dst.origin = inst->dst.val32;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 2, page 81:
+    *
+    *   "For a word or an unsigned word immediate data, software must
+    *    replicate the same 16-bit immediate value to both the lower word
+    *    and the high word of the 32-bit immediate field in an instruction."
+    */
+   if (inst->dst.file == TOY_FILE_IMM) {
+      switch (inst->dst.type) {
+      case TOY_TYPE_W:
+      case TOY_TYPE_UW:
+         cg->dst.origin &= 0xffff;
+         cg->dst.origin |= cg->dst.origin << 16;
+         break;
+      default:
+         break;
+      }
+   }
+
+   cg->dst.writemask = translate_writemask(inst->dst.writemask);
+
+   switch (inst->dst.rect) {
+   case TOY_RECT_LINEAR:
+      cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
+      break;
+   default:
+      assert(!"unsupported dst region");
+      cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
+      break;
+   }
+
+   for (i = 0; i < Elements(cg->src); i++) {
+      struct codegen_src *src = &cg->src[i];
+
+      src->file = translate_vfile(inst->src[i].file);
+      src->type = translate_vtype(inst->src[i].type);
+      src->indirect = inst->src[i].indirect;
+      src->indirect_subreg = inst->src[i].indirect_subreg;
+      src->origin = inst->src[i].val32;
+
+      /*
+       * Do the same for src.  The decision must be based on the file of
+       * this source, not on the file of dst: otherwise word immediates in
+       * sources are left unreplicated, and the origins of non-immediate
+       * word sources are mangled whenever dst is an immediate.
+       */
+      if (inst->src[i].file == TOY_FILE_IMM) {
+         switch (inst->src[i].type) {
+         case TOY_TYPE_W:
+         case TOY_TYPE_UW:
+            src->origin &= 0xffff;
+            src->origin |= src->origin << 16;
+            break;
+         default:
+            break;
+         }
+      }
+
+      src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
+      src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
+      src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
+      src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
+      src->absolute = inst->src[i].absolute;
+      src->negate = inst->src[i].negate;
+
+      /* expand the toy region to <vert_stride; width, horz_stride> */
+      switch (inst->src[i].rect) {
+      case TOY_RECT_LINEAR:
+         /* contiguous elements, with rows of rect_linear_width elements */
+         switch (rect_linear_width) {
+         case 1:
+            src->vert_stride = BRW_VERTICAL_STRIDE_1;
+            src->width = BRW_WIDTH_1;
+            break;
+         case 2:
+            src->vert_stride = BRW_VERTICAL_STRIDE_2;
+            src->width = BRW_WIDTH_2;
+            break;
+         case 4:
+            src->vert_stride = BRW_VERTICAL_STRIDE_4;
+            src->width = BRW_WIDTH_4;
+            break;
+         case 8:
+            src->vert_stride = BRW_VERTICAL_STRIDE_8;
+            src->width = BRW_WIDTH_8;
+            break;
+         case 16:
+            src->vert_stride = BRW_VERTICAL_STRIDE_16;
+            src->width = BRW_WIDTH_16;
+            break;
+         default:
+            assert(!"unsupported TOY_RECT_LINEAR width");
+            src->vert_stride = BRW_VERTICAL_STRIDE_1;
+            src->width = BRW_WIDTH_1;
+            break;
+         }
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
+         break;
+      case TOY_RECT_041:
+         src->vert_stride = BRW_VERTICAL_STRIDE_0;
+         src->width = BRW_WIDTH_4;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
+         break;
+      case TOY_RECT_010:
+         src->vert_stride = BRW_VERTICAL_STRIDE_0;
+         src->width = BRW_WIDTH_1;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
+         break;
+      case TOY_RECT_220:
+         src->vert_stride = BRW_VERTICAL_STRIDE_2;
+         src->width = BRW_WIDTH_2;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
+         break;
+      case TOY_RECT_440:
+         src->vert_stride = BRW_VERTICAL_STRIDE_4;
+         src->width = BRW_WIDTH_4;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
+         break;
+      case TOY_RECT_240:
+         src->vert_stride = BRW_VERTICAL_STRIDE_2;
+         src->width = BRW_WIDTH_4;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
+         break;
+      default:
+         assert(!"unsupported src region");
+         src->vert_stride = BRW_VERTICAL_STRIDE_1;
+         src->width = BRW_WIDTH_1;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
+         break;
+      }
+   }
+}
+
+/**
+ * Generate HW shader code.  The instructions should have been legalized.
+ *
+ * The instructions of \p tc are visited from the head, skipping markers,
+ * and four dwords are generated for each of them.  MAD is generated in
+ * 3-src format; every other opcode is generated in 1-src/2-src format.
+ *
+ * Returns a buffer obtained from MALLOC(), or NULL when the allocation
+ * fails or when \p tc is in the failed state.  On success, and when \p size
+ * is not NULL, the number of bytes generated is stored in \p size.
+ */
+void *
+toy_compiler_assemble(struct toy_compiler *tc, int *size)
+{
+   const struct toy_inst *inst;
+   uint32_t *code;
+   int pc;
+
+   code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
+   if (!code)
+      return NULL;
+
+   pc = 0;
+   tc_head(tc);
+   while ((inst = tc_next(tc)) != NULL) {
+      struct codegen cg;
+      uint32_t *dw;
+
+      /* the buffer has room for num_instructions instructions only */
+      if (pc >= tc->num_instructions) {
+         tc_fail(tc, "wrong instruction count");
+         break;
+      }
+
+      dw = &code[pc * 4];
+
+      codegen_prepare(&cg, inst, pc, tc->rect_linear_width);
+      codegen_validate_region_restrictions(&cg);
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MAD:
+         codegen_inst_3src(&cg, dw);
+         break;
+      default:
+         codegen_inst(&cg, dw);
+         break;
+      }
+
+      pc++;
+   }
+
+   /* never return an invalid kernel */
+   if (tc->fail) {
+      FREE(code);
+      return NULL;
+   }
+
+   if (size)
+      *size = pc * 4 * sizeof(uint32_t);
+
+   return code;
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c b/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c
new file mode 100644
index 00000000000..bedbc3d53c8
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c
@@ -0,0 +1,1385 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+typedef short GLshort;
+typedef int GLint;
+typedef unsigned char GLubyte;
+typedef unsigned int GLuint;
+typedef float GLfloat;
+#include <stdint.h>
+#include "brw_defines.h"
+#include "brw_structs.h"
+static int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+
+#include "toy_compiler.h"
+
+/**
+ * Disassemble a kernel and print it to stderr.
+ *
+ * \p kernel is read as an array of struct brw_instruction and \p size is
+ * its size in bytes; trailing bytes that do not make up a whole instruction
+ * are ignored.  Nothing is printed when \p size is zero or negative.
+ */
+void
+toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size)
+{
+   /* set this to true to dump the hex */
+   const bool dump_hex = false;
+   const struct brw_instruction *instructions = kernel;
+   /*
+    * Divide in int.  Comparing the int index against the size_t quotient
+    * would convert a negative size to a huge instruction count.
+    */
+   const int count = size / (int) sizeof(*instructions);
+   int i;
+
+   for (i = 0; i < count; i++) {
+      if (dump_hex) {
+         const uint32_t *dwords = (const uint32_t *) &instructions[i];
+         ilo_printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+                    dwords[3], dwords[2], dwords[1], dwords[0]);
+      }
+
+      /* brw_disasm() takes a non-const pointer, hence the cast */
+      brw_disasm(stderr, (struct brw_instruction *) &instructions[i],
+                 ILO_GEN_GET_MAJOR(tc->gen));
+   }
+}
+
+/*
+ * Mnemonic and operand counts of the opcodes, indexed by BRW_OPCODE_x.
+ * Entries that are not listed are zero-initialized and thus have a NULL
+ * name; print_opcode() reports them as invalid.
+ */
+static const struct opcode_desc {
+ char *name;
+ int nsrc;
+ int ndst;
+} opcode_descs[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
+ [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+/* the name under which print_opcode() looks up the table above */
+static const struct opcode_desc *opcode = opcode_descs;
+
+/*
+ * String tables for the disassembler, each indexed by the encoded value of
+ * a field.  Slots that are not listed are NULL, which control() reports as
+ * an invalid value; an empty string prints nothing.
+ */
+
+/* suffix for each BRW_CONDITIONAL_x */
+static const char * const conditional_modifier[16] = {
+ [BRW_CONDITIONAL_NONE] = "",
+ [BRW_CONDITIONAL_Z] = ".e",
+ [BRW_CONDITIONAL_NZ] = ".ne",
+ [BRW_CONDITIONAL_G] = ".g",
+ [BRW_CONDITIONAL_GE] = ".ge",
+ [BRW_CONDITIONAL_L] = ".l",
+ [BRW_CONDITIONAL_LE] = ".le",
+ [BRW_CONDITIONAL_R] = ".r",
+ [BRW_CONDITIONAL_O] = ".o",
+ [BRW_CONDITIONAL_U] = ".u",
+};
+
+/* source modifiers */
+static const char * const negate[2] = {
+ [0] = "",
+ [1] = "-",
+};
+
+static const char * const _abs[2] = {
+ [0] = "",
+ [1] = "(abs)",
+};
+
+/* region parameters; the strings are the decoded strides and widths */
+static const char * const vert_stride[16] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4",
+ [4] = "8",
+ [5] = "16",
+ [6] = "32",
+ [15] = "VxH",
+};
+
+static const char * const width[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+};
+
+static const char * const horiz_stride[4] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4"
+};
+
+/* channel names for swizzles */
+static const char * const chan_sel[4] = {
+ [0] = "x",
+ [1] = "y",
+ [2] = "z",
+ [3] = "w",
+};
+
+/*
+ * String tables for the instruction controls, each indexed by the encoded
+ * value of a field.  Slots that are not listed are NULL.
+ */
+static const char * const debug_ctrl[2] = {
+ [0] = "",
+ [1] = ".breakpoint"
+};
+
+static const char * const saturate[2] = {
+ [0] = "",
+ [1] = ".sat"
+};
+
+static const char * const accwr[2] = {
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
+static const char * const wectrl[2] = {
+ [0] = "WE_normal",
+ [1] = "WE_all"
+};
+
+/* the strings are the decoded execution sizes */
+static const char * const exec_size[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+ [5] = "32"
+};
+
+static const char * const pred_inv[2] = {
+ [0] = "+",
+ [1] = "-"
+};
+
+/* predicate controls; there is one table for each access mode */
+static const char * const pred_ctrl_align16[16] = {
+ [1] = "",
+ [2] = ".x",
+ [3] = ".y",
+ [4] = ".z",
+ [5] = ".w",
+ [6] = ".any4h",
+ [7] = ".all4h",
+};
+
+static const char * const pred_ctrl_align1[16] = {
+ [1] = "",
+ [2] = ".anyv",
+ [3] = ".allv",
+ [4] = ".any2h",
+ [5] = ".all2h",
+ [6] = ".any4h",
+ [7] = ".all4h",
+ [8] = ".any8h",
+ [9] = ".all8h",
+ [10] = ".any16h",
+ [11] = ".all16h",
+};
+
+static const char * const thread_ctrl[4] = {
+ [0] = "",
+ [2] = "switch"
+};
+
+static const char * const compr_ctrl[4] = {
+ [0] = "",
+ [1] = "sechalf",
+ [2] = "compr",
+ [3] = "compr4",
+};
+
+static const char * const dep_ctrl[4] = {
+ [0] = "",
+ [1] = "NoDDClr",
+ [2] = "NoDDChk",
+ [3] = "NoDDClr,NoDDChk",
+};
+
+static const char * const mask_ctrl[4] = {
+ [0] = "",
+ [1] = "nomask",
+};
+
+static const char * const access_mode[2] = {
+ [0] = "align1",
+ [1] = "align16",
+};
+
+/* name of each register type encoding; slot 6 is not listed and is NULL */
+static const char * const reg_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [4] = "UB",
+ [5] = "B",
+ [7] = "F"
+};
+
+/*
+ * Size of each register type, using the same indices as reg_encoding;
+ * presumably in bytes.  Slot 6 is not listed and is 0.
+ */
+const int reg_type_size[8] = {
+ [0] = 4,
+ [1] = 4,
+ [2] = 2,
+ [3] = 2,
+ [4] = 1,
+ [5] = 1,
+ [7] = 4
+};
+
+/* prefix for each register file; reg() prints it before the register number */
+static const char * const reg_file[4] = {
+ [0] = "A",
+ [1] = "g",
+ [2] = "m",
+ [3] = "imm",
+};
+
+/* suffix for each writemask; bit 0 stands for x and bit 3 for w */
+static const char * const writemask[16] = {
+ [0x0] = ".",
+ [0x1] = ".x",
+ [0x2] = ".y",
+ [0x3] = ".xy",
+ [0x4] = ".z",
+ [0x5] = ".xz",
+ [0x6] = ".yz",
+ [0x7] = ".xyz",
+ [0x8] = ".w",
+ [0x9] = ".xw",
+ [0xa] = ".yw",
+ [0xb] = ".xyw",
+ [0xc] = ".zw",
+ [0xd] = ".xzw",
+ [0xe] = ".yzw",
+ [0xf] = "",
+};
+
+static const char * const end_of_thread[2] = {
+ [0] = "",
+ [1] = "EOT"
+};
+
+/*
+ * String tables for message descriptors, each indexed by the encoded value
+ * of a field.  Slots that are not listed are NULL.
+ */
+
+/* name of each shared function (BRW_SFID_x) */
+static const char * const target_function[16] = {
+ [BRW_SFID_NULL] = "null",
+ [BRW_SFID_MATH] = "math",
+ [BRW_SFID_SAMPLER] = "sampler",
+ [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
+ [BRW_SFID_DATAPORT_READ] = "read",
+ [BRW_SFID_DATAPORT_WRITE] = "write",
+ [BRW_SFID_URB] = "urb",
+ [BRW_SFID_THREAD_SPAWNER] = "thread_spawner"
+};
+
+/* same as above, but with the GEN6_SFID_x and GEN7_SFID_x data ports */
+static const char * const target_function_gen6[16] = {
+ [BRW_SFID_NULL] = "null",
+ [BRW_SFID_MATH] = "math",
+ [BRW_SFID_SAMPLER] = "sampler",
+ [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
+ [BRW_SFID_URB] = "urb",
+ [BRW_SFID_THREAD_SPAWNER] = "thread_spawner",
+ [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
+ [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
+ [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
+ [GEN7_SFID_DATAPORT_DATA_CACHE] = "data"
+};
+
+/* name of each data port message type, reads and writes alike */
+static const char * const dp_rc_msg_type_gen6[16] = {
+ [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+ [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+ [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+ [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
+/* name of each BRW_MATH_FUNCTION_x, followed by the math modifiers */
+static const char * const math_function[16] = {
+ [BRW_MATH_FUNCTION_INV] = "inv",
+ [BRW_MATH_FUNCTION_LOG] = "log",
+ [BRW_MATH_FUNCTION_EXP] = "exp",
+ [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+ [BRW_MATH_FUNCTION_RSQ] = "rsq",
+ [BRW_MATH_FUNCTION_SIN] = "sin",
+ [BRW_MATH_FUNCTION_COS] = "cos",
+ [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+ [BRW_MATH_FUNCTION_FDIV] = "fdiv",
+ [BRW_MATH_FUNCTION_POW] = "pow",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
+ [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
+};
+
+static const char * const math_saturate[2] = {
+ [0] = "",
+ [1] = "sat"
+};
+
+static const char * const math_signed[2] = {
+ [0] = "",
+ [1] = "signed"
+};
+
+static const char * const math_scalar[2] = {
+ [0] = "",
+ [1] = "scalar"
+};
+
+static const char * const math_precision[2] = {
+ [0] = "",
+ [1] = "partial_precision"
+};
+
+/* URB message fields */
+static const char * const urb_opcode[2] = {
+ [0] = "urb_write",
+ [1] = "ff_sync",
+};
+
+static const char * const urb_swizzle[4] = {
+ [BRW_URB_SWIZZLE_NONE] = "",
+ [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+ [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+static const char * const urb_allocate[2] = {
+ [0] = "",
+ [1] = "allocate"
+};
+
+static const char * const urb_used[2] = {
+ [0] = "",
+ [1] = "used"
+};
+
+static const char * const urb_complete[2] = {
+ [0] = "",
+ [1] = "complete"
+};
+
+/* sampler return format; slot 1 is not listed and is NULL */
+static const char * const sampler_target_format[4] = {
+ [0] = "F",
+ [2] = "UD",
+ [3] = "D"
+};
+
+
+/* number of characters printed on the current line so far */
+static int column;
+
+/**
+ * Print a string and advance the column by its length.  Always returns 0.
+ */
+static int string (FILE *file, const char *string)
+{
+    const size_t len = strlen (string);
+
+    fputs (string, file);
+    column += len;
+
+    return 0;
+}
+
+/**
+ * printf()-like variant of string().  The formatted text is built in a
+ * fixed-size buffer, so overly long output is truncated.  Always returns 0.
+ */
+static int format (FILE *f, const char *format, ...)
+{
+    char text[1024];
+    va_list ap;
+
+    va_start (ap, format);
+    vsnprintf (text, sizeof (text) - 1, format, ap);
+    va_end (ap);
+
+    string (f, text);
+
+    return 0;
+}
+
+/**
+ * End the current line and reset the column to 0.  Always returns 0.
+ */
+static int newline (FILE *f)
+{
+    column = 0;
+    putc ('\n', f);
+
+    return 0;
+}
+
+/*
+ * Write blanks until the column counter reaches c. At least one blank is
+ * always written, even when the line is already at or past c.
+ * Always returns 0.
+ */
+static int pad (FILE *f, int c)
+{
+    for (;;) {
+        string (f, " ");
+        if (column >= c)
+            return 0;
+    }
+}
+
+/*
+ * Print the name of a control-field value.
+ *
+ * name  - field name, used only in the diagnostic text
+ * ctrl  - table of value names indexed by id; a NULL slot marks an invalid
+ *         value, an empty string a value that prints nothing
+ * id    - value to look up; it is not range-checked here, so the caller
+ *         must keep it inside the table
+ * space - optional separator state: when non-NULL and *space is non-zero a
+ *         blank is written before the name, and *space is set to 1 once a
+ *         name has been printed
+ *
+ * Returns 0 on success, or 1 after printing a diagnostic for a value that
+ * has no table entry.
+ */
+static int control (FILE *file, const char *name, const char * const ctrl[],
+                    GLuint id, int *space)
+{
+    const char *label = ctrl[id];
+
+    if (!label) {
+        /*
+         * Use format() rather than a bare fprintf() so the column counter
+         * keeps tracking what was written, as print_opcode() does for its
+         * diagnostic; %u matches the unsigned id.
+         */
+        format (file, "*** invalid %s value %u ", name, id);
+        return 1;
+    }
+    if (label[0]) {
+        if (space && *space)
+            string (file, " ");
+        string (file, label);
+        if (space)
+            *space = 1;
+    }
+    return 0;
+}
+
+/*
+ * Print the mnemonic of opcode number id. Returns 0 on success, or 1 after
+ * printing a diagnostic when the opcode table has no name for id.
+ */
+static int print_opcode (FILE *file, int id)
+{
+    const char *mnemonic = opcode[id].name;
+
+    if (mnemonic) {
+        string (file, mnemonic);
+        return 0;
+    }
+
+    format (file, "*** invalid opcode value %d ", id);
+    return 1;
+}
+
+/*
+ * Print a register name.
+ *
+ * Registers outside the architecture register file are printed as the file
+ * name from reg_file followed by the register number. Architecture registers
+ * are printed by kind, selected by the high nibble of the number, with the
+ * low nibble as index.
+ *
+ * Returns -1 after printing "null" or "ip" (callers print nothing further
+ * for the operand in that case), 0 on success, and otherwise whatever
+ * control() returned for the register file.
+ */
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+    GLuint idx;
+
+    /* Clear the Compr4 instruction compression bit. */
+    if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+        _reg_nr &= ~(1 << 7);
+
+    if (_reg_file != BRW_ARCHITECTURE_REGISTER_FILE) {
+        int err = control (file, "src reg file", reg_file, _reg_file, NULL);
+
+        format (file, "%d", _reg_nr);
+        return err;
+    }
+
+    idx = _reg_nr & 0x0f;
+    switch (_reg_nr & 0xf0) {
+    case BRW_ARF_NULL:
+        string (file, "null");
+        return -1;
+    case BRW_ARF_IP:
+        string (file, "ip");
+        return -1;
+    case BRW_ARF_ADDRESS:
+        format (file, "a%d", idx);
+        break;
+    case BRW_ARF_ACCUMULATOR:
+        format (file, "acc%d", idx);
+        break;
+    case BRW_ARF_FLAG:
+        format (file, "f%d", idx);
+        break;
+    case BRW_ARF_MASK:
+        format (file, "mask%d", idx);
+        break;
+    case BRW_ARF_MASK_STACK:
+        format (file, "msd%d", idx);
+        break;
+    case BRW_ARF_STATE:
+        format (file, "sr%d", idx);
+        break;
+    case BRW_ARF_CONTROL:
+        format (file, "cr%d", idx);
+        break;
+    case BRW_ARF_NOTIFICATION_COUNT:
+        format (file, "n%d", idx);
+        break;
+    default:
+        /* Unknown kind: show the whole register number. */
+        format (file, "ARF%d", _reg_nr);
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Print the destination operand of a one- or two-source instruction.
+ *
+ * Align1 mode handles direct and address-register-indirect destinations;
+ * align16 mode handles direct destinations only and prints a message for
+ * the indirect form.
+ *
+ * Returns 0 on success and non-zero when a field could not be decoded or the
+ * addressing mode is unsupported. When reg() returns -1 ("null"/"ip")
+ * nothing more is printed for the operand and 0 is returned.
+ */
+static int dest (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+
+    if (inst->header.access_mode == BRW_ALIGN_1) {
+        if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) {
+            err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            /* The sub-register is stored in bytes; print it in elements. */
+            if (inst->bits1.da1.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+                        reg_type_size[inst->bits1.da1.dest_reg_type]);
+            string (file, "<");
+            err |= control (file, "horiz stride", horiz_stride, inst->bits1.da1.dest_horiz_stride, NULL);
+            string (file, ">");
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+        } else {
+            string (file, "g[a0");
+            if (inst->bits1.ia1.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+                        reg_type_size[inst->bits1.ia1.dest_reg_type]);
+            if (inst->bits1.ia1.dest_indirect_offset)
+                format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+            string (file, "]<");
+            err |= control (file, "horiz stride", horiz_stride, inst->bits1.ia1.dest_horiz_stride, NULL);
+            string (file, ">");
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+        }
+    } else {
+        if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) {
+            err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            if (inst->bits1.da16.dest_subreg_nr)
+                format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+                        reg_type_size[inst->bits1.da16.dest_reg_type]);
+            string (file, "<1>");
+            err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+        } else {
+            err = 1;
+            string (file, "Indirect align16 address mode not supported");
+        }
+    }
+
+    /* Hand the accumulated status back instead of discarding it. */
+    return err;
+}
+
+/*
+ * Print the destination operand of a three-source instruction.
+ *
+ * The encoding has a single register-file bit: set selects the message
+ * register file, clear the general register file. The region is always
+ * printed as "<1>" and the type is always BRW_REGISTER_TYPE_F.
+ *
+ * Returns 0 on success and non-zero when a field could not be decoded.
+ */
+static int dest_3src (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+    uint32_t reg_file;
+
+    reg_file = inst->bits1.da3src.dest_reg_file ? BRW_MESSAGE_REGISTER_FILE
+                                                : BRW_GENERAL_REGISTER_FILE;
+
+    err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr);
+    if (err == -1)
+        return 0;
+    if (inst->bits1.da3src.dest_subreg_nr)
+        format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
+    string (file, "<1>");
+    err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL);
+    err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL);
+
+    /* Hand the accumulated status back instead of discarding it. */
+    return err;
+}
+
+/*
+ * Print an align1 source region as "<vert stride,width,horiz stride>".
+ * Returns the OR of the control() results, so 0 means all three fields
+ * decoded.
+ */
+static int src_align1_region (FILE *file,
+                              GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+    int failed;
+
+    string (file, "<");
+    failed = control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",");
+    failed |= control (file, "width", width, _width, NULL);
+    string (file, ",");
+    failed |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+    string (file, ">");
+
+    return failed;
+}
+
+/*
+ * Print a directly addressed align1 source operand: negate/abs modifiers,
+ * register, optional sub-register, region and register type.
+ *
+ * sub_reg_num is in bytes and is printed divided by the size of the
+ * operand type.
+ *
+ * Returns 0 on success and non-zero when a field could not be decoded. When
+ * reg() returns -1 ("null"/"ip") nothing more is printed and 0 is returned.
+ */
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+                    GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+                    GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+    int err = 0;
+
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, reg_num);
+    if (err == -1)
+        return 0;
+    if (sub_reg_num)
+        format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
+    /* Keep the region's status too; it used to be dropped. */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print an address-register-indirect align1 source operand as
+ * "g[a0.<subreg> <imm>]" followed by the region and register type.
+ *
+ * _reg_file and _addr_mode are accepted for the callers' convenience but
+ * are not used here.
+ *
+ * Returns 0 on success and non-zero when a field could not be decoded.
+ */
+static int src_ia1 (FILE *file,
+                    GLuint type,
+                    GLuint _reg_file,
+                    GLint _addr_imm,
+                    GLuint _addr_subreg_nr,
+                    GLuint _negate,
+                    GLuint __abs,
+                    GLuint _addr_mode,
+                    GLuint _horiz_stride,
+                    GLuint _width,
+                    GLuint _vert_stride)
+{
+    int err = 0;
+
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    string (file, "g[a0");
+    if (_addr_subreg_nr)
+        format (file, ".%d", _addr_subreg_nr);
+    if (_addr_imm)
+        format (file, " %d", _addr_imm);
+    string (file, "]");
+    /* Keep the region's status too; it used to be dropped. */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print a directly addressed align16 source operand: negate/abs modifiers,
+ * register, optional sub-register, "<vert stride,4,1>" region, swizzle and
+ * register type.
+ *
+ * Returns the OR of the helper results (0 when nothing failed). When reg()
+ * returns -1 ("null"/"ip") nothing more is printed and 0 is returned.
+ */
+static int src_da16 (FILE *file,
+                     GLuint _reg_type,
+                     GLuint _reg_file,
+                     GLuint _vert_stride,
+                     GLuint _reg_nr,
+                     GLuint _subreg_nr,
+                     GLuint __abs,
+                     GLuint _negate,
+                     GLuint swz_x,
+                     GLuint swz_y,
+                     GLuint swz_z,
+                     GLuint swz_w)
+{
+    const int identity = swz_x == BRW_CHANNEL_X && swz_y == BRW_CHANNEL_Y &&
+                         swz_z == BRW_CHANNEL_Z && swz_w == BRW_CHANNEL_W;
+    const int broadcast = swz_x == swz_y && swz_x == swz_z && swz_x == swz_w;
+    int err = 0;
+
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, _reg_nr);
+    if (err == -1)
+        return 0;
+
+    /* bit4 for subreg number byte addressing. Make this same meaning as
+       in da1 case, so output looks consistent. */
+    if (_subreg_nr)
+        format (file, ".%d", 16 / reg_type_size[_reg_type]);
+
+    string (file, "<");
+    err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",4,1>");
+
+    /*
+     * Swizzle display: the identity mapping prints nothing, one channel
+     * replicated to all four prints that channel once, anything else
+     * prints all four selects.
+     */
+    if (!identity) {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+        if (!broadcast) {
+            err |= control (file, "channel select", chan_sel, swz_y, NULL);
+            err |= control (file, "channel select", chan_sel, swz_z, NULL);
+            err |= control (file, "channel select", chan_sel, swz_w, NULL);
+        }
+    }
+
+    err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+    return err;
+}
+
+/*
+ * Print source 0 of a three-source instruction: negate/abs modifiers, the
+ * general register with optional sub-register, the fixed "<4,1,1>" region,
+ * the type suffix for BRW_REGISTER_TYPE_F and the swizzle.
+ *
+ * Returns the OR of the helper results (0 when nothing failed).
+ */
+static int src0_3src (FILE *file, struct brw_instruction *inst)
+{
+    const GLuint packed = inst->bits2.da3src.src0_swizzle;
+    GLuint swz[4];
+    int identity, broadcast;
+    int err = 0;
+    int i;
+
+    /* Two bits per channel select, x in the lowest pair. */
+    for (i = 0; i < 4; i++)
+        swz[i] = (packed >> (2 * i)) & 0x3;
+    identity = swz[0] == BRW_CHANNEL_X && swz[1] == BRW_CHANNEL_Y &&
+               swz[2] == BRW_CHANNEL_Z && swz[3] == BRW_CHANNEL_W;
+    broadcast = swz[0] == swz[1] && swz[0] == swz[2] && swz[0] == swz[3];
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
+    if (err == -1)
+        return 0;
+    if (inst->bits2.da3src.src0_subreg_nr)
+        format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /*
+     * Swizzle display: identity prints nothing, a single replicated channel
+     * prints once, any other mapping prints all four selects.
+     */
+    if (!identity) {
+        const int count = broadcast ? 1 : 4;
+
+        string (file, ".");
+        for (i = 0; i < count; i++)
+            err |= control (file, "channel select", chan_sel, swz[i], NULL);
+    }
+    return err;
+}
+
+/*
+ * Print source 1 of a three-source instruction: negate/abs modifiers, the
+ * general register with optional sub-register, the fixed "<4,1,1>" region,
+ * the type suffix for BRW_REGISTER_TYPE_F and the swizzle.
+ *
+ * The sub-register number is split across two instruction words and is
+ * reassembled here.
+ *
+ * Returns the OR of the helper results (0 when nothing failed).
+ */
+static int src1_3src (FILE *file, struct brw_instruction *inst)
+{
+    const GLuint packed = inst->bits2.da3src.src1_swizzle;
+    const GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |
+                                   (inst->bits3.da3src.src1_subreg_nr_high << 2));
+    GLuint swz[4];
+    int identity, broadcast;
+    int err = 0;
+    int i;
+
+    /* Two bits per channel select, x in the lowest pair. */
+    for (i = 0; i < 4; i++)
+        swz[i] = (packed >> (2 * i)) & 0x3;
+    identity = swz[0] == BRW_CHANNEL_X && swz[1] == BRW_CHANNEL_Y &&
+               swz[2] == BRW_CHANNEL_Z && swz[3] == BRW_CHANNEL_W;
+    broadcast = swz[0] == swz[1] && swz[0] == swz[2] && swz[0] == swz[3];
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate,
+                    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+                inst->bits3.da3src.src1_reg_nr);
+    if (err == -1)
+        return 0;
+    if (src1_subreg_nr)
+        format (file, ".%d", src1_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /*
+     * Swizzle display: identity prints nothing, a single replicated channel
+     * prints once, any other mapping prints all four selects.
+     */
+    if (!identity) {
+        const int count = broadcast ? 1 : 4;
+
+        string (file, ".");
+        for (i = 0; i < count; i++)
+            err |= control (file, "channel select", chan_sel, swz[i], NULL);
+    }
+    return err;
+}
+
+
+/*
+ * Print source 2 of a three-source instruction: negate/abs modifiers, the
+ * general register with optional sub-register, the fixed "<4,1,1>" region,
+ * the type suffix for BRW_REGISTER_TYPE_F and the swizzle.
+ *
+ * Returns the OR of the helper results (0 when nothing failed).
+ */
+static int src2_3src (FILE *file, struct brw_instruction *inst)
+{
+    const GLuint packed = inst->bits3.da3src.src2_swizzle;
+    GLuint swz[4];
+    int identity, broadcast;
+    int err = 0;
+    int i;
+
+    /* Two bits per channel select, x in the lowest pair. */
+    for (i = 0; i < 4; i++)
+        swz[i] = (packed >> (2 * i)) & 0x3;
+    identity = swz[0] == BRW_CHANNEL_X && swz[1] == BRW_CHANNEL_Y &&
+               swz[2] == BRW_CHANNEL_Z && swz[3] == BRW_CHANNEL_W;
+    broadcast = swz[0] == swz[1] && swz[0] == swz[2] && swz[0] == swz[3];
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate,
+                    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+                inst->bits3.da3src.src2_reg_nr);
+    if (err == -1)
+        return 0;
+    if (inst->bits3.da3src.src2_subreg_nr)
+        format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /*
+     * Swizzle display: identity prints nothing, a single replicated channel
+     * prints once, any other mapping prints all four selects.
+     */
+    if (!identity) {
+        const int count = broadcast ? 1 : 4;
+
+        string (file, ".");
+        for (i = 0; i < count; i++)
+            err |= control (file, "channel select", chan_sel, swz[i], NULL);
+    }
+    return err;
+}
+
+/*
+ * Print the immediate operand held in the instruction's last dword,
+ * formatted according to the register type and followed by the type
+ * suffix. Types not listed below print nothing.
+ *
+ * Always returns 0.
+ */
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst)
+{
+    switch (type) {
+    case BRW_REGISTER_TYPE_UD:
+        format (file, "0x%08xUD", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_D:
+        format (file, "%dD", inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UW:
+        format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_W:
+        format (file, "%dW", (int16_t) inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UB:
+        /*
+         * Truncate to an unsigned byte. A signed byte would be sign-extended
+         * when promoted for the variadic call, so 0x80 would print as
+         * 0xffffff80.
+         */
+        format (file, "0x%02xUB", (uint8_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_VF:
+        format (file, "Vector Float");
+        break;
+    case BRW_REGISTER_TYPE_V:
+        format (file, "0x%08xV", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_F:
+        format (file, "%-gF", inst->bits3.f);
+        break;
+    default:
+        /* Unlisted types print nothing. */
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Print source operand 0, choosing the printer from the operand kind:
+ * immediate, align1 direct, align1 indirect or align16 direct. Align16
+ * indirect addressing is not supported; a message is printed and 1 is
+ * returned. Otherwise the chosen printer's result is returned.
+ */
+static int src0 (FILE *file, struct brw_instruction *inst)
+{
+    const int align1 = inst->header.access_mode == BRW_ALIGN_1;
+
+    if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src0_reg_type, inst);
+
+    if (align1 && inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
+        return src_da1 (file,
+                        inst->bits1.da1.src0_reg_type,
+                        inst->bits1.da1.src0_reg_file,
+                        inst->bits2.da1.src0_vert_stride,
+                        inst->bits2.da1.src0_width,
+                        inst->bits2.da1.src0_horiz_stride,
+                        inst->bits2.da1.src0_reg_nr,
+                        inst->bits2.da1.src0_subreg_nr,
+                        inst->bits2.da1.src0_abs,
+                        inst->bits2.da1.src0_negate);
+
+    if (align1)
+        return src_ia1 (file,
+                        inst->bits1.ia1.src0_reg_type,
+                        inst->bits1.ia1.src0_reg_file,
+                        inst->bits2.ia1.src0_indirect_offset,
+                        inst->bits2.ia1.src0_subreg_nr,
+                        inst->bits2.ia1.src0_negate,
+                        inst->bits2.ia1.src0_abs,
+                        inst->bits2.ia1.src0_address_mode,
+                        inst->bits2.ia1.src0_horiz_stride,
+                        inst->bits2.ia1.src0_width,
+                        inst->bits2.ia1.src0_vert_stride);
+
+    if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
+        return src_da16 (file,
+                         inst->bits1.da16.src0_reg_type,
+                         inst->bits1.da16.src0_reg_file,
+                         inst->bits2.da16.src0_vert_stride,
+                         inst->bits2.da16.src0_reg_nr,
+                         inst->bits2.da16.src0_subreg_nr,
+                         inst->bits2.da16.src0_abs,
+                         inst->bits2.da16.src0_negate,
+                         inst->bits2.da16.src0_swz_x,
+                         inst->bits2.da16.src0_swz_y,
+                         inst->bits2.da16.src0_swz_z,
+                         inst->bits2.da16.src0_swz_w);
+
+    string (file, "Indirect align16 address mode not supported");
+    return 1;
+}
+
+/*
+ * Print source operand 1, choosing the printer from the operand kind:
+ * immediate, align1 direct, align1 indirect or align16 direct. Align16
+ * indirect addressing is not supported; a message is printed and 1 is
+ * returned. Otherwise the chosen printer's result is returned.
+ */
+static int src1 (FILE *file, struct brw_instruction *inst)
+{
+    const int align1 = inst->header.access_mode == BRW_ALIGN_1;
+
+    if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src1_reg_type, inst);
+
+    if (align1 && inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
+        return src_da1 (file,
+                        inst->bits1.da1.src1_reg_type,
+                        inst->bits1.da1.src1_reg_file,
+                        inst->bits3.da1.src1_vert_stride,
+                        inst->bits3.da1.src1_width,
+                        inst->bits3.da1.src1_horiz_stride,
+                        inst->bits3.da1.src1_reg_nr,
+                        inst->bits3.da1.src1_subreg_nr,
+                        inst->bits3.da1.src1_abs,
+                        inst->bits3.da1.src1_negate);
+
+    if (align1)
+        return src_ia1 (file,
+                        inst->bits1.ia1.src1_reg_type,
+                        inst->bits1.ia1.src1_reg_file,
+                        inst->bits3.ia1.src1_indirect_offset,
+                        inst->bits3.ia1.src1_subreg_nr,
+                        inst->bits3.ia1.src1_negate,
+                        inst->bits3.ia1.src1_abs,
+                        inst->bits3.ia1.src1_address_mode,
+                        inst->bits3.ia1.src1_horiz_stride,
+                        inst->bits3.ia1.src1_width,
+                        inst->bits3.ia1.src1_vert_stride);
+
+    if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
+        return src_da16 (file,
+                         inst->bits1.da16.src1_reg_type,
+                         inst->bits1.da16.src1_reg_file,
+                         inst->bits3.da16.src1_vert_stride,
+                         inst->bits3.da16.src1_reg_nr,
+                         inst->bits3.da16.src1_subreg_nr,
+                         inst->bits3.da16.src1_abs,
+                         inst->bits3.da16.src1_negate,
+                         inst->bits3.da16.src1_swz_x,
+                         inst->bits3.da16.src1_swz_y,
+                         inst->bits3.da16.src1_swz_z,
+                         inst->bits3.da16.src1_swz_w);
+
+    string (file, "Indirect align16 address mode not supported");
+    return 1;
+}
+
+/*
+ * Channel count for each execution-size encoding: entry i holds 2 to the
+ * power i, for i from 0 to 5.
+ */
+int esize[6] = { 1, 2, 4, 8, 16, 32 };
+
+/*
+ * Print the quarter/half selection of an instruction: " 1Q".." 4Q" when the
+ * execution size is 8, " 1H"/" 2H" when it is 16, nothing otherwise.
+ *
+ * Always returns 0.
+ */
+static int qtr_ctrl(FILE *file, struct brw_instruction *inst)
+{
+    const int qtr_ctl = inst->header.compression_control;
+    const int size_idx = inst->header.execution_size;
+    int exec_size;
+
+    /*
+     * The field comes straight from the instruction bits; skip encodings
+     * that esize has no entry for instead of reading past its end.
+     */
+    if (size_idx < 0 || size_idx >= (int) (sizeof (esize) / sizeof (esize[0])))
+        return 0;
+    exec_size = esize[size_idx];
+
+    if (exec_size == 8) {
+        switch (qtr_ctl) {
+        case 0:
+            string (file, " 1Q");
+            break;
+        case 1:
+            string (file, " 2Q");
+            break;
+        case 2:
+            string (file, " 3Q");
+            break;
+        case 3:
+            string (file, " 4Q");
+            break;
+        default:
+            /* No name for other values; print nothing. */
+            break;
+        }
+    } else if (exec_size == 16) {
+        if (qtr_ctl < 2)
+            string (file, " 1H");
+        else
+            string (file, " 2H");
+    }
+    return 0;
+}
+
+/*
+ * Print one instruction as text.
+ *
+ * The output is, in order: an optional predicate, the opcode with its
+ * modifiers and execution size, the operands (or jump fields) padded to
+ * fixed columns, for SEND/SENDC a second line describing the message, a
+ * "{ ... }" list of instruction options (omitted for NOP), then ";" and a
+ * newline.
+ *
+ * file - stream to write to
+ * inst - instruction to decode
+ * gen - generation number; the code compares it against 5, 6 and 7 to pick
+ * which encoding of several fields to read
+ *
+ * Returns the bitwise OR of the results of the helper printers called along
+ * the way, so 0 means none of them reported a problem.
+ */
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
+{
+ int err = 0;
+ int space = 0;
+
+ /* Predicate, printed as "(...) " only when predication is enabled. */
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ /* The flag register number is read from the instruction only for gen >= 7. */
+ format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
+ if (inst->bits2.da1.flag_subreg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+ /* For MATH the destreg/conditional-modifier field is decoded as the math function. */
+ if (inst->header.opcode == BRW_OPCODE_MATH) {
+ string (file, " ");
+ err |= control (file, "function", math_function,
+ inst->header.destreg__conditionalmod, NULL);
+ } else if (inst->header.opcode != BRW_OPCODE_SEND &&
+ inst->header.opcode != BRW_OPCODE_SENDC) {
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg__conditionalmod, NULL);
+
+ /* If we're using the conditional modifier, print which flags reg is
+ * used for it. Note that on gen6+, the embedded-condition SEL and
+ * control flow doesn't update flags.
+ */
+ if (inst->header.destreg__conditionalmod &&
+ (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL &&
+ inst->header.opcode != BRW_OPCODE_IF &&
+ inst->header.opcode != BRW_OPCODE_WHILE))) {
+ format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
+ if (inst->bits2.da1.flag_subreg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
+ }
+ }
+
+ /* Execution size in parentheses, for everything except NOP. */
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+
+ /* Before gen6, SEND prints the same field as a plain number. */
+ if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
+ format (file, " %d", inst->header.destreg__conditionalmod);
+
+ /* Three-source instructions have their own operand printers. */
+ if (opcode[inst->header.opcode].nsrc == 3) {
+ pad (file, 16);
+ err |= dest_3src (file, inst);
+
+ pad (file, 32);
+ err |= src0_3src (file, inst);
+
+ pad (file, 48);
+ err |= src1_3src (file, inst);
+
+ pad (file, 64);
+ err |= src2_3src (file, inst);
+ } else {
+ /* Either a destination or, for the listed opcodes, their jump fields. */
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE ||
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
+ format (file, " %d", inst->bits3.break_cont.jip);
+ } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF ||
+ inst->header.opcode == BRW_OPCODE_ELSE ||
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
+ format (file, " %d", inst->bits1.branch_gen6.jump_count);
+ } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK ||
+ inst->header.opcode == BRW_OPCODE_CONTINUE ||
+ inst->header.opcode == BRW_OPCODE_HALT)) ||
+ (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) {
+ format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
+ } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
+ format (file, " %d", inst->bits3.d);
+ }
+
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+ }
+
+ /* SEND/SENDC: describe the message on a second line. */
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC) {
+ enum brw_message_target target;
+
+ /* The field holding the message target differs per generation. */
+ if (gen >= 6)
+ target = inst->header.destreg__conditionalmod;
+ else if (gen == 5)
+ target = inst->bits2.send_gen5.sfid;
+ else
+ target = inst->bits3.generic.msg_target;
+
+ newline (file);
+ pad (file, 16);
+ space = 0;
+
+ if (gen >= 6) {
+ err |= control (file, "target function", target_function_gen6,
+ target, &space);
+ } else {
+ err |= control (file, "target function", target_function,
+ target, &space);
+ }
+
+ /* Target-specific part of the message descriptor. */
+ switch (target) {
+ case BRW_SFID_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math.precision, &space);
+ break;
+ case BRW_SFID_SAMPLER:
+ if (gen >= 7) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen7.binding_table_index,
+ inst->bits3.sampler_gen7.sampler,
+ inst->bits3.sampler_gen7.msg_type,
+ inst->bits3.sampler_gen7.simd_mode);
+ } else if (gen >= 5) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen5.binding_table_index,
+ inst->bits3.sampler_gen5.sampler,
+ inst->bits3.sampler_gen5.msg_type,
+ inst->bits3.sampler_gen5.simd_mode);
+ } else if (0 /* FINISHME: is_g4x */) {
+ format (file, " (%d, %d)",
+ inst->bits3.sampler_g4x.binding_table_index,
+ inst->bits3.sampler_g4x.sampler);
+ } else {
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format",
+ sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ }
+ break;
+ case BRW_SFID_DATAPORT_READ:
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.gen6_dp.binding_table_index,
+ inst->bits3.gen6_dp.msg_control,
+ inst->bits3.gen6_dp.msg_type,
+ inst->bits3.gen6_dp.send_commit_msg);
+ } else if (gen >= 5 /* FINISHME: || is_g4x */) {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read_gen5.binding_table_index,
+ inst->bits3.dp_read_gen5.msg_control,
+ inst->bits3.dp_read_gen5.msg_type);
+ } else {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read.binding_table_index,
+ inst->bits3.dp_read.msg_control,
+ inst->bits3.dp_read.msg_type);
+ }
+ break;
+
+ case BRW_SFID_DATAPORT_WRITE:
+ if (gen >= 7) {
+ format (file, " (");
+
+ err |= control (file, "DP rc message type",
+ dp_rc_msg_type_gen6,
+ inst->bits3.gen7_dp.msg_type, &space);
+
+ format (file, ", %d, %d, %d)",
+ inst->bits3.gen7_dp.binding_table_index,
+ inst->bits3.gen7_dp.msg_control,
+ inst->bits3.gen7_dp.msg_type);
+ } else if (gen == 6) {
+ format (file, " (");
+
+ err |= control (file, "DP rc message type",
+ dp_rc_msg_type_gen6,
+ inst->bits3.gen6_dp.msg_type, &space);
+
+ format (file, ", %d, %d, %d, %d)",
+ inst->bits3.gen6_dp.binding_table_index,
+ inst->bits3.gen6_dp.msg_control,
+ inst->bits3.gen6_dp.msg_type,
+ inst->bits3.gen6_dp.send_commit_msg);
+ } else {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.last_render_target << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ }
+ break;
+
+ case BRW_SFID_URB:
+ if (gen >= 5) {
+ format (file, " %d", inst->bits3.urb_gen5.offset);
+ } else {
+ format (file, " %d", inst->bits3.urb.offset);
+ }
+
+ space = 1;
+ if (gen >= 5) {
+ err |= control (file, "urb opcode", urb_opcode,
+ inst->bits3.urb_gen5.opcode, &space);
+ }
+ /* NOTE(review): the fields below are read through bits3.urb for every gen, unlike offset and opcode above - confirm the layouts agree. */
+ err |= control (file, "urb swizzle", urb_swizzle,
+ inst->bits3.urb.swizzle_control, &space);
+ err |= control (file, "urb allocate", urb_allocate,
+ inst->bits3.urb.allocate, &space);
+ err |= control (file, "urb used", urb_used,
+ inst->bits3.urb.used, &space);
+ err |= control (file, "urb complete", urb_complete,
+ inst->bits3.urb.complete, &space);
+ break;
+ case BRW_SFID_THREAD_SPAWNER:
+ break;
+ case GEN7_SFID_DATAPORT_DATA_CACHE:
+ format (file, " (%d, %d, %d)",
+ inst->bits3.gen7_dp.binding_table_index,
+ inst->bits3.gen7_dp.msg_control,
+ inst->bits3.gen7_dp.msg_type);
+ break;
+
+
+ default:
+ format (file, "unsupported target %d", target);
+ break;
+ }
+ /* Message length and response length end the message line. */
+ if (space)
+ string (file, " ");
+ if (gen >= 5) {
+ format (file, "mlen %d",
+ inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic_gen5.response_length);
+ } else {
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
+ }
+ /* Instruction options, printed as "{ ... }" from column 64 on. */
+ pad (file, 64);
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ if (gen >= 6)
+ err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
+ else
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+
+ if (gen >= 6)
+ err |= qtr_ctrl (file, inst);
+ else {
+ /* Before gen6, a compressed write to a message register whose number has bit 7 set is shown as "compr4". */
+ if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
+ opcode[inst->header.opcode].ndst > 0 &&
+ inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
+ inst->bits1.da1.dest_reg_nr & (1 << 7)) {
+ format (file, " compr4");
+ } else {
+ err |= control (file, "compression control", compr_ctrl,
+ inst->header.compression_control, &space);
+ }
+ }
+
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (gen >= 6)
+ err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC)
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_reg.h b/src/gallium/drivers/ilo/shader/toy_compiler_reg.h
new file mode 100644
index 00000000000..8c11b3a3275
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_compiler_reg.h
@@ -0,0 +1,800 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef TOY_REG_H
+#define TOY_REG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h" /* for assert() */
+#include "util/u_math.h" /* for union fi */
+
+/* a toy reg is 256 bits wide; TOY_REG_WIDTH is that width in bytes */
+#define TOY_REG_WIDTH 32
+
+/**
+ * Register files an operand may live in.
+ *
+ * TOY_FILE_VRF is the compiler's virtual register file; every other value
+ * is treated as a hardware file (see toy_file_is_hw()).  TOY_FILE_COUNT is
+ * the number of files, not a file itself.
+ */
+enum toy_file {
+   TOY_FILE_VRF   = 0,  /* virtual register file */
+
+   TOY_FILE_ARF   = 1,
+   TOY_FILE_GRF   = 2,
+   TOY_FILE_MRF   = 3,
+   TOY_FILE_IMM   = 4,
+
+   TOY_FILE_COUNT = 5,
+};
+
+/**
+ * Data types of an operand.
+ *
+ * F, D and UD are 4 bytes wide; W and UW are 2 bytes wide (see
+ * toy_type_size()).  TOY_TYPE_COUNT is the number of types.
+ */
+enum toy_type {
+   TOY_TYPE_F     = 0,
+   TOY_TYPE_D     = 1,
+   TOY_TYPE_UD    = 2,
+   TOY_TYPE_W     = 3,
+   TOY_TYPE_UW    = 4,
+   TOY_TYPE_V     = 5,  /* only valid for immediates */
+
+   TOY_TYPE_COUNT = 6,
+};
+
+/**
+ * Register regions ("rectangles").  In TOY_RECT_abc, the digits a, b and c
+ * are the vertical stride, the width, and the horizontal stride
+ * respectively.  TOY_RECT_COUNT is the number of rectangles.
+ */
+enum toy_rect {
+   TOY_RECT_LINEAR = 0,
+   TOY_RECT_041    = 1,
+   TOY_RECT_010    = 2,
+   TOY_RECT_220    = 3,
+   TOY_RECT_440    = 4,
+   TOY_RECT_240    = 5,
+
+   TOY_RECT_COUNT  = 6,
+};
+
+/**
+ * Source swizzles. They are compatible with TGSI_SWIZZLE_x and hardware
+ * values.
+ *
+ * The values double as channel indices: they are used as bit positions when
+ * building TOY_WRITEMASK_x and as array indices when composing swizzles, so
+ * they must stay 0..3.
+ */
+enum toy_swizzle {
+ TOY_SWIZZLE_X = 0,
+ TOY_SWIZZLE_Y = 1,
+ TOY_SWIZZLE_Z = 2,
+ TOY_SWIZZLE_W = 3,
+};
+
+/**
+ * Destination writemasks, one bit per channel.  Bit N corresponds to the
+ * channel whose TOY_SWIZZLE_x value is N.  They are compatible with
+ * TGSI_WRITEMASK_x and hardware values.
+ */
+enum toy_writemask {
+   /* single channels */
+   TOY_WRITEMASK_X    = 1 << TOY_SWIZZLE_X,
+   TOY_WRITEMASK_Y    = 1 << TOY_SWIZZLE_Y,
+   TOY_WRITEMASK_Z    = 1 << TOY_SWIZZLE_Z,
+   TOY_WRITEMASK_W    = 1 << TOY_SWIZZLE_W,
+
+   /* two channels */
+   TOY_WRITEMASK_XY   = TOY_WRITEMASK_X | TOY_WRITEMASK_Y,
+   TOY_WRITEMASK_XZ   = TOY_WRITEMASK_X | TOY_WRITEMASK_Z,
+   TOY_WRITEMASK_XW   = TOY_WRITEMASK_X | TOY_WRITEMASK_W,
+   TOY_WRITEMASK_YZ   = TOY_WRITEMASK_Y | TOY_WRITEMASK_Z,
+   TOY_WRITEMASK_YW   = TOY_WRITEMASK_Y | TOY_WRITEMASK_W,
+   TOY_WRITEMASK_ZW   = TOY_WRITEMASK_Z | TOY_WRITEMASK_W,
+
+   /* three channels */
+   TOY_WRITEMASK_XYZ  = TOY_WRITEMASK_XY | TOY_WRITEMASK_Z,
+   TOY_WRITEMASK_XYW  = TOY_WRITEMASK_XY | TOY_WRITEMASK_W,
+   TOY_WRITEMASK_XZW  = TOY_WRITEMASK_XZ | TOY_WRITEMASK_W,
+   TOY_WRITEMASK_YZW  = TOY_WRITEMASK_YZ | TOY_WRITEMASK_W,
+
+   /* all channels */
+   TOY_WRITEMASK_XYZW = TOY_WRITEMASK_XY | TOY_WRITEMASK_ZW,
+};
+
+/**
+ * Destination operand.
+ *
+ * The bit-fields add up to exactly 32 bits and are followed by a 32-bit
+ * payload.  Operands are passed around by value; tdst_full() is the
+ * constructor that validates its result.
+ */
+struct toy_dst {
+ unsigned file:3; /* TOY_FILE_x */
+ unsigned type:3; /* TOY_TYPE_x */
+ unsigned rect:3; /* TOY_RECT_x */
+ unsigned indirect:1; /* true or false */
+ unsigned indirect_subreg:6; /* which subreg of a0? */
+
+ unsigned writemask:4; /* TOY_WRITEMASK_x */
+ unsigned pad:12; /* unused; the constructors set it to 0 */
+
+ /*
+ * For register files this is a byte offset into the file, i.e.
+ * reg * TOY_REG_WIDTH + subreg-in-bytes (see tdst()).  For TOY_FILE_IMM
+ * it holds the immediate value itself (see tdst_imm_w()).
+ */
+ uint32_t val32;
+};
+
+/**
+ * Source operand.
+ *
+ * The bit-fields add up to exactly 32 bits and are followed by a 32-bit
+ * payload.  Operands are passed around by value; tsrc_full() is the
+ * constructor that validates its result.
+ */
+struct toy_src {
+ unsigned file:3; /* TOY_FILE_x */
+ unsigned type:3; /* TOY_TYPE_x */
+ unsigned rect:3; /* TOY_RECT_x */
+ unsigned indirect:1; /* true or false */
+ unsigned indirect_subreg:6; /* which subreg of a0? */
+
+ /* one source-channel selector per destination channel */
+ unsigned swizzle_x:2; /* TOY_SWIZZLE_x */
+ unsigned swizzle_y:2; /* TOY_SWIZZLE_x */
+ unsigned swizzle_z:2; /* TOY_SWIZZLE_x */
+ unsigned swizzle_w:2; /* TOY_SWIZZLE_x */
+ unsigned absolute:1; /* true or false */
+ unsigned negate:1; /* true or false */
+ unsigned pad:6; /* unused; the constructors set it to 0 */
+
+ /*
+ * For register files this is a byte offset into the file, i.e.
+ * reg * TOY_REG_WIDTH + subreg-in-bytes (see tsrc()).  For TOY_FILE_IMM
+ * it holds the immediate value itself (see tsrc_imm()).
+ */
+ uint32_t val32;
+};
+
+/**
+ * Tell whether \p file is the virtual register file (TOY_FILE_VRF).
+ */
+static inline bool
+toy_file_is_virtual(enum toy_file file)
+{
+   switch (file) {
+   case TOY_FILE_VRF:
+      return true;
+   default:
+      return false;
+   }
+}
+
+/**
+ * Tell whether \p file is a hardware file, that is, anything but the
+ * virtual register file.
+ */
+static inline bool
+toy_file_is_hw(enum toy_file file)
+{
+   return (file != TOY_FILE_VRF);
+}
+
+/**
+ * Return the size of the file in bytes.
+ *
+ * Only TOY_FILE_GRF and TOY_FILE_MRF have a size; any other file trips an
+ * assertion and yields 0.
+ */
+static inline uint32_t
+toy_file_size(enum toy_file file)
+{
+   /* both files are sized as 256 registers; there is no MRF on GEN7+ */
+   if (file == TOY_FILE_GRF || file == TOY_FILE_MRF)
+      return 256 * TOY_REG_WIDTH;
+
+   assert(!"invalid toy file");
+   return 0;
+}
+
+/**
+ * Return the size of the type in bytes.
+ *
+ * TOY_TYPE_V (and any unknown type) has no size: it trips an assertion and
+ * yields 0.
+ */
+static inline int
+toy_type_size(enum toy_type type)
+{
+   if (type == TOY_TYPE_F || type == TOY_TYPE_D || type == TOY_TYPE_UD)
+      return 4;
+
+   if (type == TOY_TYPE_W || type == TOY_TYPE_UW)
+      return 2;
+
+   assert(!"invalid toy type");
+   return 0;
+}
+
+/**
+ * Tell whether the destination operand is the null register, i.e. offset 0
+ * of the ARF.
+ */
+static inline bool
+tdst_is_null(struct toy_dst dst)
+{
+   if (dst.file != TOY_FILE_ARF)
+      return false;
+
+   /* BRW_ARF_NULL happens to be 0 */
+   return (dst.val32 == 0);
+}
+
+/**
+ * Sanity-check the destination operand and return it unchanged.
+ *
+ * All checks are assertions, so this is a plain pass-through when
+ * assertions are compiled out.  Returning the operand lets the other
+ * tdst_*() helpers end with "return tdst_validate(dst)".
+ */
+static inline struct toy_dst
+tdst_validate(struct toy_dst dst)
+{
+   /* per-file rules */
+   if (dst.file == TOY_FILE_GRF) {
+      /* only direct GRF operands are range-checked */
+      if (!dst.indirect)
+         assert(dst.val32 < toy_file_size(dst.file));
+   }
+   else if (dst.file == TOY_FILE_VRF ||
+            dst.file == TOY_FILE_ARF ||
+            dst.file == TOY_FILE_MRF) {
+      assert(!dst.indirect);
+      if (dst.file == TOY_FILE_MRF)
+         assert(dst.val32 < toy_file_size(dst.file));
+   }
+   else if (dst.file == TOY_FILE_IMM) {
+      /* yes, dst can be IMM of type W (for IF/ELSE/ENDIF/WHILE) */
+      assert(!dst.indirect);
+      assert(dst.type == TOY_TYPE_W);
+   }
+   else {
+      assert(!"invalid dst file");
+   }
+
+   /* vector immediates cannot be written */
+   if (dst.type == TOY_TYPE_V)
+      assert(!"invalid dst type");
+
+   assert(dst.rect == TOY_RECT_LINEAR);
+
+   /* register offsets must be aligned to the operand type */
+   if (dst.file != TOY_FILE_IMM)
+      assert(dst.val32 % toy_type_size(dst.type) == 0);
+
+   assert(dst.writemask <= TOY_WRITEMASK_XYZW);
+
+   return dst;
+}
+
+/**
+ * Return a copy of the destination operand retyped to \p type.  The result
+ * is validated.
+ */
+static inline struct toy_dst
+tdst_type(struct toy_dst dst, enum toy_type type)
+{
+   struct toy_dst retyped = dst;
+
+   retyped.type = type;
+
+   return tdst_validate(retyped);
+}
+
+/**
+ * Return a copy of the destination operand retyped to TOY_TYPE_D.
+ */
+static inline struct toy_dst
+tdst_d(struct toy_dst dst)
+{
+   dst.type = TOY_TYPE_D;
+   return tdst_validate(dst);
+}
+
+/**
+ * Return a copy of the destination operand retyped to TOY_TYPE_UD.
+ */
+static inline struct toy_dst
+tdst_ud(struct toy_dst dst)
+{
+   dst.type = TOY_TYPE_UD;
+   return tdst_validate(dst);
+}
+
+/**
+ * Return a copy of the destination operand retyped to TOY_TYPE_W.
+ */
+static inline struct toy_dst
+tdst_w(struct toy_dst dst)
+{
+   dst.type = TOY_TYPE_W;
+   return tdst_validate(dst);
+}
+
+/**
+ * Return a copy of the destination operand retyped to TOY_TYPE_UW.
+ */
+static inline struct toy_dst
+tdst_uw(struct toy_dst dst)
+{
+   dst.type = TOY_TYPE_UW;
+   return tdst_validate(dst);
+}
+
+/**
+ * Return a copy of the destination operand with its rectangle replaced by
+ * \p rect.  The result is validated.
+ *
+ * NOTE(review): tdst_validate() asserts that the rectangle is
+ * TOY_RECT_LINEAR, so any other \p rect trips that assertion.
+ */
+static inline struct toy_dst
+tdst_rect(struct toy_dst dst, enum toy_rect rect)
+{
+   struct toy_dst result = dst;
+
+   result.rect = rect;
+
+   return tdst_validate(result);
+}
+
+/**
+ * Narrow the writemask of the destination operand.  The new mask is the
+ * intersection of the current writemask and \p writemask, so channels that
+ * are already disabled stay disabled.
+ */
+static inline struct toy_dst
+tdst_writemask(struct toy_dst dst, enum toy_writemask writemask)
+{
+   dst.writemask = dst.writemask & writemask;
+
+   return tdst_validate(dst);
+}
+
+/**
+ * Offset the destination operand by \p reg whole registers plus \p subreg
+ * elements of the operand's current type.  Both may be negative.  The
+ * result is validated.
+ */
+static inline struct toy_dst
+tdst_offset(struct toy_dst dst, int reg, int subreg)
+{
+   /* val32 is a byte offset, so convert both parts to bytes first */
+   const int delta_in_bytes =
+      reg * TOY_REG_WIDTH + subreg * toy_type_size(dst.type);
+
+   dst.val32 += delta_in_bytes;
+
+   return tdst_validate(dst);
+}
+
+/**
+ * Construct a destination operand with every field given explicitly.  The
+ * padding bits are zeroed and the result is validated.
+ */
+static inline struct toy_dst
+tdst_full(enum toy_file file, enum toy_type type, enum toy_rect rect,
+          bool indirect, unsigned indirect_subreg,
+          enum toy_writemask writemask, uint32_t val32)
+{
+   const struct toy_dst dst = {
+      .file = file,
+      .type = type,
+      .rect = rect,
+      .indirect = indirect,
+      .indirect_subreg = indirect_subreg,
+      .writemask = writemask,
+      .pad = 0,
+      .val32 = val32,
+   };
+
+   return tdst_validate(dst);
+}
+
+/**
+ * Construct the null destination operand: offset 0 of the ARF, typed
+ * TOY_TYPE_F, linear, direct, with all four channels enabled.
+ */
+static inline struct toy_dst
+tdst_null(void)
+{
+   struct toy_dst dst;
+
+   dst.file = TOY_FILE_ARF;
+   dst.type = TOY_TYPE_F;
+   dst.rect = TOY_RECT_LINEAR;
+   dst.indirect = false;
+   dst.indirect_subreg = 0;
+   dst.writemask = TOY_WRITEMASK_XYZW;
+   dst.pad = 0;
+
+   dst.val32 = 0;
+
+   return dst;
+}
+
+/**
+ * Construct a destination operand from a source operand.
+ *
+ * File, type, rectangle, addressing and val32 are copied.  The writemask
+ * enables exactly those channels that the source swizzles select; absolute
+ * and negate are dropped.
+ */
+static inline struct toy_dst
+tdst_from(struct toy_src src)
+{
+   unsigned selected = 0;
+
+   /* each swizzle names one channel; collect them as writemask bits */
+   selected |= 1 << src.swizzle_x;
+   selected |= 1 << src.swizzle_y;
+   selected |= 1 << src.swizzle_z;
+   selected |= 1 << src.swizzle_w;
+
+   return tdst_full(src.file, src.type, src.rect,
+                    src.indirect, src.indirect_subreg,
+                    (enum toy_writemask) selected, src.val32);
+}
+
+/**
+ * Construct a direct destination operand at register \p reg, byte offset
+ * \p subreg_in_bytes, of file \p file.  The type is TOY_TYPE_F, the
+ * rectangle is TOY_RECT_LINEAR, and the writemask is TOY_WRITEMASK_XYZW.
+ */
+static inline struct toy_dst
+tdst(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
+{
+   return tdst_full(file, TOY_TYPE_F, TOY_RECT_LINEAR,
+                    false, 0, TOY_WRITEMASK_XYZW,
+                    reg * TOY_REG_WIDTH + subreg_in_bytes);
+}
+
+/**
+ * Construct an immediate destination operand of type TOY_TYPE_W.  The
+ * value is sign-extended to 32 bits before being stored in val32.
+ */
+static inline struct toy_dst
+tdst_imm_w(int16_t w)
+{
+   union fi bits;
+
+   bits.i = w;
+
+   return tdst_full(TOY_FILE_IMM, TOY_TYPE_W, TOY_RECT_LINEAR,
+                    false, 0, TOY_WRITEMASK_XYZW, bits.ui);
+}
+
+/**
+ * Tell whether the source operand is the null register, i.e. offset 0 of
+ * the ARF.
+ */
+static inline bool
+tsrc_is_null(struct toy_src src)
+{
+   if (src.file != TOY_FILE_ARF)
+      return false;
+
+   /* BRW_ARF_NULL happens to be 0 */
+   return (src.val32 == 0);
+}
+
+/**
+ * Tell whether the source operand has any swizzle other than the identity
+ * (XYZW).
+ */
+static inline bool
+tsrc_is_swizzled(struct toy_src src)
+{
+   const bool is_identity = (src.swizzle_x == TOY_SWIZZLE_X &&
+                             src.swizzle_y == TOY_SWIZZLE_Y &&
+                             src.swizzle_z == TOY_SWIZZLE_Z &&
+                             src.swizzle_w == TOY_SWIZZLE_W);
+
+   return !is_identity;
+}
+
+/**
+ * Tell whether all four swizzles of the source operand select the same
+ * channel.
+ */
+static inline bool
+tsrc_is_swizzle1(struct toy_src src)
+{
+   const unsigned chan = src.swizzle_x;
+
+   return (src.swizzle_y == chan &&
+           src.swizzle_z == chan &&
+           src.swizzle_w == chan);
+}
+
+/**
+ * Sanity-check the source operand and return it unchanged.
+ *
+ * All checks are assertions, so this is a plain pass-through when
+ * assertions are compiled out.  Returning the operand lets the other
+ * tsrc_*() helpers end with "return tsrc_validate(src)".
+ */
+static inline struct toy_src
+tsrc_validate(struct toy_src src)
+{
+   /* per-file rules */
+   if (src.file == TOY_FILE_GRF) {
+      /* only direct GRF operands are range-checked */
+      if (!src.indirect)
+         assert(src.val32 < toy_file_size(src.file));
+   }
+   else if (src.file == TOY_FILE_VRF ||
+            src.file == TOY_FILE_ARF ||
+            src.file == TOY_FILE_MRF) {
+      assert(!src.indirect);
+      if (src.file == TOY_FILE_MRF)
+         assert(src.val32 < toy_file_size(src.file));
+   }
+   else if (src.file == TOY_FILE_IMM) {
+      assert(!src.indirect);
+   }
+   else {
+      assert(!"invalid src file");
+   }
+
+   /* vector immediates are, well, immediates */
+   if (src.type == TOY_TYPE_V)
+      assert(src.file == TOY_FILE_IMM);
+
+   /* register offsets must be aligned to the operand type */
+   if (src.file != TOY_FILE_IMM)
+      assert(src.val32 % toy_type_size(src.type) == 0);
+
+   assert(src.swizzle_x < 4 && src.swizzle_y < 4 &&
+          src.swizzle_z < 4 && src.swizzle_w < 4);
+
+   return src;
+}
+
+/**
+ * Return a copy of the source operand retyped to \p type.  The result is
+ * validated.
+ */
+static inline struct toy_src
+tsrc_type(struct toy_src src, enum toy_type type)
+{
+   struct toy_src retyped = src;
+
+   retyped.type = type;
+
+   return tsrc_validate(retyped);
+}
+
+/**
+ * Return a copy of the source operand retyped to TOY_TYPE_D.
+ */
+static inline struct toy_src
+tsrc_d(struct toy_src src)
+{
+   src.type = TOY_TYPE_D;
+   return tsrc_validate(src);
+}
+
+/**
+ * Return a copy of the source operand retyped to TOY_TYPE_UD.
+ */
+static inline struct toy_src
+tsrc_ud(struct toy_src src)
+{
+   src.type = TOY_TYPE_UD;
+   return tsrc_validate(src);
+}
+
+/**
+ * Return a copy of the source operand retyped to TOY_TYPE_W.
+ */
+static inline struct toy_src
+tsrc_w(struct toy_src src)
+{
+   src.type = TOY_TYPE_W;
+   return tsrc_validate(src);
+}
+
+/**
+ * Return a copy of the source operand retyped to TOY_TYPE_UW.
+ */
+static inline struct toy_src
+tsrc_uw(struct toy_src src)
+{
+   src.type = TOY_TYPE_UW;
+   return tsrc_validate(src);
+}
+
+/**
+ * Return a copy of the source operand with its rectangle replaced by
+ * \p rect.  The result is validated.
+ */
+static inline struct toy_src
+tsrc_rect(struct toy_src src, enum toy_rect rect)
+{
+   struct toy_src result = src;
+
+   result.rect = rect;
+
+   return tsrc_validate(result);
+}
+
+/**
+ * Swizzle the source operand.  The new swizzles are composed with the
+ * current ones rather than replacing them: each result channel takes the
+ * current swizzle of the channel named by the matching argument.
+ */
+static inline struct toy_src
+tsrc_swizzle(struct toy_src src,
+             enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
+             enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w)
+{
+   const unsigned old[4] = {
+      src.swizzle_x, src.swizzle_y,
+      src.swizzle_z, src.swizzle_w,
+   };
+   const enum toy_swizzle pick[4] = {
+      swizzle_x, swizzle_y, swizzle_z, swizzle_w,
+   };
+   unsigned composed[4];
+   int i;
+
+   /* look every requested channel up in the swizzles already in place */
+   for (i = 0; i < 4; i++)
+      composed[i] = old[pick[i]];
+
+   src.swizzle_x = composed[0];
+   src.swizzle_y = composed[1];
+   src.swizzle_z = composed[2];
+   src.swizzle_w = composed[3];
+
+   return tsrc_validate(src);
+}
+
+/**
+ * Broadcast one channel of the source operand to all four channels.  The
+ * channel is looked up through the current swizzles, so they are honored.
+ */
+static inline struct toy_src
+tsrc_swizzle1(struct toy_src src, enum toy_swizzle swizzle)
+{
+   const unsigned current[4] = {
+      src.swizzle_x, src.swizzle_y,
+      src.swizzle_z, src.swizzle_w,
+   };
+   const unsigned picked = current[swizzle];
+
+   src.swizzle_x = picked;
+   src.swizzle_y = picked;
+   src.swizzle_z = picked;
+   src.swizzle_w = picked;
+
+   return tsrc_validate(src);
+}
+
+/**
+ * Take the absolute value of the source operand: the absolute modifier is
+ * set and any pending negation is cleared.
+ */
+static inline struct toy_src
+tsrc_absolute(struct toy_src src)
+{
+   struct toy_src result = src;
+
+   result.negate = false;
+   result.absolute = true;
+
+   return tsrc_validate(result);
+}
+
+/**
+ * Negate the source operand by toggling its negate modifier, so negating
+ * twice gives back the original operand.
+ */
+static inline struct toy_src
+tsrc_negate(struct toy_src src)
+{
+   src.negate = (src.negate) ? false : true;
+
+   return tsrc_validate(src);
+}
+
+/**
+ * Offset the source operand by \p reg whole registers plus \p subreg
+ * elements of the operand's current type.  Both may be negative.  The
+ * result is validated.
+ */
+static inline struct toy_src
+tsrc_offset(struct toy_src src, int reg, int subreg)
+{
+   /* val32 is a byte offset, so convert both parts to bytes first */
+   const int delta_in_bytes =
+      reg * TOY_REG_WIDTH + subreg * toy_type_size(src.type);
+
+   src.val32 += delta_in_bytes;
+
+   return tsrc_validate(src);
+}
+
+/**
+ * Construct a source operand with every field given explicitly.  The
+ * padding bits are zeroed and the result is validated.
+ */
+static inline struct toy_src
+tsrc_full(enum toy_file file, enum toy_type type,
+          enum toy_rect rect, bool indirect, unsigned indirect_subreg,
+          enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
+          enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w,
+          bool absolute, bool negate,
+          uint32_t val32)
+{
+   const struct toy_src src = {
+      .file = file,
+      .type = type,
+      .rect = rect,
+      .indirect = indirect,
+      .indirect_subreg = indirect_subreg,
+      .swizzle_x = swizzle_x,
+      .swizzle_y = swizzle_y,
+      .swizzle_z = swizzle_z,
+      .swizzle_w = swizzle_w,
+      .absolute = absolute,
+      .negate = negate,
+      .pad = 0,
+      .val32 = val32,
+   };
+
+   return tsrc_validate(src);
+}
+
+/**
+ * Construct the null source operand: offset 0 of the ARF, typed
+ * TOY_TYPE_F, linear, direct, with identity swizzles and no modifiers.
+ */
+static inline struct toy_src
+tsrc_null(void)
+{
+   struct toy_src src;
+
+   src.file = TOY_FILE_ARF;
+   src.type = TOY_TYPE_F;
+   src.rect = TOY_RECT_LINEAR;
+   src.indirect = false;
+   src.indirect_subreg = 0;
+   src.swizzle_x = TOY_SWIZZLE_X;
+   src.swizzle_y = TOY_SWIZZLE_Y;
+   src.swizzle_z = TOY_SWIZZLE_Z;
+   src.swizzle_w = TOY_SWIZZLE_W;
+   src.absolute = false;
+   src.negate = false;
+   src.pad = 0;
+
+   src.val32 = 0;
+
+   return src;
+}
+
+/**
+ * Construct a source operand from a destination operand.
+ *
+ * File, type, rectangle, addressing and val32 are copied; absolute and
+ * negate are cleared.  A channel enabled in the writemask reads itself.  A
+ * disabled channel reads the first enabled channel instead, or X when the
+ * writemask is empty.
+ */
+static inline struct toy_src
+tsrc_from(struct toy_dst dst)
+{
+   unsigned fallback = TOY_SWIZZLE_X;
+   unsigned swizzle[4];
+   int chan;
+
+   /* find the first enabled channel; bit N of the writemask is channel N */
+   for (chan = 0; chan < 4; chan++) {
+      if (dst.writemask & (1 << chan)) {
+         fallback = chan;
+         break;
+      }
+   }
+
+   /* with a full writemask this yields the identity swizzle */
+   for (chan = 0; chan < 4; chan++)
+      swizzle[chan] = (dst.writemask & (1 << chan)) ? chan : fallback;
+
+   return tsrc_full(dst.file, dst.type, dst.rect,
+                    dst.indirect, dst.indirect_subreg,
+                    (enum toy_swizzle) swizzle[0],
+                    (enum toy_swizzle) swizzle[1],
+                    (enum toy_swizzle) swizzle[2],
+                    (enum toy_swizzle) swizzle[3],
+                    false, false, dst.val32);
+}
+
+/**
+ * Construct a direct source operand at register \p reg, byte offset
+ * \p subreg_in_bytes, of file \p file.  The type is TOY_TYPE_F, the
+ * rectangle is TOY_RECT_LINEAR, and there are no swizzles/absolute/negate.
+ */
+static inline struct toy_src
+tsrc(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
+{
+   return tsrc_full(file, TOY_TYPE_F, TOY_RECT_LINEAR, false, 0,
+                    TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
+                    TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
+                    false, false,
+                    reg * TOY_REG_WIDTH + subreg_in_bytes);
+}
+
+/**
+ * Construct an immediate source operand of the given type.  \p val32 is
+ * stored as is; it is up to the caller to encode it to match \p type.
+ */
+static inline struct toy_src
+tsrc_imm(enum toy_type type, uint32_t val32)
+{
+   const enum toy_file file = TOY_FILE_IMM;
+   const enum toy_rect rect = TOY_RECT_LINEAR;
+
+   return tsrc_full(file, type, rect, false, 0,
+                    TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
+                    TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
+                    false, false, val32);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_F.  The bit
+ * pattern of \p f is stored.
+ */
+static inline struct toy_src
+tsrc_imm_f(float f)
+{
+   union fi bits;
+
+   bits.f = f;
+
+   return tsrc_imm(TOY_TYPE_F, bits.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_D.
+ */
+static inline struct toy_src
+tsrc_imm_d(int32_t d)
+{
+   union fi bits;
+
+   bits.i = d;
+
+   return tsrc_imm(TOY_TYPE_D, bits.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_UD.
+ */
+static inline struct toy_src
+tsrc_imm_ud(uint32_t ud)
+{
+   union fi bits;
+
+   bits.ui = ud;
+
+   return tsrc_imm(TOY_TYPE_UD, bits.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_W.  The value is
+ * sign-extended to 32 bits before being stored.
+ */
+static inline struct toy_src
+tsrc_imm_w(int16_t w)
+{
+   union fi bits;
+
+   bits.i = w;
+
+   return tsrc_imm(TOY_TYPE_W, bits.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_UW.  The value is
+ * zero-extended to 32 bits before being stored.
+ */
+static inline struct toy_src
+tsrc_imm_uw(uint16_t uw)
+{
+   union fi bits;
+
+   bits.ui = uw;
+
+   return tsrc_imm(TOY_TYPE_UW, bits.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_V.  \p v is the
+ * already-packed vector and is stored as is.
+ */
+static inline struct toy_src
+tsrc_imm_v(uint32_t v)
+{
+   const enum toy_type type = TOY_TYPE_V;
+
+   return tsrc_imm(type, v);
+}
+
+#endif /* TOY_REG_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_helpers.h b/src/gallium/drivers/ilo/shader/toy_helpers.h
new file mode 100644
index 00000000000..dca9fd7b4c3
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_helpers.h
@@ -0,0 +1,289 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef TOY_HELPERS_H
+#define TOY_HELPERS_H
+
+#include "toy_compiler.h"
+
+/**
+ * Transpose a dst operand.
+ *
+ * Instead of processing a single vertex with each of its attributes in one
+ * register, such as
+ *
+ *   r0 = [x0, y0, z0, w0]
+ *
+ * we want to process four vertices at a time
+ *
+ *   r0 = [x0, y0, z0, w0]
+ *   r1 = [x1, y1, z1, w1]
+ *   r2 = [x2, y2, z2, w2]
+ *   r3 = [x3, y3, z3, w3]
+ *
+ * but with the attribute data "transposed"
+ *
+ *   r0 = [x0, x1, x2, x3]
+ *   r1 = [y0, y1, y2, y3]
+ *   r2 = [z0, z1, z2, z3]
+ *   r3 = [w0, w1, w2, w3]
+ *
+ * This is also known as the SoA form.
+ *
+ * \p trans must have room for four operands, one per channel.  For a VRF
+ * operand, channel N becomes register N past \p dst with a full writemask,
+ * or the null operand when the channel is masked out.  A null ARF operand
+ * is replicated.  Any other file trips an assertion and yields four null
+ * operands.
+ */
+static inline void
+tdst_transpose(struct toy_dst dst, struct toy_dst *trans)
+{
+   int chan;
+
+   if (dst.file == TOY_FILE_VRF) {
+      assert(!dst.indirect);
+      for (chan = 0; chan < 4; chan++) {
+         if (!(dst.writemask & (1 << chan))) {
+            trans[chan] = tdst_null();
+            continue;
+         }
+
+         trans[chan] = tdst_offset(dst, chan, 0);
+         trans[chan].writemask = TOY_WRITEMASK_XYZW;
+      }
+   }
+   else if (dst.file == TOY_FILE_ARF) {
+      assert(tdst_is_null(dst));
+      for (chan = 0; chan < 4; chan++)
+         trans[chan] = dst;
+   }
+   else {
+      /* GRF, MRF, IMM, and anything unknown */
+      assert(!"unexpected file in dst transposition");
+      for (chan = 0; chan < 4; chan++)
+         trans[chan] = tdst_null();
+   }
+}
+
+/**
+ * Transpose a src operand.
+ *
+ * \p trans must have room for four operands, one per channel.  For a VRF
+ * operand, channel N becomes the register that the Nth swizzle of \p src
+ * selects (counted from \p src), with the swizzles reset to identity.  Null
+ * ARF operands and immediates are replicated.  Any other file trips an
+ * assertion and yields four null operands.
+ */
+static inline void
+tsrc_transpose(struct toy_src src, struct toy_src *trans)
+{
+   int chan;
+
+   if (src.file == TOY_FILE_VRF) {
+      const unsigned selected[4] = {
+         src.swizzle_x, src.swizzle_y,
+         src.swizzle_z, src.swizzle_w,
+      };
+
+      assert(!src.indirect);
+      for (chan = 0; chan < 4; chan++) {
+         /* the swizzle turns into a register offset ... */
+         trans[chan] = tsrc_offset(src, selected[chan], 0);
+
+         /* ... and is thus no longer needed on the operand itself */
+         trans[chan].swizzle_x = TOY_SWIZZLE_X;
+         trans[chan].swizzle_y = TOY_SWIZZLE_Y;
+         trans[chan].swizzle_z = TOY_SWIZZLE_Z;
+         trans[chan].swizzle_w = TOY_SWIZZLE_W;
+      }
+   }
+   else if (src.file == TOY_FILE_ARF || src.file == TOY_FILE_IMM) {
+      if (src.file == TOY_FILE_ARF)
+         assert(tsrc_is_null(src));
+
+      for (chan = 0; chan < 4; chan++)
+         trans[chan] = src;
+   }
+   else {
+      /* GRF, MRF, and anything unknown */
+      assert(!"unexpected file in src transposition");
+      for (chan = 0; chan < 4; chan++)
+         trans[chan] = tsrc_null();
+   }
+}
+
+/**
+ * Construct a message descriptor as an immediate source operand of type
+ * TOY_TYPE_UD.
+ *
+ * The descriptor is laid out as
+ *
+ *   bit  31     end of thread
+ *   bits 28:25  message length (1..15)
+ *   bits 24:20  response length (0..16)
+ *   bit  19     header present
+ *   bits 18:0   function control
+ *
+ * \p tc is currently unused.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc(const struct toy_compiler *tc,
+               bool eot,
+               unsigned message_length,
+               unsigned response_length,
+               bool header_present,
+               uint32_t function_control)
+{
+   uint32_t desc;
+
+   assert(message_length >= 1 && message_length <= 15);
+   /* response_length is unsigned; only the upper bound needs checking */
+   assert(response_length <= 16);
+   assert(function_control < 1u << 19);
+
+   /*
+    * A bool promotes to int, and shifting 1 into the sign bit of an int is
+    * undefined.  Do the shifts in uint32_t instead.
+    */
+   desc = (uint32_t) eot << 31 |
+          (uint32_t) message_length << 25 |
+          (uint32_t) response_length << 20 |
+          (uint32_t) header_present << 19 |
+          function_control;
+
+   return tsrc_imm_ud(desc);
+}
+
+/**
+ * Construct a message descriptor for a sampler message.
+ *
+ * The function control holds the binding table index in bits 7:0, the
+ * sampler index in bits 11:8, and the message type from bit 12 up.  The
+ * SIMD mode starts at bit 17 on GEN7+ and at bit 16 before that.  End of
+ * thread is never set.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc_sampler(const struct toy_compiler *tc,
+                       unsigned message_length,
+                       unsigned response_length,
+                       bool header_present,
+                       unsigned simd_mode,
+                       unsigned message_type,
+                       unsigned sampler_index,
+                       unsigned binding_table_index)
+{
+   /* the only field that moves between generations */
+   const unsigned simd_mode_shift = (tc->gen >= ILO_GEN(7)) ? 17 : 16;
+   uint32_t ctrl;
+
+   assert(simd_mode < 4);
+   assert(sampler_index < 16);
+   assert(binding_table_index < 256);
+
+   ctrl = binding_table_index;
+   ctrl |= sampler_index << 8;
+   ctrl |= message_type << 12;
+   ctrl |= simd_mode << simd_mode_shift;
+
+   return tsrc_imm_mdesc(tc, false, message_length,
+                         response_length, header_present, ctrl);
+}
+
+/**
+ * Construct a message descriptor for a data port message.
+ *
+ * \p message_specific_control must already be shifted into place: it may
+ * only have bits within 0x3f00 set on GEN7+, and within 0x1f00 before that.
+ * The message type starts at bit 14 on GEN7+ and at bit 13 before that.
+ * The write commit bit (bit 17) exists only before GEN7, where it is
+ * accepted only for GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc_data_port(const struct toy_compiler *tc,
+                         bool eot,
+                         unsigned message_length,
+                         unsigned response_length,
+                         bool header_present,
+                         bool send_write_commit_message,
+                         unsigned message_type,
+                         unsigned message_specific_control,
+                         unsigned binding_table_index)
+{
+   uint32_t ctrl = binding_table_index;
+
+   if (tc->gen >= ILO_GEN(7)) {
+      assert(!send_write_commit_message);
+      assert((message_specific_control & 0x3f00) == message_specific_control);
+
+      ctrl |= message_specific_control & 0x3f00;
+      ctrl |= message_type << 14;
+   }
+   else {
+      assert(!send_write_commit_message ||
+             message_type == GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE);
+      assert((message_specific_control & 0x1f00) == message_specific_control);
+
+      ctrl |= message_specific_control & 0x1f00;
+      ctrl |= message_type << 13;
+      ctrl |= send_write_commit_message << 17;
+   }
+
+   return tsrc_imm_mdesc(tc, eot, message_length,
+                         response_length, header_present, ctrl);
+}
+
+/**
+ * Construct a message descriptor for a data port scratch message.  Only
+ * valid on GEN7+.
+ *
+ * The function control is laid out as
+ *
+ *   bit  18     always set
+ *   bit  17     write_type
+ *   bit  16     dword_mode
+ *   bit  15     invalidate_after_read
+ *   bits 13:12  num_registers - 1 (num_registers is 1, 2, or 4)
+ *   bits 11:0   hword_offset
+ *
+ * The message always carries a header and never ends the thread.
+ *
+ * NOTE(review): bit 18 presumably marks the message as a scratch message;
+ * confirm against the PRM.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc_data_port_scratch(const struct toy_compiler *tc,
+                                 unsigned message_length,
+                                 unsigned response_length,
+                                 bool write_type,
+                                 bool dword_mode,
+                                 bool invalidate_after_read,
+                                 int num_registers,
+                                 int hword_offset)
+{
+   const bool eot = false;
+   const bool header_present = true;
+   uint32_t ctrl;
+
+   assert(tc->gen >= ILO_GEN(7));
+   assert(num_registers == 1 || num_registers == 2 || num_registers == 4);
+   /*
+    * The offset shares the word with the fields above it.  Anything that
+    * does not fit in the low 12 bits would silently corrupt them.
+    */
+   assert(hword_offset >= 0 && hword_offset < (1 << 12));
+
+   ctrl = 1 << 18 |
+          write_type << 17 |
+          dword_mode << 16 |
+          invalidate_after_read << 15 |
+          (num_registers - 1) << 12 |
+          hword_offset;
+
+   return tsrc_imm_mdesc(tc, eot, message_length,
+                         response_length, header_present, ctrl);
+}
+
+/**
+ * Construct a message descriptor for a URB message.  The message always
+ * carries a header.
+ *
+ * On GEN7+ the function control is
+ *
+ *   bit 16 per-slot offset (always 0), bit 15 complete,
+ *   swizzle control from bit 14, global offset from bit 3,
+ *   URB opcode from bit 0
+ *
+ * and \p used and \p allocate are ignored.  Before GEN7 it is
+ *
+ *   bit 15 complete, bit 14 used, bit 13 allocate,
+ *   swizzle control from bit 10, global offset from bit 4,
+ *   URB opcode from bit 0
+ *
+ * None of the fields is range-checked here.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc_urb(const struct toy_compiler *tc,
+                   bool eot,
+                   unsigned message_length,
+                   unsigned response_length,
+                   bool complete,
+                   bool used,
+                   bool allocate,
+                   unsigned swizzle_control,
+                   unsigned global_offset,
+                   unsigned urb_opcode)
+{
+   uint32_t ctrl = urb_opcode;
+
+   /* bit 15 means the same on every generation */
+   ctrl |= complete << 15;
+
+   if (tc->gen >= ILO_GEN(7)) {
+      const bool per_slot_offset = false;
+
+      ctrl |= per_slot_offset << 16;
+      ctrl |= swizzle_control << 14;
+      ctrl |= global_offset << 3;
+   }
+   else {
+      ctrl |= used << 14;
+      ctrl |= allocate << 13;
+      ctrl |= swizzle_control << 10;
+      ctrl |= global_offset << 4;
+   }
+
+   return tsrc_imm_mdesc(tc, eot, message_length,
+                         response_length, true, ctrl);
+}
+
+#endif /* TOY_HELPERS_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_legalize.c b/src/gallium/drivers/ilo/shader/toy_legalize.c
new file mode 100644
index 00000000000..04f2a2570f4
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_legalize.c
@@ -0,0 +1,632 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "toy_compiler.h"
+#include "toy_tgsi.h"
+#include "toy_helpers.h"
+#include "toy_legalize.h"
+
+/**
+ * Turn a virtual instruction (opcode >= 128) into BRW_OPCODE_SEND, or into
+ * BRW_OPCODE_SENDC when sendc is set.
+ *
+ * The instruction must not have a thread control or a condition modifier
+ * set; sfid is stored in the condition modifier field.
+ */
+void
+toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
+                           bool sendc, unsigned sfid)
+{
+   assert(inst->opcode >= 128);
+   /* thread control is reserved */
+   assert(inst->thread_ctrl == 0);
+   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
+
+   if (sendc)
+      inst->opcode = BRW_OPCODE_SENDC;
+   else
+      inst->opcode = BRW_OPCODE_SEND;
+
+   inst->cond_modifier = sfid;
+}
+
+/**
+ * Translate a virtual math opcode to the matching BRW_MATH_FUNCTION_x.
+ *
+ * Asserts and returns -1 when the opcode is not a math opcode.
+ */
+static int
+math_op_to_func(unsigned opcode)
+{
+   static const struct {
+      unsigned opcode;
+      int func;
+   } math_funcs[] = {
+      { TOY_OPCODE_INV,               BRW_MATH_FUNCTION_INV },
+      { TOY_OPCODE_LOG,               BRW_MATH_FUNCTION_LOG },
+      { TOY_OPCODE_EXP,               BRW_MATH_FUNCTION_EXP },
+      { TOY_OPCODE_SQRT,              BRW_MATH_FUNCTION_SQRT },
+      { TOY_OPCODE_RSQ,               BRW_MATH_FUNCTION_RSQ },
+      { TOY_OPCODE_SIN,               BRW_MATH_FUNCTION_SIN },
+      { TOY_OPCODE_COS,               BRW_MATH_FUNCTION_COS },
+      { TOY_OPCODE_FDIV,              BRW_MATH_FUNCTION_FDIV },
+      { TOY_OPCODE_POW,               BRW_MATH_FUNCTION_POW },
+      { TOY_OPCODE_INT_DIV_QUOTIENT,  BRW_MATH_FUNCTION_INT_DIV_QUOTIENT },
+      { TOY_OPCODE_INT_DIV_REMAINDER, BRW_MATH_FUNCTION_INT_DIV_REMAINDER },
+   };
+   unsigned idx;
+
+   for (idx = 0; idx < sizeof(math_funcs) / sizeof(math_funcs[0]); idx++) {
+      if (math_funcs[idx].opcode == opcode)
+         return math_funcs[idx].func;
+   }
+
+   assert(!"unknown math opcode");
+   return -1;
+}
+
+/**
+ * Lower a virtual math opcode to BRW_OPCODE_MATH.
+ *
+ * Sources that are not plain virtual registers (or that are swizzled,
+ * negated, or made absolute) are first copied to temporaries.  The math
+ * function is stored in the condition modifier field.  When the destination
+ * has a partial writemask, the result goes to a temporary and is then moved
+ * to the real destination with the same predicate control.
+ */
+void
+toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct toy_dst tmp, real_dst;
+   struct toy_inst *mov;
+   int idx;
+
+   /* see commit 250770b74d33bb8625c780a74a89477af033d13a */
+   for (idx = 0; idx < Elements(inst->src); idx++) {
+      bool need_copy;
+
+      if (tsrc_is_null(inst->src[idx]))
+         break;
+
+      /* no swizzling in align1 */
+      /* XXX how about source modifiers? */
+      need_copy = (!toy_file_is_virtual(inst->src[idx].file) ||
+                   tsrc_is_swizzled(inst->src[idx]) ||
+                   inst->src[idx].absolute ||
+                   inst->src[idx].negate);
+      if (!need_copy)
+         continue;
+
+      tmp = tdst_type(tc_alloc_tmp(tc), inst->src[idx].type);
+      tc_MOV(tc, tmp, inst->src[idx]);
+      inst->src[idx] = tsrc_from(tmp);
+   }
+
+   /* FC[0:3] */
+   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
+   inst->cond_modifier = math_op_to_func(inst->opcode);
+   /* FC[4:5] */
+   assert(inst->thread_ctrl == 0);
+   inst->thread_ctrl = 0;
+
+   inst->opcode = BRW_OPCODE_MATH;
+   tc_move_inst(tc, inst);
+
+   /* a full writemask needs no further work */
+   if (inst->dst.writemask == TOY_WRITEMASK_XYZW)
+      return;
+
+   /* no writemask in align1 */
+   real_dst = inst->dst;
+
+   tmp = tc_alloc_tmp(tc);
+   tmp.type = inst->dst.type;
+   inst->dst = tmp;
+
+   mov = tc_MOV(tc, real_dst, tsrc_from(tmp));
+   mov->pred_ctrl = inst->pred_ctrl;
+}
+
+/**
+ * Return the absolute value of a 32-bit immediate, interpreted as type.
+ *
+ * F, D, and W immediates are handled.  V immediates trigger an assertion.
+ * Immediates of any other type are returned unchanged.
+ */
+static uint32_t
+absolute_imm(uint32_t imm32, enum toy_type type)
+{
+   union fi val = { .ui = imm32 };
+
+   if (type == TOY_TYPE_F) {
+      val.f = fabs(val.f);
+   }
+   else if (type == TOY_TYPE_D) {
+      if (val.i < 0)
+         val.i = -val.i;
+   }
+   else if (type == TOY_TYPE_W) {
+      /* only the low word holds the value */
+      const int16_t low = (int16_t) (val.ui & 0xffff);
+
+      if (low < 0)
+         val.i = -low;
+   }
+   else if (type == TOY_TYPE_V) {
+      assert(!"cannot take absoulte of immediates of type V");
+   }
+
+   return val.ui;
+}
+
+/**
+ * Return the negation of a 32-bit immediate, interpreted as type.
+ *
+ * F, D/UD, and W/UW immediates are handled.  Any other type triggers an
+ * assertion and the immediate is returned unchanged.
+ */
+static uint32_t
+negate_imm(uint32_t imm32, enum toy_type type)
+{
+   union fi val = { .ui = imm32 };
+
+   if (type == TOY_TYPE_F) {
+      val.f = -val.f;
+   }
+   else if (type == TOY_TYPE_D || type == TOY_TYPE_UD) {
+      val.i = -val.i;
+   }
+   else if (type == TOY_TYPE_W || type == TOY_TYPE_UW) {
+      /* only the low word holds the value */
+      const int16_t low = (int16_t) (val.ui & 0xffff);
+
+      val.i = -low;
+   }
+   else {
+      assert(!"negate immediate of unknown type");
+   }
+
+   return val.ui;
+}
+
+/**
+ * Make the immediate operands of an instruction valid.
+ *
+ * The absolute and negate modifiers are folded into the immediate values.
+ * An immediate that is not the last operand is moved to a temporary; the
+ * instruction is then moved after the newly added MOVs.
+ */
+static void
+validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   bool need_move = false;
+   int idx;
+
+   for (idx = 0; idx < Elements(inst->src); idx++) {
+      struct toy_src *src = &inst->src[idx];
+      struct toy_dst tmp;
+
+      if (tsrc_is_null(*src))
+         break;
+
+      if (src->file != TOY_FILE_IMM)
+         continue;
+
+      /* fold the source modifiers into the value */
+      if (src->absolute) {
+         src->val32 = absolute_imm(src->val32, src->type);
+         src->absolute = false;
+      }
+
+      if (src->negate) {
+         src->val32 = negate_imm(src->val32, src->type);
+         src->negate = false;
+      }
+
+      /* this is the last operand */
+      if (idx + 1 == Elements(inst->src) || tsrc_is_null(inst->src[idx + 1]))
+         break;
+
+      /* need to use a temp if this imm is not the last operand */
+      /* TODO we should simply swap the operands if the op is commutative */
+      tmp = tdst_type(tc_alloc_tmp(tc), src->type);
+      tc_MOV(tc, tmp, *src);
+      *src = tsrc_from(tmp);
+
+      need_move = true;
+   }
+
+   if (need_move)
+      tc_move_inst(tc, inst);
+}
+
+/**
+ * Lower an integer (D or UD) MUL to a MUL/MACH/MOV sequence that goes
+ * through the accumulator.  The original instruction is discarded.
+ * Multiplications of other types are left untouched.
+ */
+static void
+lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   const enum toy_type type = inst->dst.type;
+   struct toy_dst acc0;
+   struct toy_inst *mach;
+
+   /* only need to take care of integer multiplications */
+   if (!(type == TOY_TYPE_UD || type == TOY_TYPE_D))
+      return;
+
+   acc0 = tdst_type(tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0), type);
+
+   /* acc0 = (src0 & 0x0000ffff) * src1 */
+   tc_MUL(tc, acc0, inst->src[0], inst->src[1]);
+
+   /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
+   mach = tc_add2(tc, BRW_OPCODE_MACH, tdst_type(tdst_null(), type),
+                  inst->src[0], inst->src[1]);
+   mach->acc_wr_ctrl = true;
+
+   /* dst = acc0 & 0xffffffff */
+   tc_MOV(tc, inst->dst, tsrc_from(acc0));
+
+   tc_discard_inst(tc, inst);
+}
+
+/**
+ * Lower a three-source MAC.
+ *
+ * For integer (D or UD) types, the MAC is replaced by a lowered MUL into a
+ * temporary followed by an ADD.  For other types, src2 is first moved to
+ * the accumulator and the instruction is kept with src2 cleared.
+ */
+static void
+lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   const enum toy_type type = inst->dst.type;
+   const bool is_integer = (type == TOY_TYPE_UD || type == TOY_TYPE_D);
+
+   if (is_integer) {
+      struct toy_dst product = tdst_type(tc_alloc_tmp(tc), type);
+      struct toy_inst *mul;
+
+      mul = tc_MUL(tc, product, inst->src[0], inst->src[1]);
+      lower_opcode_mul(tc, mul);
+
+      tc_ADD(tc, inst->dst, tsrc_from(product), inst->src[2]);
+
+      tc_discard_inst(tc, inst);
+      return;
+   }
+
+   {
+      const struct toy_dst acc0 = tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0);
+
+      tc_MOV(tc, acc0, inst->src[2]);
+      inst->src[2] = tsrc_null();
+      tc_move_inst(tc, inst);
+   }
+}
+
+/**
+ * Legalize the instructions for register allocation.
+ *
+ * The first pass lowers MAC and MUL, and fails the compilation when an
+ * opcode beyond TOY_OPCODE_LAST_HW is still present.  The second pass
+ * validates the immediate operands of all instructions.
+ */
+void
+toy_compiler_legalize_for_ra(struct toy_compiler *tc)
+{
+   struct toy_inst *inst;
+
+   tc_head(tc);
+   for (inst = tc_next(tc); inst != NULL; inst = tc_next(tc)) {
+      if (inst->opcode == BRW_OPCODE_MAC) {
+         lower_opcode_mac(tc, inst);
+      }
+      else if (inst->opcode == BRW_OPCODE_MUL) {
+         lower_opcode_mul(tc, inst);
+      }
+      else if (inst->opcode == BRW_OPCODE_MAD) {
+         /* TODO operands must be floats */
+      }
+      else if (inst->opcode > TOY_OPCODE_LAST_HW) {
+         tc_fail(tc, "internal opcodes not lowered");
+      }
+   }
+
+   /* loop again as the previous pass may add new instructions */
+   tc_head(tc);
+   for (inst = tc_next(tc); inst != NULL; inst = tc_next(tc))
+      validate_imm(tc, inst);
+}
+
+/**
+ * Set the jump distance of a WHILE.
+ *
+ * The instructions are scanned backward for the DO marker of the same
+ * nesting level.  The (negative) distance is counted in non-marker
+ * instructions up to the instruction following the marker, and is doubled
+ * when stored.  It goes to src1 on GEN7+ and to dst otherwise.
+ */
+static void
+patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct toy_inst *prev;
+   int depth = 0;
+   int dist = -1;
+
+   /* search backward */
+   LIST_FOR_EACH_ENTRY_FROM_REV(prev, inst->list.prev,
+                                &tc->instructions, list) {
+      if (prev->marker) {
+         if (prev->opcode != BRW_OPCODE_DO)
+            continue;
+
+         if (!depth) {
+            /* the following instruction */
+            dist++;
+            break;
+         }
+
+         /* the DO of an inner loop */
+         depth--;
+         continue;
+      }
+
+      /* the WHILE of an inner loop */
+      if (prev->opcode == BRW_OPCODE_WHILE)
+         depth++;
+
+      dist--;
+   }
+
+   if (tc->gen < ILO_GEN(7))
+      inst->dst = tdst_imm_w(dist * 2);
+   else
+      inst->src[1] = tsrc_imm_w(dist * 2);
+}
+
+/**
+ * Set JIP/UIP of an IF or ELSE.
+ *
+ * The instructions following inst are scanned for the ELSE/ENDIF of the same
+ * nesting level.  Distances are counted in non-marker instructions and
+ * doubled when stored.  On GEN7+, UIP and JIP are packed into a D immediate
+ * in src1 (UIP in the upper 16 bits); otherwise only JIP is stored, as a W
+ * immediate in dst.  The thread control is set to BRW_THREAD_SWITCH.
+ */
+static void
+patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
+{
+ struct toy_inst *inst2;
+ int nest_level, dist;
+ int jip, uip;
+
+ nest_level = 0;
+ dist = 1;
+ jip = 0;
+ uip = 0;
+
+ /* search forward */
+ LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
+ if (inst2->marker)
+ continue;
+
+ if (inst2->opcode == BRW_OPCODE_ENDIF) {
+ if (nest_level) {
+ nest_level--;
+ }
+ else {
+ /* the matching ENDIF; JIP defaults to it when no ELSE was seen */
+ uip = dist * 2;
+ if (!jip)
+ jip = uip;
+ break;
+ }
+ }
+ /* an ELSE is only of interest when patching an IF */
+ else if (inst2->opcode == BRW_OPCODE_ELSE &&
+ inst->opcode == BRW_OPCODE_IF) {
+ if (!nest_level) {
+ /* the following instruction */
+ jip = (dist + 1) * 2;
+
+ /* on GEN6, stop at the ELSE and use the same target for UIP */
+ if (tc->gen == ILO_GEN(6)) {
+ uip = jip;
+ break;
+ }
+ }
+ }
+ else if (inst2->opcode == BRW_OPCODE_IF) {
+ nest_level++;
+ }
+
+ dist++;
+ }
+
+ if (tc->gen >= ILO_GEN(7)) {
+ /* what should the type be? */
+ inst->dst.type = TOY_TYPE_D;
+ inst->src[0].type = TOY_TYPE_D;
+ inst->src[1] = tsrc_imm_d(uip << 16 | jip);
+ }
+ else {
+ inst->dst = tdst_imm_w(jip);
+ }
+
+ inst->thread_ctrl = BRW_THREAD_SWITCH;
+}
+
+/**
+ * Set the jump distance of an ENDIF.
+ *
+ * The distance is to the next ENDIF, ELSE, or WHILE, counted in non-marker
+ * instructions, or 1 when there is none.  It is doubled when stored, in src1
+ * on GEN7+ and in dst otherwise.  The thread control is set to
+ * BRW_THREAD_SWITCH.
+ */
+static void
+patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct toy_inst *iter;
+   /* should we set dist to (steps - 1) or 1 when nothing is found? */
+   int dist = 1;
+   int steps = 1;
+
+   /* search forward for instructions that may enable channels */
+   LIST_FOR_EACH_ENTRY_FROM(iter, inst->list.next, &tc->instructions, list) {
+      if (iter->marker)
+         continue;
+
+      if (iter->opcode == BRW_OPCODE_ENDIF ||
+          iter->opcode == BRW_OPCODE_ELSE ||
+          iter->opcode == BRW_OPCODE_WHILE) {
+         dist = steps;
+         break;
+      }
+
+      steps++;
+   }
+
+   if (tc->gen < ILO_GEN(7))
+      inst->dst = tdst_imm_w(dist * 2);
+   else
+      inst->src[1] = tsrc_imm_w(dist * 2);
+
+   inst->thread_ctrl = BRW_THREAD_SWITCH;
+}
+
+/**
+ * Set JIP/UIP of a BREAK or CONTINUE.
+ *
+ * JIP is the distance to the next ELSE, ENDIF, or WHILE.  UIP is the
+ * distance to the WHILE that is found with no pending nested DO, plus one
+ * for a BREAK on GEN6.  Both are counted in non-marker instructions, doubled,
+ * and packed into a D immediate in src1 (UIP in the upper 16 bits).  Both
+ * default to 2 when the respective target is not found.
+ */
+static void
+patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
+{
+ struct toy_inst *inst2, *inst3;
+ int nest_level, dist, jip, uip;
+
+ nest_level = 0;
+ dist = 1;
+ jip = 1 * 2;
+ uip = 1 * 2;
+
+ /* search forward */
+ LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
+ if (inst2->marker) {
+ if (inst2->opcode == BRW_OPCODE_DO)
+ nest_level++;
+ continue;
+ }
+
+ if (inst2->opcode == BRW_OPCODE_ELSE ||
+ inst2->opcode == BRW_OPCODE_ENDIF ||
+ inst2->opcode == BRW_OPCODE_WHILE) {
+ jip = dist * 2;
+ break;
+ }
+
+ dist++;
+ }
+
+ /* go on to determine uip */
+ /* the scan resumes at the instruction where the first one stopped, */
+ /* keeping dist and nest_level */
+ inst3 = inst2;
+ LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
+ if (inst2->marker) {
+ if (inst2->opcode == BRW_OPCODE_DO)
+ nest_level++;
+ continue;
+ }
+
+ if (inst2->opcode == BRW_OPCODE_WHILE) {
+ if (nest_level) {
+ nest_level--;
+ }
+ else {
+ /* the following instruction */
+ if (tc->gen == ILO_GEN(6) && inst->opcode == BRW_OPCODE_BREAK)
+ dist++;
+
+ uip = dist * 2;
+ break;
+ }
+ }
+
+ dist++;
+ }
+
+ /* should the type be D or W? */
+ inst->dst.type = TOY_TYPE_D;
+ inst->src[0].type = TOY_TYPE_D;
+ inst->src[1] = tsrc_imm_d(uip << 16 | jip);
+}
+
+/**
+ * Legalize the instructions for assembling.
+ *
+ * The first pass fixes up the type of null src1, forces MATH to align1 and
+ * splits SIMD16 MATH into two SIMD8 instructions where needed, splits a
+ * conditional IF into CMP and a predicated IF on GEN7+, and turns MRF
+ * operands into GRF operands on GEN7+.  It also counts the instructions and
+ * stores the count in tc->num_instructions.
+ *
+ * The second pass sets JIP/UIP of the flow control instructions.
+ */
+void
+toy_compiler_legalize_for_asm(struct toy_compiler *tc)
+{
+ struct toy_inst *inst;
+ int pc = 0;
+
+ tc_head(tc);
+ while ((inst = tc_next(tc)) != NULL) {
+ int i;
+
+ pc++;
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 2, page 112:
+ *
+ * "Specifically, for instructions with a single source, it only
+ * uses the first source operand <src0>. In this case, the second
+ * source operand <src1> must be set to null and also with the same
+ * type as the first source operand <src0>. It is a special case
+ * when <src0> is an immediate, as an immediate <src0> uses DW3 of
+ * the instruction word, which is normally used by <src1>. In this
+ * case, <src1> must be programmed with register file ARF and the
+ * same data type as <src0>."
+ *
+ * Since we already fill unused operands with null, we only need to take
+ * care of the type.
+ */
+ if (tsrc_is_null(inst->src[1]))
+ inst->src[1].type = inst->src[0].type;
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MATH:
+ /* math does not support align16 nor exec_size > 8 */
+ inst->access_mode = BRW_ALIGN_1;
+
+ if (inst->exec_size == BRW_EXECUTE_16) {
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 3, page 192:
+ *
+ * "INT DIV function does not support SIMD16."
+ */
+ if (tc->gen < ILO_GEN(7) ||
+ inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+ inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) {
+ struct toy_inst *inst2;
+
+ /* the original instruction handles the first quarter */
+ inst->exec_size = BRW_EXECUTE_8;
+ inst->qtr_ctrl = GEN6_COMPRESSION_1Q;
+
+ /* the duplicate handles the second quarter, one register further */
+ inst2 = tc_duplicate_inst(tc, inst);
+ inst2->qtr_ctrl = GEN6_COMPRESSION_2Q;
+ inst2->dst = tdst_offset(inst2->dst, 1, 0);
+ inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
+ if (!tsrc_is_null(inst2->src[1]))
+ inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);
+
+ /* account for the duplicate */
+ pc++;
+ }
+ }
+ break;
+ case BRW_OPCODE_IF:
+ if (tc->gen >= ILO_GEN(7) &&
+ inst->cond_modifier != BRW_CONDITIONAL_NONE) {
+ struct toy_inst *inst2;
+
+ /* NOTE(review): presumably the duplicate follows inst -- confirm */
+ inst2 = tc_duplicate_inst(tc, inst);
+
+ /* replace the original IF by CMP */
+ inst->opcode = BRW_OPCODE_CMP;
+
+ /* predicate control instead of condition modifier */
+ inst2->dst = tdst_null();
+ inst2->src[0] = tsrc_null();
+ inst2->src[1] = tsrc_null();
+ inst2->cond_modifier = BRW_CONDITIONAL_NONE;
+ inst2->pred_ctrl = BRW_PREDICATE_NORMAL;
+
+ /* account for the duplicate */
+ pc++;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* MRF to GRF */
+ if (tc->gen >= ILO_GEN(7)) {
+ for (i = 0; i < Elements(inst->src); i++) {
+ if (inst->src[i].file != TOY_FILE_MRF)
+ continue;
+ else if (tsrc_is_null(inst->src[i]))
+ break;
+
+ inst->src[i].file = TOY_FILE_GRF;
+ }
+
+ if (inst->dst.file == TOY_FILE_MRF)
+ inst->dst.file = TOY_FILE_GRF;
+ }
+ }
+
+ tc->num_instructions = pc;
+
+ /* set JIP/UIP */
+ tc_head(tc);
+ while ((inst = tc_next(tc)) != NULL) {
+ switch (inst->opcode) {
+ case BRW_OPCODE_IF:
+ case BRW_OPCODE_ELSE:
+ patch_if_else_jip(tc, inst);
+ break;
+ case BRW_OPCODE_ENDIF:
+ patch_endif_jip(tc, inst);
+ break;
+ case BRW_OPCODE_WHILE:
+ patch_while_jip(tc, inst);
+ break;
+ case BRW_OPCODE_BREAK:
+ case BRW_OPCODE_CONTINUE:
+ patch_break_continue_jip(tc, inst);
+ break;
+ default:
+ break;
+ }
+ }
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_legalize.h b/src/gallium/drivers/ilo/shader/toy_legalize.h
new file mode 100644
index 00000000000..8e2a120620b
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_legalize.h
@@ -0,0 +1,52 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef TOY_LEGALIZE_H
+#define TOY_LEGALIZE_H
+
+#include "toy_compiler.h"
+#include "toy_tgsi.h"
+
+/**
+ * Lower an instruction to BRW_OPCODE_SEND, or to BRW_OPCODE_SENDC when sendc
+ * is set.
+ */
+void
+toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
+ bool sendc, unsigned sfid);
+
+/**
+ * Lower virtual math opcodes to BRW_OPCODE_MATH.
+ */
+void
+toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst);
+
+/**
+ * Allocate GRF registers, in the range [start_grf, end_grf], to VRF
+ * registers.  Each VRF register takes num_grf_per_vrf GRF registers.
+ */
+void
+toy_compiler_allocate_registers(struct toy_compiler *tc,
+ int start_grf, int end_grf,
+ int num_grf_per_vrf);
+
+/**
+ * Legalize the instructions for register allocation.
+ */
+void
+toy_compiler_legalize_for_ra(struct toy_compiler *tc);
+
+/**
+ * Legalize the instructions for assembling.
+ */
+void
+toy_compiler_legalize_for_asm(struct toy_compiler *tc);
+
+#endif /* TOY_LEGALIZE_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_legalize_ra.c b/src/gallium/drivers/ilo/shader/toy_legalize_ra.c
new file mode 100644
index 00000000000..e691f127584
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_legalize_ra.c
@@ -0,0 +1,628 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include <stdlib.h> /* for qsort() */
+#include "toy_compiler.h"
+#include "toy_legalize.h"
+
+/**
+ * Live interval of a VRF register.
+ *
+ * vrf is the VRF register number; zero marks an unused element of the
+ * interval array.  startpoint and endpoint are the instruction counters of
+ * the first and the last use, as determined by the live variable analysis.
+ * reg is the register assigned by the linear scan, and list links the
+ * interval into the active list.
+ */
+struct linear_scan_live_interval {
+ int vrf;
+ int startpoint;
+ int endpoint;
+
+ /*
+ * should this be assigned a consecutive register of the previous
+ * interval's?
+ */
+ bool consecutive;
+
+ int reg;
+
+ struct list_head list;
+};
+
+/**
+ * Linear scan.
+ *
+ * intervals is indexed by VRF number while the live intervals are being
+ * collected, and is then sorted by startpoints.  max_vrf and num_vrfs are
+ * the largest VRF number seen and the number of intervals in use.
+ *
+ * num_regs is the number of registers available for allocation.  free_regs
+ * holds the num_free_regs registers that are currently free; registers are
+ * allocated from its tail.  active_list is sorted by endpoints.
+ *
+ * vrf_mapping maps a VRF number to the register assigned to it.
+ */
+struct linear_scan {
+ struct linear_scan_live_interval *intervals;
+ int max_vrf, num_vrfs;
+
+ int num_regs;
+
+ struct list_head active_list;
+ int *free_regs;
+ int num_free_regs;
+
+ int *vrf_mapping;
+};
+
+/**
+ * Give count registers, starting from reg, back to the free register pool.
+ *
+ * They are pushed from the highest to the lowest, such that the lowest one
+ * ends up at the tail of the pool.
+ */
+static void
+linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
+{
+   int cur;
+
+   for (cur = reg + count - 1; cur >= reg; cur--) {
+      ls->free_regs[ls->num_free_regs] = cur;
+      ls->num_free_regs++;
+   }
+}
+
+/**
+ * A qsort() comparison function that sorts register numbers in descending
+ * order.
+ */
+static int
+linear_scan_compare_regs(const void *elem1, const void *elem2)
+{
+   const int lhs = *((const int *) elem1);
+   const int rhs = *((const int *) elem2);
+
+   /* in reverse order */
+   return rhs - lhs;
+}
+
+/**
+ * Allocate a chunk of registers from the free register pool.
+ *
+ * Return the lowest register number of a chunk of count registers with
+ * consecutive numbers, or -1 when there is no such chunk.  The pool is
+ * sorted once, in descending order, when the first search fails.
+ */
+static int
+linear_scan_allocate_regs(struct linear_scan *ls, int count)
+{
+ bool sorted = false;
+ int reg;
+
+ /* simple cases */
+ if (count > ls->num_free_regs)
+ return -1;
+ else if (count == 1)
+ return ls->free_regs[--ls->num_free_regs];
+
+ /* TODO a free register pool */
+ /* TODO reserve some regs for spilling */
+ while (true) {
+ bool found = false;
+ int start;
+
+ /*
+ * find a chunk of registers that have consecutive register
+ * numbers
+ */
+ for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
+ int i;
+
+ /* the numbers must increase by one toward the head of the pool */
+ for (i = 1; i < count; i++) {
+ if (ls->free_regs[start - i] != ls->free_regs[start] + i)
+ break;
+ }
+
+ if (i >= count) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ reg = ls->free_regs[start];
+
+ /* close the gap unless the chunk is at the tail of the pool */
+ if (start != ls->num_free_regs - 1) {
+ start++;
+ memmove(&ls->free_regs[start - count],
+ &ls->free_regs[start],
+ sizeof(*ls->free_regs) * (ls->num_free_regs - start));
+ }
+ ls->num_free_regs -= count;
+ break;
+ }
+ else if (!sorted) {
+ /* sort and retry */
+ qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
+ linear_scan_compare_regs);
+ sorted = true;
+ }
+ else {
+ /* failed */
+ reg = -1;
+ break;
+ }
+ }
+
+ return reg;
+}
+
+/**
+ * Insert an interval into the active list, in front of the first interval
+ * that does not end earlier, or at the tail when there is no such interval.
+ */
+static void
+linear_scan_add_active(struct linear_scan *ls,
+                       struct linear_scan_live_interval *interval)
+{
+   struct list_head *insert_before = &ls->active_list;
+   struct linear_scan_live_interval *iter;
+
+   /* keep the active list sorted by endpoints */
+   LIST_FOR_EACH_ENTRY(iter, &ls->active_list, list) {
+      if (iter->endpoint >= interval->endpoint) {
+         insert_before = &iter->list;
+         break;
+      }
+   }
+
+   list_addtail(&interval->list, insert_before);
+}
+
+/**
+ * Remove an interval from the active list.
+ *
+ * The interval is only unlinked; its register is not returned to the free
+ * register pool here.  ls is unused.
+ */
+static void
+linear_scan_remove_active(struct linear_scan *ls,
+ struct linear_scan_live_interval *interval)
+{
+ list_del(&interval->list);
+}
+
+/**
+ * Retire the active intervals that end before pc, and return their
+ * registers to the free register pool.
+ */
+static void
+linear_scan_expire_active(struct linear_scan *ls, int pc)
+{
+   struct linear_scan_live_interval *cur, *tmp;
+
+   LIST_FOR_EACH_ENTRY_SAFE(cur, tmp, &ls->active_list, list) {
+      const bool expired = (cur->endpoint < pc);
+
+      /*
+       * since we sort intervals on the active list by their endpoints, we
+       * know that this and the rest of the intervals are still active.
+       */
+      if (!expired)
+         break;
+
+      linear_scan_remove_active(ls, cur);
+
+      /* recycle the reg */
+      linear_scan_free_regs(ls, cur->reg, 1);
+   }
+}
+
+/**
+ * Spill an interval.
+ *
+ * Spilling is not implemented: this only asserts and does nothing else.
+ */
+static void
+linear_scan_spill(struct linear_scan *ls,
+ struct linear_scan_live_interval *interval,
+ bool is_active)
+{
+ assert(!"no spilling support");
+}
+
+/**
+ * Spill count intervals, starting from the one at index first.  None of
+ * them is treated as active.
+ */
+static void
+linear_scan_spill_range(struct linear_scan *ls, int first, int count)
+{
+   int idx = first;
+
+   while (count-- > 0) {
+      linear_scan_spill(ls, &ls->intervals[idx], false);
+      idx++;
+   }
+}
+
+/**
+ * Perform linear scan to allocate registers for the intervals.
+ *
+ * The intervals are visited in their sorted order.  Intervals that start at
+ * the same point and are marked consecutive are allocated together as a
+ * chunk of consecutive registers.  The assigned registers are recorded in
+ * both the intervals and ls->vrf_mapping.
+ *
+ * Return false when registers cannot be allocated even after spilling.
+ *
+ * NOTE(review): linear_scan_spill() only asserts, so the spill paths below
+ * cannot free any register -- confirm how running out of registers is
+ * expected to be handled when assertions are disabled.
+ */
+static bool
+linear_scan_run(struct linear_scan *ls)
+{
+ int i;
+
+ i = 0;
+ while (i < ls->num_vrfs) {
+ struct linear_scan_live_interval *first = &ls->intervals[i];
+ int reg, count;
+
+ /*
+ * BRW_OPCODE_SEND may write to multiple consecutive registers and we need to
+ * support that
+ */
+ for (count = 1; i + count < ls->num_vrfs; count++) {
+ const struct linear_scan_live_interval *interval =
+ &ls->intervals[i + count];
+
+ if (interval->startpoint != first->startpoint ||
+ !interval->consecutive)
+ break;
+ }
+
+ reg = linear_scan_allocate_regs(ls, count);
+
+ /* expire intervals that are no longer active and try again */
+ if (reg < 0) {
+ linear_scan_expire_active(ls, first->startpoint);
+ reg = linear_scan_allocate_regs(ls, count);
+ }
+
+ /* have to spill some intervals */
+ if (reg < 0) {
+ /* the active list is sorted by endpoints; its tail ends last */
+ struct linear_scan_live_interval *last_active =
+ container_of(ls->active_list.prev,
+ (struct linear_scan_live_interval *) NULL, list);
+
+ /* heuristically spill the interval that ends last */
+ if (count > 1 || last_active->endpoint < first->endpoint) {
+ linear_scan_spill_range(ls, i, count);
+ i += count;
+ continue;
+ }
+
+ /* make some room for the new interval */
+ linear_scan_spill(ls, last_active, true);
+ reg = linear_scan_allocate_regs(ls, count);
+ if (reg < 0) {
+ assert(!"failed to spill any register");
+ return false;
+ }
+ }
+
+ /* assign the chunk, one register per interval */
+ while (count--) {
+ struct linear_scan_live_interval *interval = &ls->intervals[i++];
+
+ interval->reg = reg++;
+ linear_scan_add_active(ls, interval);
+
+ ls->vrf_mapping[interval->vrf] = interval->reg;
+
+ /*
+ * this should and must be the case because of how we initialized the
+ * intervals
+ */
+ assert(interval->vrf - first->vrf == interval->reg - first->reg);
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Start the live interval of a VRF register at pc.
+ *
+ * Nothing is done when the interval has already been started.  Otherwise,
+ * the number of intervals in use and the largest VRF number are updated.
+ */
+static void
+linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
+{
+   struct linear_scan_live_interval *interval = &ls->intervals[vrf];
+
+   /* a non-zero vrf marks an interval that is in use */
+   if (interval->vrf)
+      return;
+
+   interval->vrf = vrf;
+   interval->startpoint = pc;
+
+   ls->num_vrfs++;
+   if (ls->max_vrf < vrf)
+      ls->max_vrf = vrf;
+}
+
+/**
+ * Perform (oversimplified?) live variable analysis.
+ *
+ * Walk all instructions, including markers, and record for each VRF
+ * register the instruction counters of its first and last uses in
+ * ls->intervals, which is indexed by VRF number here.  Markers do not
+ * advance the instruction counter.
+ *
+ * A register used anywhere inside an outermost loop is considered live
+ * throughout the whole loop.  The destination of a SEND/SENDC may span
+ * multiple VRF registers; all but the first are marked consecutive.
+ */
+static void
+linear_scan_init_live_intervals(struct linear_scan *ls,
+                                struct toy_compiler *tc)
+{
+   const struct toy_inst *inst;
+   int pc, do_pc, while_pc;
+
+   pc = 0;
+   do_pc = -1;
+   while_pc = -1;
+
+   tc_head(tc);
+   while ((inst = tc_next_no_skip(tc)) != NULL) {
+      /* inside a loop, intervals are stretched to cover the whole loop */
+      const int startpoint = (pc <= while_pc) ? do_pc : pc;
+      const int endpoint = (pc <= while_pc) ? while_pc : pc;
+      int vrf, i;
+
+      /*
+       * assume all registers used in this outermost loop are live throughout
+       * the whole loop
+       */
+      if (inst->marker) {
+         /* only the marker of an outermost loop needs processing */
+         if (pc > while_pc) {
+            struct toy_inst *inst2;
+            int loop_level = 1;
+
+            assert(inst->opcode == BRW_OPCODE_DO);
+            do_pc = pc;
+            while_pc = pc + 1;
+
+            /* find the matching BRW_OPCODE_WHILE */
+            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
+                                     &tc->instructions, list) {
+               if (inst2->marker) {
+                  /* validate the nested marker, not the outer one */
+                  assert(inst2->opcode == BRW_OPCODE_DO);
+                  loop_level++;
+                  continue;
+               }
+
+               if (inst2->opcode == BRW_OPCODE_WHILE) {
+                  loop_level--;
+                  if (!loop_level)
+                     break;
+               }
+               while_pc++;
+            }
+         }
+
+         continue;
+      }
+
+      if (inst->dst.file == TOY_FILE_VRF) {
+         int num_dst;
+
+         /* TODO this is a hack */
+         if (inst->opcode == BRW_OPCODE_SEND ||
+             inst->opcode == BRW_OPCODE_SENDC) {
+            /* the response length is read from the message descriptor */
+            const uint32_t mdesc = inst->src[1].val32;
+            int response_length = (mdesc >> 20) & 0x1f;
+
+            num_dst = response_length;
+            if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16)
+               num_dst /= 2;
+         }
+         else {
+            num_dst = 1;
+         }
+
+         vrf = inst->dst.val32 / TOY_REG_WIDTH;
+
+         for (i = 0; i < num_dst; i++) {
+            /* first use */
+            if (!ls->intervals[vrf].vrf)
+               linear_scan_add_live_interval(ls, vrf, startpoint);
+
+            ls->intervals[vrf].endpoint = endpoint;
+            ls->intervals[vrf].consecutive = (i > 0);
+
+            vrf++;
+         }
+      }
+
+      for (i = 0; i < Elements(inst->src); i++) {
+         if (inst->src[i].file != TOY_FILE_VRF)
+            continue;
+
+         vrf = inst->src[i].val32 / TOY_REG_WIDTH;
+
+         /* first use */
+         if (!ls->intervals[vrf].vrf)
+            linear_scan_add_live_interval(ls, vrf, startpoint);
+
+         ls->intervals[vrf].endpoint = endpoint;
+      }
+
+      pc++;
+   }
+}
+
+/**
+ * Release the arrays allocated by linear_scan_init().
+ */
+static void
+linear_scan_cleanup(struct linear_scan *ls)
+{
+   FREE(ls->free_regs);
+   FREE(ls->intervals);
+   FREE(ls->vrf_mapping);
+}
+
+/**
+ * A qsort() comparison function for live intervals.
+ *
+ * Intervals in use are sorted by startpoints first, and then by VRF
+ * numbers.  Unused intervals (vrf == 0) are sorted after all intervals in
+ * use, and compare equal to each other so that the ordering is consistent,
+ * as qsort() requires.
+ */
+static int
+linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
+{
+   const struct linear_scan_live_interval *interval1 = elem1;
+   const struct linear_scan_live_interval *interval2 = elem2;
+   const bool unused1 = !interval1->vrf;
+   const bool unused2 = !interval2->vrf;
+
+   /* make unused elements appear at the end */
+   if (unused1 || unused2)
+      return (int) unused1 - (int) unused2;
+
+   /* sort by startpoints first, and then by vrf */
+   if (interval1->startpoint != interval2->startpoint)
+      return (interval1->startpoint - interval2->startpoint);
+   else
+      return (interval1->vrf - interval2->vrf);
+}
+
+/**
+ * Prepare for linear scan.
+ *
+ * The live intervals are collected and sorted, the free register pool is
+ * filled with registers 0 to num_regs - 1, and the VRF mapping is
+ * allocated.  Return false, with everything allocated so far freed, when an
+ * allocation fails.
+ */
+static bool
+linear_scan_init(struct linear_scan *ls, int num_regs,
+                 struct toy_compiler *tc)
+{
+   /* this may be much larger than ls->num_vrfs... */
+   const int num_intervals = tc->next_vrf;
+   int idx, reg;
+
+   memset(ls, 0, sizeof(*ls));
+
+   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
+   if (!ls->intervals)
+      return false;
+
+   linear_scan_init_live_intervals(ls, tc);
+   /* sort intervals by startpoints */
+   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
+         linear_scan_compare_live_intervals);
+
+   ls->num_regs = num_regs;
+   ls->num_free_regs = num_regs;
+
+   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
+   if (!ls->free_regs)
+      goto fail_intervals;
+
+   /* add in reverse order as we will allocate from the tail */
+   for (idx = 0, reg = num_regs - 1; idx < ls->num_regs; idx++, reg--)
+      ls->free_regs[idx] = reg;
+
+   list_inithead(&ls->active_list);
+
+   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
+   if (!ls->vrf_mapping)
+      goto fail_free_regs;
+
+   return true;
+
+fail_free_regs:
+   FREE(ls->free_regs);
+fail_intervals:
+   FREE(ls->intervals);
+   return false;
+}
+
+/**
+ * Allocate registers with linear scan.
+ *
+ * GRF registers in [start_grf, end_grf] are handed out in units of
+ * num_grf_per_vrf.  On success, all VRF operands are rewritten to GRF
+ * operands.  On failure, the compilation is marked as failed with
+ * tc_fail() and the operands are left untouched.
+ */
+static void
+linear_scan_allocation(struct toy_compiler *tc,
+                       int start_grf, int end_grf,
+                       int num_grf_per_vrf)
+{
+   const int num_grfs = end_grf - start_grf + 1;
+   struct linear_scan ls;
+   struct toy_inst *inst;
+
+   /* do not fail silently; VRF operands would otherwise be left behind */
+   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc)) {
+      tc_fail(tc, "failed to initialize register allocator");
+      return;
+   }
+
+   if (!linear_scan_run(&ls)) {
+      tc_fail(tc, "failed to allocate registers");
+      /* linear_scan_init() succeeded and its allocations must be freed */
+      linear_scan_cleanup(&ls);
+      return;
+   }
+
+   tc_head(tc);
+   while ((inst = tc_next(tc)) != NULL) {
+      int i;
+
+      if (inst->dst.file == TOY_FILE_VRF) {
+         const uint32_t val32 = inst->dst.val32;
+         int reg = val32 / TOY_REG_WIDTH;
+         int subreg = val32 % TOY_REG_WIDTH;
+
+         /* map to GRF */
+         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;
+
+         inst->dst.file = TOY_FILE_GRF;
+         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
+      }
+
+      for (i = 0; i < Elements(inst->src); i++) {
+         const uint32_t val32 = inst->src[i].val32;
+         int reg, subreg;
+
+         if (inst->src[i].file != TOY_FILE_VRF)
+            continue;
+
+         reg = val32 / TOY_REG_WIDTH;
+         subreg = val32 % TOY_REG_WIDTH;
+
+         /* map to GRF */
+         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;
+
+         inst->src[i].file = TOY_FILE_GRF;
+         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
+      }
+   }
+
+   linear_scan_cleanup(&ls);
+}
+
+/**
+ * Trivially allocate registers: each VRF register is mapped to a fixed GRF
+ * register computed from its number alone.  The compilation is marked as
+ * failed when the highest GRF register used goes beyond end_grf.
+ */
+static void
+trivial_allocation(struct toy_compiler *tc,
+                   int start_grf, int end_grf,
+                   int num_grf_per_vrf)
+{
+   struct toy_inst *inst;
+   int highest_grf = -1;
+
+   tc_head(tc);
+   for (inst = tc_next(tc); inst != NULL; inst = tc_next(tc)) {
+      int idx;
+
+      if (inst->dst.file == TOY_FILE_VRF) {
+         const uint32_t vrf_val = inst->dst.val32;
+         const int subreg = vrf_val % TOY_REG_WIDTH;
+         int reg = vrf_val / TOY_REG_WIDTH;
+
+         reg = reg * num_grf_per_vrf + start_grf - 1;
+
+         inst->dst.file = TOY_FILE_GRF;
+         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
+
+         if (highest_grf < reg)
+            highest_grf = reg;
+      }
+
+      for (idx = 0; idx < Elements(inst->src); idx++) {
+         const uint32_t vrf_val = inst->src[idx].val32;
+         int reg, subreg;
+
+         if (inst->src[idx].file != TOY_FILE_VRF)
+            continue;
+
+         subreg = vrf_val % TOY_REG_WIDTH;
+         reg = vrf_val / TOY_REG_WIDTH;
+
+         reg = reg * num_grf_per_vrf + start_grf - 1;
+
+         inst->src[idx].file = TOY_FILE_GRF;
+         inst->src[idx].val32 = reg * TOY_REG_WIDTH + subreg;
+
+         if (highest_grf < reg)
+            highest_grf = reg;
+      }
+   }
+
+   /* the last VRF register takes num_grf_per_vrf GRF registers too */
+   if (highest_grf + num_grf_per_vrf - 1 > end_grf)
+      tc_fail(tc, "failed to allocate registers");
+}
+
+/**
+ * Allocate GRF registers to VRF registers.
+ *
+ * Linear scan is always used; the trivial allocator is kept as an
+ * alternative that is currently never selected.
+ */
+void
+toy_compiler_allocate_registers(struct toy_compiler *tc,
+                                int start_grf, int end_grf,
+                                int num_grf_per_vrf)
+{
+   const bool use_linear_scan = true;
+
+   if (!use_linear_scan) {
+      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
+      return;
+   }
+
+   linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_optimize.c b/src/gallium/drivers/ilo/shader/toy_optimize.c
new file mode 100644
index 00000000000..62a663f4901
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_optimize.c
@@ -0,0 +1,71 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "toy_compiler.h"
+#include "toy_tgsi.h"
+#include "toy_optimize.h"
+
+/**
+ * This just eliminates instructions with null dst so far.
+ *
+ * Flow control, SEND/SENDC and NOP are always kept.  Any other instruction
+ * is discarded when its destination is null (or has an empty writemask), it
+ * has no conditional modifier, and acc_wr_ctrl is not set.
+ */
+static void
+eliminate_dead_code(struct toy_compiler *tc)
+{
+   struct toy_inst *inst;
+
+   tc_head(tc);
+   while ((inst = tc_next(tc)) != NULL) {
+      switch (inst->opcode) {
+      case BRW_OPCODE_IF:
+      case BRW_OPCODE_ELSE:
+      case BRW_OPCODE_ENDIF:
+      case BRW_OPCODE_WHILE:
+      case BRW_OPCODE_BREAK:
+      case BRW_OPCODE_CONTINUE:
+      case BRW_OPCODE_SEND:
+      case BRW_OPCODE_SENDC:
+      case BRW_OPCODE_NOP:
+         /* never eliminated */
+         continue;
+      default:
+         break;
+      }
+
+      /* something is written to a real destination */
+      if (!tdst_is_null(inst->dst) && inst->dst.writemask)
+         continue;
+
+      /* acc_wr_ctrl is set: keep the instruction */
+      if (inst->acc_wr_ctrl)
+         continue;
+
+      /* math is always BRW_CONDITIONAL_NONE */
+      if (inst->opcode != BRW_OPCODE_MATH &&
+          inst->cond_modifier != BRW_CONDITIONAL_NONE)
+         continue;
+
+      tc_discard_inst(tc, inst);
+   }
+}
+
+/**
+ * Run the optimization passes on the instructions of \p tc.  Dead code
+ * elimination is the only pass at the moment.
+ */
+void
+toy_compiler_optimize(struct toy_compiler *tc)
+{
+ eliminate_dead_code(tc);
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_optimize.h b/src/gallium/drivers/ilo/shader/toy_optimize.h
new file mode 100644
index 00000000000..f65198c7538
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_optimize.h
@@ -0,0 +1,36 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef TOY_OPTIMIZE_H
+#define TOY_OPTIMIZE_H
+
+#include "toy_compiler.h"
+
+/**
+ * Run the optimization passes on the instructions held by \p tc.
+ */
+void
+toy_compiler_optimize(struct toy_compiler *tc);
+
+#endif /* TOY_OPTIMIZE_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c
new file mode 100644
index 00000000000..c2b1da5ed55
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c
@@ -0,0 +1,2736 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_strings.h"
+#include "util/u_hash_table.h"
+#include "toy_helpers.h"
+#include "toy_tgsi.h"
+
+/*
+ * Map TGSI opcode to GEN opcode 1-to-1.
+ *
+ * The table is indexed by TGSI opcode and is consulted by aos_simple().  Each
+ * entry holds the opcode to emit and the numbers of destination and source
+ * registers the TGSI instruction is expected to have; aos_simple() asserts
+ * on the latter two.  Opcodes without an entry are zero-initialized, which
+ * aos_simple() treats as an invalid call.
+ */
+static const struct {
+ int opcode;
+ int num_dst;
+ int num_src;
+} aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
+ [TGSI_OPCODE_ARL] = { BRW_OPCODE_RNDD, 1, 1 },
+ [TGSI_OPCODE_MOV] = { BRW_OPCODE_MOV, 1, 1 },
+ [TGSI_OPCODE_RCP] = { TOY_OPCODE_INV, 1, 1 },
+ [TGSI_OPCODE_RSQ] = { TOY_OPCODE_RSQ, 1, 1 },
+ [TGSI_OPCODE_MUL] = { BRW_OPCODE_MUL, 1, 2 },
+ [TGSI_OPCODE_ADD] = { BRW_OPCODE_ADD, 1, 2 },
+ [TGSI_OPCODE_DP3] = { BRW_OPCODE_DP3, 1, 2 },
+ [TGSI_OPCODE_DP4] = { BRW_OPCODE_DP4, 1, 2 },
+ [TGSI_OPCODE_MIN] = { BRW_OPCODE_SEL, 1, 2 },
+ [TGSI_OPCODE_MAX] = { BRW_OPCODE_SEL, 1, 2 },
+ /* a later pass will move src[2] to accumulator */
+ [TGSI_OPCODE_MAD] = { BRW_OPCODE_MAC, 1, 3 },
+ [TGSI_OPCODE_SUB] = { BRW_OPCODE_ADD, 1, 2 },
+ [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 },
+ [TGSI_OPCODE_FRC] = { BRW_OPCODE_FRC, 1, 1 },
+ [TGSI_OPCODE_FLR] = { BRW_OPCODE_RNDD, 1, 1 },
+ [TGSI_OPCODE_ROUND] = { BRW_OPCODE_RNDE, 1, 1 },
+ [TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 },
+ [TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 },
+ [TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 },
+ [TGSI_OPCODE_ABS] = { BRW_OPCODE_MOV, 1, 1 },
+ [TGSI_OPCODE_DPH] = { BRW_OPCODE_DPH, 1, 2 },
+ [TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 },
+ [TGSI_OPCODE_KILP] = { TOY_OPCODE_KIL, 0, 0 },
+ [TGSI_OPCODE_SIN] = { TOY_OPCODE_SIN, 1, 1 },
+ [TGSI_OPCODE_ARR] = { BRW_OPCODE_RNDZ, 1, 1 },
+ [TGSI_OPCODE_DP2] = { BRW_OPCODE_DP2, 1, 2 },
+ [TGSI_OPCODE_IF] = { BRW_OPCODE_IF, 0, 1 },
+ [TGSI_OPCODE_UIF] = { BRW_OPCODE_IF, 0, 1 },
+ [TGSI_OPCODE_ELSE] = { BRW_OPCODE_ELSE, 0, 0 },
+ [TGSI_OPCODE_ENDIF] = { BRW_OPCODE_ENDIF, 0, 0 },
+ [TGSI_OPCODE_I2F] = { BRW_OPCODE_MOV, 1, 1 },
+ [TGSI_OPCODE_NOT] = { BRW_OPCODE_NOT, 1, 1 },
+ [TGSI_OPCODE_TRUNC] = { BRW_OPCODE_RNDZ, 1, 1 },
+ [TGSI_OPCODE_SHL] = { BRW_OPCODE_SHL, 1, 2 },
+ [TGSI_OPCODE_AND] = { BRW_OPCODE_AND, 1, 2 },
+ [TGSI_OPCODE_OR] = { BRW_OPCODE_OR, 1, 2 },
+ [TGSI_OPCODE_MOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
+ [TGSI_OPCODE_XOR] = { BRW_OPCODE_XOR, 1, 2 },
+ [TGSI_OPCODE_EMIT] = { TOY_OPCODE_EMIT, 0, 0 },
+ [TGSI_OPCODE_ENDPRIM] = { TOY_OPCODE_ENDPRIM, 0, 0 },
+ [TGSI_OPCODE_NOP] = { BRW_OPCODE_NOP, 0, 0 },
+ [TGSI_OPCODE_KIL] = { TOY_OPCODE_KIL, 0, 1 },
+ [TGSI_OPCODE_END] = { BRW_OPCODE_NOP, 0, 0 },
+ [TGSI_OPCODE_F2I] = { BRW_OPCODE_MOV, 1, 1 },
+ [TGSI_OPCODE_IDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
+ [TGSI_OPCODE_IMAX] = { BRW_OPCODE_SEL, 1, 2 },
+ [TGSI_OPCODE_IMIN] = { BRW_OPCODE_SEL, 1, 2 },
+ [TGSI_OPCODE_INEG] = { BRW_OPCODE_MOV, 1, 1 },
+ [TGSI_OPCODE_ISHR] = { BRW_OPCODE_ASR, 1, 2 },
+ [TGSI_OPCODE_F2U] = { BRW_OPCODE_MOV, 1, 1 },
+ [TGSI_OPCODE_U2F] = { BRW_OPCODE_MOV, 1, 1 },
+ [TGSI_OPCODE_UADD] = { BRW_OPCODE_ADD, 1, 2 },
+ [TGSI_OPCODE_UDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 },
+ /* a later pass will move src[2] to accumulator */
+ [TGSI_OPCODE_UMAD] = { BRW_OPCODE_MAC, 1, 3 },
+ [TGSI_OPCODE_UMAX] = { BRW_OPCODE_SEL, 1, 2 },
+ [TGSI_OPCODE_UMIN] = { BRW_OPCODE_SEL, 1, 2 },
+ [TGSI_OPCODE_UMOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 },
+ [TGSI_OPCODE_UMUL] = { BRW_OPCODE_MUL, 1, 2 },
+ [TGSI_OPCODE_USHR] = { BRW_OPCODE_SHR, 1, 2 },
+ [TGSI_OPCODE_UARL] = { BRW_OPCODE_MOV, 1, 1 },
+ [TGSI_OPCODE_IABS] = { BRW_OPCODE_MOV, 1, 1 },
+};
+
+/**
+ * Translate a TGSI instruction that has an entry in aos_simple_opcode_map
+ * into a single instruction.
+ *
+ * Depending on the TGSI opcode, the sources are adjusted first (negation,
+ * absolute value, replication of the X channel) or a conditional modifier
+ * is selected.  IF and UIF gain a second source holding zero to compare
+ * against.  The adjustments are written back into the caller's \p src
+ * array.  Nothing is emitted for opcodes that map to BRW_OPCODE_NOP.
+ */
+static void
+aos_simple(struct toy_compiler *tc,
+           const struct tgsi_full_instruction *tgsi_inst,
+           struct toy_dst *dst,
+           struct toy_src *src)
+{
+   const unsigned tgsi_opcode = tgsi_inst->Instruction.Opcode;
+   const int gen_opcode = aos_simple_opcode_map[tgsi_opcode].opcode;
+   const int dst_count = tgsi_inst->Instruction.NumDstRegs;
+   int src_count = tgsi_inst->Instruction.NumSrcRegs;
+   struct toy_inst *inst;
+   int s;
+
+   assert(dst_count == aos_simple_opcode_map[tgsi_opcode].num_dst);
+   assert(src_count == aos_simple_opcode_map[tgsi_opcode].num_src);
+   if (!gen_opcode) {
+      assert(!"invalid aos_simple() call");
+      return;
+   }
+
+   /* no need to emit nop */
+   if (gen_opcode == BRW_OPCODE_NOP)
+      return;
+
+   inst = tc_add(tc);
+   if (!inst)
+      return;
+
+   inst->opcode = gen_opcode;
+   inst->cond_modifier = BRW_CONDITIONAL_NONE;
+
+   switch (tgsi_opcode) {
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_UMIN:
+      /* SEL.l picks the smaller source */
+      inst->cond_modifier = BRW_CONDITIONAL_L;
+      break;
+   case TGSI_OPCODE_MAX:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_UMAX:
+      /* SEL.ge picks the larger source */
+      inst->cond_modifier = BRW_CONDITIONAL_GE;
+      break;
+   case TGSI_OPCODE_SUB:
+      /* a - b is emitted as a + (-b) */
+      src[1] = tsrc_negate(src[1]);
+      break;
+   case TGSI_OPCODE_INEG:
+      src[0] = tsrc_negate(src[0]);
+      break;
+   case TGSI_OPCODE_ABS:
+   case TGSI_OPCODE_IABS:
+      src[0] = tsrc_absolute(src[0]);
+      break;
+   case TGSI_OPCODE_IF:
+      /* compare src.x against float zero */
+      assert(src[0].type == TOY_TYPE_F);
+      inst->cond_modifier = BRW_CONDITIONAL_NEQ;
+      src_count = 2;
+      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+      src[1] = tsrc_imm_f(0.0f);
+      break;
+   case TGSI_OPCODE_UIF:
+      /* compare src.x against integer zero */
+      assert(src[0].type == TOY_TYPE_D);
+      inst->cond_modifier = BRW_CONDITIONAL_NEQ;
+      src_count = 2;
+      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+      src[1] = tsrc_imm_d(0);
+      break;
+   case TGSI_OPCODE_POW:
+      /* both operands are scalars */
+      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+      src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
+      break;
+   case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_EX2:
+   case TGSI_OPCODE_LG2:
+   case TGSI_OPCODE_COS:
+   case TGSI_OPCODE_SIN:
+      /* the operand is a scalar */
+      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+      break;
+   default:
+      break;
+   }
+
+   if (dst_count) {
+      assert(dst_count == 1);
+      inst->dst = dst[0];
+   }
+
+   assert(src_count <= Elements(inst->src));
+   for (s = 0; s < src_count; s++)
+      inst->src[s] = src[s];
+}
+
+/**
+ * Translate the TGSI "set on condition" opcodes (SLT, SGE, SEQ, SGT, SLE,
+ * SNE and the integer variants handled below).
+ *
+ * dst is cleared first, src[0] and src[1] are compared, and dst is then
+ * overwritten with the "true" value by a predicated MOV.  "True" is 1.0f
+ * for float destinations and all bits set for integer destinations.
+ */
+static void
+aos_set_on_cond(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst,
+                struct toy_src *src)
+{
+   struct toy_src false_val, true_val;
+   struct toy_inst *set_true;
+   int cmp_cond;
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_USEQ:
+      cmp_cond = BRW_CONDITIONAL_EQ;
+      break;
+   case TGSI_OPCODE_SNE:
+   case TGSI_OPCODE_USNE:
+      cmp_cond = BRW_CONDITIONAL_NEQ;
+      break;
+   case TGSI_OPCODE_SLT:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_USLT:
+      cmp_cond = BRW_CONDITIONAL_L;
+      break;
+   case TGSI_OPCODE_SLE:
+      cmp_cond = BRW_CONDITIONAL_LE;
+      break;
+   case TGSI_OPCODE_SGT:
+      cmp_cond = BRW_CONDITIONAL_G;
+      break;
+   case TGSI_OPCODE_SGE:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_USGE:
+      cmp_cond = BRW_CONDITIONAL_GE;
+      break;
+   default:
+      assert(!"invalid aos_set_on_cond() call");
+      return;
+   }
+
+   /* note that for integer versions, all bits are set */
+   if (dst[0].type == TOY_TYPE_D) {
+      false_val = tsrc_imm_d(0);
+      true_val = tsrc_imm_d(-1);
+   } else if (dst[0].type == TOY_TYPE_UD) {
+      false_val = tsrc_imm_ud(0);
+      true_val = tsrc_imm_ud(~0);
+   } else {
+      false_val = tsrc_imm_f(0.0f);
+      true_val = tsrc_imm_f(1.0f);
+   }
+
+   tc_MOV(tc, dst[0], false_val);
+   tc_CMP(tc, tdst_null(), src[0], src[1], cmp_cond);
+
+   /* only the channels that passed the comparison are overwritten */
+   set_true = tc_MOV(tc, dst[0], true_val);
+   set_true->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI CMP and UCMP into a CMP followed by a predicated SEL.
+ *
+ * CMP selects src[1] where src[0] is less than zero, and src[2] elsewhere.
+ * UCMP selects src[1] where src[0] is non-zero, and src[2] elsewhere.  An
+ * unsigned value can never be less than zero, so UCMP must test for
+ * inequality rather than reuse the "less than" condition of CMP.
+ */
+static void
+aos_compare(struct toy_compiler *tc,
+            const struct tgsi_full_instruction *tgsi_inst,
+            struct toy_dst *dst,
+            struct toy_src *src)
+{
+   struct toy_inst *inst;
+   struct toy_src zero;
+   int cond;
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_CMP:
+      zero = tsrc_imm_f(0.0f);
+      cond = BRW_CONDITIONAL_L;
+      break;
+   case TGSI_OPCODE_UCMP:
+      zero = tsrc_imm_ud(0);
+      cond = BRW_CONDITIONAL_NEQ;
+      break;
+   default:
+      assert(!"invalid aos_compare() call");
+      return;
+   }
+
+   tc_CMP(tc, tdst_null(), src[0], zero, cond);
+
+   /* predicated SEL: src[1] where the comparison passed, src[2] elsewhere */
+   inst = tc_SEL(tc, dst[0], src[1], src[2], BRW_CONDITIONAL_NONE);
+   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI SSG and ISSG.
+ *
+ * dst is set to zero, then overwritten with one in the channels where
+ * src[0] is greater than zero, and with minus one in the channels where
+ * src[0] is less than zero.
+ */
+static void
+aos_set_sign(struct toy_compiler *tc,
+             const struct tgsi_full_instruction *tgsi_inst,
+             struct toy_dst *dst,
+             struct toy_src *src)
+{
+   const int is_float = (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_SSG);
+   struct toy_src zero, positive, negative;
+   struct toy_inst *inst;
+
+   if (!is_float && tgsi_inst->Instruction.Opcode != TGSI_OPCODE_ISSG) {
+      assert(!"invalid aos_set_sign() call");
+      return;
+   }
+
+   if (is_float) {
+      zero = tsrc_imm_f(0.0f);
+      positive = tsrc_imm_f(1.0f);
+      negative = tsrc_imm_f(-1.0f);
+   } else {
+      zero = tsrc_imm_d(0);
+      positive = tsrc_imm_d(1);
+      negative = tsrc_imm_d(-1);
+   }
+
+   /* default result */
+   tc_MOV(tc, dst[0], zero);
+
+   /* src > 0 */
+   tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_G);
+   inst = tc_MOV(tc, dst[0], positive);
+   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
+
+   /* src < 0 */
+   tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L);
+   inst = tc_MOV(tc, dst[0], negative);
+   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate the TGSI texturing opcodes into the matching TOY_OPCODE_TGSI_*
+ * instruction.
+ *
+ * The destination and all sources are forwarded unchanged, and the texture
+ * target is recorded in the instruction.  Texel offsets are not supported
+ * and make the compilation fail.
+ */
+static void
+aos_tex(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   struct toy_inst *inst;
+   enum toy_opcode opcode;
+   int i;
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_TEX:
+      opcode = TOY_OPCODE_TGSI_TEX;
+      break;
+   case TGSI_OPCODE_TXD:
+      opcode = TOY_OPCODE_TGSI_TXD;
+      break;
+   case TGSI_OPCODE_TXP:
+      opcode = TOY_OPCODE_TGSI_TXP;
+      break;
+   case TGSI_OPCODE_TXB:
+      opcode = TOY_OPCODE_TGSI_TXB;
+      break;
+   case TGSI_OPCODE_TXL:
+      opcode = TOY_OPCODE_TGSI_TXL;
+      break;
+   case TGSI_OPCODE_TXF:
+      opcode = TOY_OPCODE_TGSI_TXF;
+      break;
+   case TGSI_OPCODE_TXQ:
+      opcode = TOY_OPCODE_TGSI_TXQ;
+      break;
+   case TGSI_OPCODE_TXQ_LZ:
+      opcode = TOY_OPCODE_TGSI_TXQ_LZ;
+      break;
+   case TGSI_OPCODE_TEX2:
+      opcode = TOY_OPCODE_TGSI_TEX2;
+      /* must not fall through, or TEX2 would be emitted as TXB2 */
+      break;
+   case TGSI_OPCODE_TXB2:
+      opcode = TOY_OPCODE_TGSI_TXB2;
+      break;
+   case TGSI_OPCODE_TXL2:
+      opcode = TOY_OPCODE_TGSI_TXL2;
+      break;
+   default:
+      assert(!"unsupported texturing opcode");
+      return;
+   }
+
+   assert(tgsi_inst->Instruction.Texture);
+
+   /* same handling of a failed tc_add() as in aos_simple() */
+   inst = tc_add(tc);
+   if (!inst)
+      return;
+
+   inst->opcode = opcode;
+   inst->tex.target = tgsi_inst->Texture.Texture;
+
+   assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
+   assert(tgsi_inst->Instruction.NumDstRegs == 1);
+
+   inst->dst = dst[0];
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
+      inst->src[i] = src[i];
+
+   /* texel offsets are not implemented */
+   if (tgsi_inst->Texture.NumOffsets)
+      tc_fail(tc, "texelFetchOffset unsupported");
+}
+
+/**
+ * Translate the TGSI sampling opcodes (SAMPLE*, GATHER4, SVIEWINFO, ...)
+ * into the matching TOY_OPCODE_TGSI_* instruction.
+ *
+ * The destination and all sources are forwarded unchanged.  This path is
+ * untested and asserts unconditionally in debug builds.
+ */
+static void
+aos_sample(struct toy_compiler *tc,
+           const struct tgsi_full_instruction *tgsi_inst,
+           struct toy_dst *dst,
+           struct toy_src *src)
+{
+   struct toy_inst *inst;
+   enum toy_opcode opcode;
+   int i;
+
+   assert(!"sampling untested");
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_SAMPLE:
+      opcode = TOY_OPCODE_TGSI_SAMPLE;
+      break;
+   case TGSI_OPCODE_SAMPLE_I:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_I;
+      break;
+   case TGSI_OPCODE_SAMPLE_I_MS:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
+      break;
+   case TGSI_OPCODE_SAMPLE_B:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_B;
+      break;
+   case TGSI_OPCODE_SAMPLE_C:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_C;
+      break;
+   case TGSI_OPCODE_SAMPLE_C_LZ:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
+      break;
+   case TGSI_OPCODE_SAMPLE_D:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_D;
+      break;
+   case TGSI_OPCODE_SAMPLE_L:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_L;
+      break;
+   case TGSI_OPCODE_GATHER4:
+      opcode = TOY_OPCODE_TGSI_GATHER4;
+      break;
+   case TGSI_OPCODE_SVIEWINFO:
+      opcode = TOY_OPCODE_TGSI_SVIEWINFO;
+      break;
+   case TGSI_OPCODE_SAMPLE_POS:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
+      break;
+   case TGSI_OPCODE_SAMPLE_INFO:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
+      break;
+   default:
+      assert(!"unsupported sampling opcode");
+      return;
+   }
+
+   /* same handling of a failed tc_add() as in aos_simple() */
+   inst = tc_add(tc);
+   if (!inst)
+      return;
+
+   inst->opcode = opcode;
+
+   assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
+   assert(tgsi_inst->Instruction.NumDstRegs == 1);
+
+   inst->dst = dst[0];
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
+      inst->src[i] = src[i];
+}
+
+/**
+ * Translate TGSI LIT.
+ *
+ * X and W are set to 1.0.  When Y or Z is written, both are cleared to 0.0
+ * and then, in the channels where src.x is greater than zero, Y receives
+ * src.x and Z receives src.y raised to the power of src.w.
+ */
+static void
+aos_LIT(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src src_x = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+   const struct toy_src src_y = tsrc_swizzle1(src[0], TOY_SWIZZLE_Y);
+   const struct toy_src src_w = tsrc_swizzle1(src[0], TOY_SWIZZLE_W);
+   struct toy_inst *inst;
+
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
+
+   /* nothing more to do unless Y or Z is written */
+   if ((dst[0].writemask & TOY_WRITEMASK_YZ) == 0)
+      return;
+
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
+
+   /* the two instructions below are predicated on src.x > 0 */
+   tc_CMP(tc, tdst_null(), src_x, tsrc_imm_f(0.0f), BRW_CONDITIONAL_G);
+
+   inst = tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src_x);
+   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
+
+   /* clamp W to (-128, 128)? */
+   inst = tc_POW(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src_y, src_w);
+   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI EXP.
+ *
+ * X is built by rounding src.x down to an integer, adding the bias 127 and
+ * shifting the sum left by 23 bits; Y is the fractional part of src.x; Z is
+ * the EXP of src.x; W is 1.0.
+ */
+static void
+aos_EXP(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src x = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+
+   if (dst[0].writemask & TOY_WRITEMASK_X) {
+      const struct toy_dst biased =
+         tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
+      const struct toy_dst dst_x =
+         tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X));
+
+      tc_RNDD(tc, biased, x);
+
+      /* construct the floating point number manually */
+      tc_ADD(tc, biased, tsrc_from(biased), tsrc_imm_d(127));
+      tc_SHL(tc, dst_x, tsrc_from(biased), tsrc_imm_d(23));
+   }
+
+   tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), x);
+   tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), x);
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI LOG.
+ *
+ * X and Y are derived from the bit pattern of src.x: X from the bits above
+ * bit 22 with the bias 127 removed, Y from the low 23 bits combined with a
+ * biased exponent of 127.  Z is the LOG of src.x and W is 1.0.
+ */
+static void
+aos_LOG(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src x = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+
+   if (dst[0].writemask & TOY_WRITEMASK_XY) {
+      const struct toy_src x_bits = tsrc_d(x);
+      const struct toy_dst scratch =
+         tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
+
+      /* exponent */
+      tc_SHR(tc, scratch, tsrc_absolute(x_bits), tsrc_imm_d(23));
+      tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
+             tsrc_from(scratch), tsrc_imm_d(-127));
+
+      /* mantissa */
+      tc_AND(tc, scratch, x_bits, tsrc_imm_d((1 << 23) - 1));
+      tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
+            tsrc_from(scratch), tsrc_imm_d(127 << 23));
+   }
+
+   tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), x);
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI DST: X is 1.0, Y is the product of src[0] and src[1], Z is
+ * copied from src[0] and W is copied from src[1].
+ */
+static void
+aos_DST(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_dst dst_x = tdst_writemask(dst[0], TOY_WRITEMASK_X);
+   const struct toy_dst dst_y = tdst_writemask(dst[0], TOY_WRITEMASK_Y);
+   const struct toy_dst dst_z = tdst_writemask(dst[0], TOY_WRITEMASK_Z);
+   const struct toy_dst dst_w = tdst_writemask(dst[0], TOY_WRITEMASK_W);
+
+   tc_MOV(tc, dst_x, tsrc_imm_f(1.0f));
+   tc_MUL(tc, dst_y, src[0], src[1]);
+   tc_MOV(tc, dst_z, src[0]);
+   tc_MOV(tc, dst_w, src[1]);
+}
+
+/**
+ * Translate TGSI LRP: dst = src[0] * src[1] + (1 - src[0]) * src[2].
+ */
+static void
+aos_LRP(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_dst weighted = tc_alloc_tmp(tc);
+   const struct toy_src one = tsrc_imm_f(1.0f);
+
+   /* weighted = (1 - src[0]) * src[2] */
+   tc_ADD(tc, weighted, tsrc_negate(src[0]), one);
+   tc_MUL(tc, weighted, tsrc_from(weighted), src[2]);
+
+   /* dst = src[0] * src[1] + weighted */
+   tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(weighted));
+}
+
+/**
+ * Translate TGSI CND: src[0] is selected where src[2] is greater than 0.5,
+ * and src[1] elsewhere.  This path is untested and asserts unconditionally
+ * in debug builds.
+ */
+static void
+aos_CND(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src threshold = tsrc_imm_f(0.5f);
+   struct toy_inst *select;
+
+   assert(!"CND untested");
+
+   tc_CMP(tc, tdst_null(), src[2], threshold, BRW_CONDITIONAL_G);
+
+   select = tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_NONE);
+   select->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI DP2A: the two-component dot product of src[0] and src[1]
+ * plus src[2].x, replicated to all written channels.
+ *
+ * The addend is swizzled to X explicitly; without it each destination
+ * channel would add its own channel of src[2].  This path is untested and
+ * asserts unconditionally in debug builds.
+ */
+static void
+aos_DP2A(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   struct toy_dst tmp = tc_alloc_tmp(tc);
+
+   assert(!"DP2A untested");
+
+   tc_DP2(tc, tmp, src[0], src[1]);
+   tc_ADD(tc, dst[0],
+          tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X),
+          tsrc_swizzle1(src[2], TOY_SWIZZLE_X));
+}
+
+/**
+ * Translate TGSI CLAMP with two SELs: the first keeps the larger of src[0]
+ * and src[1], the second keeps the smaller of src[2] and that result.  This
+ * path is untested and asserts unconditionally in debug builds.
+ */
+static void
+aos_CLAMP(struct toy_compiler *tc,
+          const struct tgsi_full_instruction *tgsi_inst,
+          struct toy_dst *dst,
+          struct toy_src *src)
+{
+   const struct toy_src lower_bounded = tsrc_from(dst[0]);
+
+   assert(!"CLAMP untested");
+
+   /* dst = max(src[0], src[1]) */
+   tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_GE);
+
+   /* dst = min(src[2], dst) */
+   tc_SEL(tc, dst[0], src[2], lower_bounded, BRW_CONDITIONAL_L);
+}
+
+/**
+ * Translate TGSI XPD (cross product).
+ *
+ * dst.xyz = src[0].yzx * src[1].zxy - src[0].zxy * src[1].yzx, and dst.w is
+ * set to 1.0.
+ */
+static void
+aos_XPD(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_dst product = tc_alloc_tmp(tc);
+   const struct toy_src a_zxy = tsrc_swizzle(src[0],
+         TOY_SWIZZLE_Z, TOY_SWIZZLE_X, TOY_SWIZZLE_Y, TOY_SWIZZLE_W);
+   const struct toy_src b_yzx = tsrc_swizzle(src[1],
+         TOY_SWIZZLE_Y, TOY_SWIZZLE_Z, TOY_SWIZZLE_X, TOY_SWIZZLE_W);
+   const struct toy_src a_yzx = tsrc_swizzle(src[0],
+         TOY_SWIZZLE_Y, TOY_SWIZZLE_Z, TOY_SWIZZLE_X, TOY_SWIZZLE_W);
+   const struct toy_src b_zxy = tsrc_swizzle(src[1],
+         TOY_SWIZZLE_Z, TOY_SWIZZLE_X, TOY_SWIZZLE_Y, TOY_SWIZZLE_W);
+
+   /* the term to be subtracted */
+   tc_MUL(tc, tdst_writemask(product, TOY_WRITEMASK_XYZ), a_zxy, b_yzx);
+
+   tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
+          a_yzx, b_zxy, tsrc_negate(tsrc_from(product)));
+
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI PK2H: src.y, viewed as an unsigned integer, is shifted
+ * left by 16 bits and OR'ed with src.x.  This path is untested and asserts
+ * unconditionally in debug builds.
+ *
+ * NOTE(review): the sources are only retyped with tsrc_ud(); whether any
+ * conversion to half-float happens elsewhere is not visible here.
+ */
+static void
+aos_PK2H(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   const struct toy_src low = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
+   const struct toy_src high = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
+   const struct toy_dst shifted = tdst_ud(tc_alloc_tmp(tc));
+
+   assert(!"PK2H untested");
+
+   /* high half goes to bits 16 and up */
+   tc_SHL(tc, shifted, high, tsrc_imm_ud(16));
+   tc_OR(tc, tdst_ud(dst[0]), low, tsrc_from(shifted));
+}
+
+/**
+ * Translate TGSI SFL: dst is unconditionally set to 0.0.  This path is
+ * untested and asserts unconditionally in debug builds.
+ */
+static void
+aos_SFL(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src always_false = tsrc_imm_f(0.0f);
+
+   assert(!"SFL untested");
+
+   tc_MOV(tc, dst[0], always_false);
+}
+
+/**
+ * Translate TGSI STR: dst is unconditionally set to 1.0.  This path is
+ * untested and asserts unconditionally in debug builds.
+ */
+static void
+aos_STR(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src always_true = tsrc_imm_f(1.0f);
+
+   assert(!"STR untested");
+
+   tc_MOV(tc, dst[0], always_true);
+}
+
+/**
+ * Translate TGSI UP2H: X and Z receive the low 16 bits of the source viewed
+ * as an unsigned integer, Y and W receive the source shifted right by 16
+ * bits.  This path is untested and asserts unconditionally in debug builds.
+ */
+static void
+aos_UP2H(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   const struct toy_src packed = tsrc_ud(src[0]);
+   const struct toy_dst unpacked = tdst_ud(dst[0]);
+
+   assert(!"UP2H untested");
+
+   /* low halves */
+   tc_AND(tc, tdst_writemask(unpacked, TOY_WRITEMASK_XZ),
+          packed, tsrc_imm_ud(0xffff));
+
+   /* high halves */
+   tc_SHR(tc, tdst_writemask(unpacked, TOY_WRITEMASK_YW),
+          packed, tsrc_imm_ud(16));
+}
+
+/**
+ * Translate TGSI SCS: X is the cosine of src.x, Y is the sine of src.x, Z is
+ * 0.0 and W is 1.0.
+ *
+ * The source is swizzled to X for both math instructions, as aos_simple()
+ * does for COS and SIN; otherwise the sine written to Y would be computed
+ * from src.y.  This path is untested and asserts unconditionally in debug
+ * builds.
+ */
+static void
+aos_SCS(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src angle = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+
+   assert(!"SCS untested");
+
+   tc_add1(tc, TOY_OPCODE_COS,
+           tdst_writemask(dst[0], TOY_WRITEMASK_X), angle);
+
+   tc_add1(tc, TOY_OPCODE_SIN,
+           tdst_writemask(dst[0], TOY_WRITEMASK_Y), angle);
+
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI NRM: dst.xyz is src multiplied by the INV of the
+ * three-component dot product of src with itself, and dst.w is 1.0.  This
+ * path is untested and asserts unconditionally in debug builds.
+ *
+ * NOTE(review): the scale is INV(dot), not a reciprocal square root --
+ * confirm against the TGSI definition of NRM before relying on it.
+ */
+static void
+aos_NRM(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_dst scale = tc_alloc_tmp(tc);
+   const struct toy_dst dst_xyz = tdst_writemask(dst[0], TOY_WRITEMASK_XYZ);
+   const struct toy_dst dst_w = tdst_writemask(dst[0], TOY_WRITEMASK_W);
+
+   assert(!"NRM untested");
+
+   tc_DP3(tc, scale, src[0], src[0]);
+   tc_INV(tc, scale, tsrc_from(scale));
+   tc_MUL(tc, dst_xyz, src[0], tsrc_from(scale));
+
+   tc_MOV(tc, dst_w, tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI DIV as src[0] multiplied by the INV of src[1].  This path
+ * is untested and asserts unconditionally in debug builds.
+ */
+static void
+aos_DIV(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_dst inverse = tc_alloc_tmp(tc);
+
+   assert(!"DIV untested");
+
+   /* inverse = INV(src[1]) */
+   tc_INV(tc, inverse, src[1]);
+
+   /* dst = src[0] * inverse */
+   tc_MUL(tc, dst[0], src[0], tsrc_from(inverse));
+}
+
+/**
+ * Translate TGSI BRK into BRW_OPCODE_BREAK.  The operands are unused.
+ */
+static void
+aos_BRK(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   (void) tgsi_inst;
+   (void) dst;
+   (void) src;
+
+   tc_add0(tc, BRW_OPCODE_BREAK);
+}
+
+/**
+ * Translate TGSI CEIL as the negation of RNDD applied to the negated
+ * source, i.e. ceil(x) = -floor(-x).
+ */
+static void
+aos_CEIL(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   const struct toy_dst floor_of_neg = tc_alloc_tmp(tc);
+   const struct toy_src negated = tsrc_negate(src[0]);
+
+   tc_RNDD(tc, floor_of_neg, negated);
+   tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(floor_of_neg)));
+}
+
+/**
+ * Translate TGSI SAD: dst = |src[0] - src[1]| + src[2].  This path is
+ * untested and asserts unconditionally in debug builds.
+ */
+static void
+aos_SAD(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_dst diff = tc_alloc_tmp(tc);
+
+   assert(!"SAD untested");
+
+   /* diff = src[0] - src[1] */
+   tc_ADD(tc, diff, src[0], tsrc_negate(src[1]));
+
+   /* dst = |diff| + src[2] */
+   tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(diff)), src[2]);
+}
+
+/**
+ * Translate TGSI CONT into BRW_OPCODE_CONTINUE.  The operands are unused.
+ */
+static void
+aos_CONT(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   (void) tgsi_inst;
+   (void) dst;
+   (void) src;
+
+   tc_add0(tc, BRW_OPCODE_CONTINUE);
+}
+
+/**
+ * Translate TGSI BGNLOOP into a BRW_OPCODE_DO instruction that is flagged
+ * as a marker.
+ */
+static void
+aos_BGNLOOP(struct toy_compiler *tc,
+            const struct tgsi_full_instruction *tgsi_inst,
+            struct toy_dst *dst,
+            struct toy_src *src)
+{
+   struct toy_inst *loop_begin = tc_add0(tc, BRW_OPCODE_DO);
+
+   /* this is just a marker */
+   loop_begin->marker = true;
+}
+
+/**
+ * Translate TGSI ENDLOOP into BRW_OPCODE_WHILE.  The operands are unused.
+ */
+static void
+aos_ENDLOOP(struct toy_compiler *tc,
+            const struct tgsi_full_instruction *tgsi_inst,
+            struct toy_dst *dst,
+            struct toy_src *src)
+{
+   (void) tgsi_inst;
+   (void) dst;
+   (void) src;
+
+   tc_add0(tc, BRW_OPCODE_WHILE);
+}
+
+/**
+ * Translate TGSI NRM4: dst is src.x multiplied by the INV of the
+ * four-component dot product of src with itself.  This path is untested and
+ * asserts unconditionally in debug builds.
+ *
+ * NOTE(review): the scale is INV(dot), not a reciprocal square root, and
+ * only src.x is scaled -- confirm against the TGSI definition of NRM4.
+ */
+static void
+aos_NRM4(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   const struct toy_dst scale = tc_alloc_tmp(tc);
+   const struct toy_src src_x = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+
+   assert(!"NRM4 untested");
+
+   tc_DP4(tc, scale, src[0], src[0]);
+   tc_INV(tc, scale, tsrc_from(scale));
+   tc_MUL(tc, dst[0], src_x, tsrc_from(scale));
+}
+
+/**
+ * Handler for TGSI opcodes that cannot be translated: a warning naming the
+ * opcode is logged and the compilation is marked as failed.
+ */
+static void
+aos_unsupported(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst,
+                struct toy_src *src)
+{
+   ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n",
+            tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode));
+
+   tc_fail(tc, "unsupported TGSI instruction");
+}
+
+/*
+ * Translation handlers, indexed by TGSI opcode.  soa_passthrough() looks up
+ * the handler for an instruction here and calls it.
+ *
+ * Entries with a bare numeric index presumably cover opcode values that have
+ * no TGSI_OPCODE_* name in the TGSI revision this was written against --
+ * TODO confirm against p_shader_tokens.h.
+ */
+static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
+ [TGSI_OPCODE_ARL] = aos_simple,
+ [TGSI_OPCODE_MOV] = aos_simple,
+ [TGSI_OPCODE_LIT] = aos_LIT,
+ [TGSI_OPCODE_RCP] = aos_simple,
+ [TGSI_OPCODE_RSQ] = aos_simple,
+ [TGSI_OPCODE_EXP] = aos_EXP,
+ [TGSI_OPCODE_LOG] = aos_LOG,
+ [TGSI_OPCODE_MUL] = aos_simple,
+ [TGSI_OPCODE_ADD] = aos_simple,
+ [TGSI_OPCODE_DP3] = aos_simple,
+ [TGSI_OPCODE_DP4] = aos_simple,
+ [TGSI_OPCODE_DST] = aos_DST,
+ [TGSI_OPCODE_MIN] = aos_simple,
+ [TGSI_OPCODE_MAX] = aos_simple,
+ [TGSI_OPCODE_SLT] = aos_set_on_cond,
+ [TGSI_OPCODE_SGE] = aos_set_on_cond,
+ [TGSI_OPCODE_MAD] = aos_simple,
+ [TGSI_OPCODE_SUB] = aos_simple,
+ [TGSI_OPCODE_LRP] = aos_LRP,
+ [TGSI_OPCODE_CND] = aos_CND,
+ [TGSI_OPCODE_SQRT] = aos_simple,
+ [TGSI_OPCODE_DP2A] = aos_DP2A,
+ [22] = aos_unsupported,
+ [23] = aos_unsupported,
+ [TGSI_OPCODE_FRC] = aos_simple,
+ [TGSI_OPCODE_CLAMP] = aos_CLAMP,
+ [TGSI_OPCODE_FLR] = aos_simple,
+ [TGSI_OPCODE_ROUND] = aos_simple,
+ [TGSI_OPCODE_EX2] = aos_simple,
+ [TGSI_OPCODE_LG2] = aos_simple,
+ [TGSI_OPCODE_POW] = aos_simple,
+ [TGSI_OPCODE_XPD] = aos_XPD,
+ [32] = aos_unsupported,
+ [TGSI_OPCODE_ABS] = aos_simple,
+ [TGSI_OPCODE_RCC] = aos_unsupported,
+ [TGSI_OPCODE_DPH] = aos_simple,
+ [TGSI_OPCODE_COS] = aos_simple,
+ [TGSI_OPCODE_DDX] = aos_unsupported,
+ [TGSI_OPCODE_DDY] = aos_unsupported,
+ [TGSI_OPCODE_KILP] = aos_simple,
+ [TGSI_OPCODE_PK2H] = aos_PK2H,
+ [TGSI_OPCODE_PK2US] = aos_unsupported,
+ [TGSI_OPCODE_PK4B] = aos_unsupported,
+ [TGSI_OPCODE_PK4UB] = aos_unsupported,
+ [TGSI_OPCODE_RFL] = aos_unsupported,
+ [TGSI_OPCODE_SEQ] = aos_set_on_cond,
+ [TGSI_OPCODE_SFL] = aos_SFL,
+ [TGSI_OPCODE_SGT] = aos_set_on_cond,
+ [TGSI_OPCODE_SIN] = aos_simple,
+ [TGSI_OPCODE_SLE] = aos_set_on_cond,
+ [TGSI_OPCODE_SNE] = aos_set_on_cond,
+ [TGSI_OPCODE_STR] = aos_STR,
+ [TGSI_OPCODE_TEX] = aos_tex,
+ [TGSI_OPCODE_TXD] = aos_tex,
+ [TGSI_OPCODE_TXP] = aos_tex,
+ [TGSI_OPCODE_UP2H] = aos_UP2H,
+ [TGSI_OPCODE_UP2US] = aos_unsupported,
+ [TGSI_OPCODE_UP4B] = aos_unsupported,
+ [TGSI_OPCODE_UP4UB] = aos_unsupported,
+ [TGSI_OPCODE_X2D] = aos_unsupported,
+ [TGSI_OPCODE_ARA] = aos_unsupported,
+ [TGSI_OPCODE_ARR] = aos_simple,
+ [TGSI_OPCODE_BRA] = aos_unsupported,
+ [TGSI_OPCODE_CAL] = aos_unsupported,
+ [TGSI_OPCODE_RET] = aos_unsupported,
+ [TGSI_OPCODE_SSG] = aos_set_sign,
+ [TGSI_OPCODE_CMP] = aos_compare,
+ [TGSI_OPCODE_SCS] = aos_SCS,
+ [TGSI_OPCODE_TXB] = aos_tex,
+ [TGSI_OPCODE_NRM] = aos_NRM,
+ [TGSI_OPCODE_DIV] = aos_DIV,
+ [TGSI_OPCODE_DP2] = aos_simple,
+ [TGSI_OPCODE_TXL] = aos_tex,
+ [TGSI_OPCODE_BRK] = aos_BRK,
+ [TGSI_OPCODE_IF] = aos_simple,
+ [TGSI_OPCODE_UIF] = aos_simple,
+ [76] = aos_unsupported,
+ [TGSI_OPCODE_ELSE] = aos_simple,
+ [TGSI_OPCODE_ENDIF] = aos_simple,
+ [79] = aos_unsupported,
+ [80] = aos_unsupported,
+ [TGSI_OPCODE_PUSHA] = aos_unsupported,
+ [TGSI_OPCODE_POPA] = aos_unsupported,
+ [TGSI_OPCODE_CEIL] = aos_CEIL,
+ [TGSI_OPCODE_I2F] = aos_simple,
+ [TGSI_OPCODE_NOT] = aos_simple,
+ [TGSI_OPCODE_TRUNC] = aos_simple,
+ [TGSI_OPCODE_SHL] = aos_simple,
+ [88] = aos_unsupported,
+ [TGSI_OPCODE_AND] = aos_simple,
+ [TGSI_OPCODE_OR] = aos_simple,
+ [TGSI_OPCODE_MOD] = aos_simple,
+ [TGSI_OPCODE_XOR] = aos_simple,
+ [TGSI_OPCODE_SAD] = aos_SAD,
+ [TGSI_OPCODE_TXF] = aos_tex,
+ [TGSI_OPCODE_TXQ] = aos_tex,
+ [TGSI_OPCODE_CONT] = aos_CONT,
+ [TGSI_OPCODE_EMIT] = aos_simple,
+ [TGSI_OPCODE_ENDPRIM] = aos_simple,
+ [TGSI_OPCODE_BGNLOOP] = aos_BGNLOOP,
+ [TGSI_OPCODE_BGNSUB] = aos_unsupported,
+ [TGSI_OPCODE_ENDLOOP] = aos_ENDLOOP,
+ [TGSI_OPCODE_ENDSUB] = aos_unsupported,
+ [TGSI_OPCODE_TXQ_LZ] = aos_tex,
+ [104] = aos_unsupported,
+ [105] = aos_unsupported,
+ [106] = aos_unsupported,
+ [TGSI_OPCODE_NOP] = aos_simple,
+ [108] = aos_unsupported,
+ [109] = aos_unsupported,
+ [110] = aos_unsupported,
+ [111] = aos_unsupported,
+ [TGSI_OPCODE_NRM4] = aos_NRM4,
+ [TGSI_OPCODE_CALLNZ] = aos_unsupported,
+ [TGSI_OPCODE_BREAKC] = aos_unsupported,
+ [TGSI_OPCODE_KIL] = aos_simple,
+ [TGSI_OPCODE_END] = aos_simple,
+ [118] = aos_unsupported,
+ [TGSI_OPCODE_F2I] = aos_simple,
+ [TGSI_OPCODE_IDIV] = aos_simple,
+ [TGSI_OPCODE_IMAX] = aos_simple,
+ [TGSI_OPCODE_IMIN] = aos_simple,
+ [TGSI_OPCODE_INEG] = aos_simple,
+ [TGSI_OPCODE_ISGE] = aos_set_on_cond,
+ [TGSI_OPCODE_ISHR] = aos_simple,
+ [TGSI_OPCODE_ISLT] = aos_set_on_cond,
+ [TGSI_OPCODE_F2U] = aos_simple,
+ [TGSI_OPCODE_U2F] = aos_simple,
+ [TGSI_OPCODE_UADD] = aos_simple,
+ [TGSI_OPCODE_UDIV] = aos_simple,
+ [TGSI_OPCODE_UMAD] = aos_simple,
+ [TGSI_OPCODE_UMAX] = aos_simple,
+ [TGSI_OPCODE_UMIN] = aos_simple,
+ [TGSI_OPCODE_UMOD] = aos_simple,
+ [TGSI_OPCODE_UMUL] = aos_simple,
+ [TGSI_OPCODE_USEQ] = aos_set_on_cond,
+ [TGSI_OPCODE_USGE] = aos_set_on_cond,
+ [TGSI_OPCODE_USHR] = aos_simple,
+ [TGSI_OPCODE_USLT] = aos_set_on_cond,
+ [TGSI_OPCODE_USNE] = aos_set_on_cond,
+ [TGSI_OPCODE_SWITCH] = aos_unsupported,
+ [TGSI_OPCODE_CASE] = aos_unsupported,
+ [TGSI_OPCODE_DEFAULT] = aos_unsupported,
+ [TGSI_OPCODE_ENDSWITCH] = aos_unsupported,
+ [TGSI_OPCODE_SAMPLE] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_I] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_I_MS] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_B] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_C] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_C_LZ] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_D] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_L] = aos_sample,
+ [TGSI_OPCODE_GATHER4] = aos_sample,
+ [TGSI_OPCODE_SVIEWINFO] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_POS] = aos_sample,
+ [TGSI_OPCODE_SAMPLE_INFO] = aos_sample,
+ [TGSI_OPCODE_UARL] = aos_simple,
+ [TGSI_OPCODE_UCMP] = aos_compare,
+ [TGSI_OPCODE_IABS] = aos_simple,
+ [TGSI_OPCODE_ISSG] = aos_set_sign,
+ [TGSI_OPCODE_LOAD] = aos_unsupported,
+ [TGSI_OPCODE_STORE] = aos_unsupported,
+ [TGSI_OPCODE_MFENCE] = aos_unsupported,
+ [TGSI_OPCODE_LFENCE] = aos_unsupported,
+ [TGSI_OPCODE_SFENCE] = aos_unsupported,
+ [TGSI_OPCODE_BARRIER] = aos_unsupported,
+ [TGSI_OPCODE_ATOMUADD] = aos_unsupported,
+ [TGSI_OPCODE_ATOMXCHG] = aos_unsupported,
+ [TGSI_OPCODE_ATOMCAS] = aos_unsupported,
+ [TGSI_OPCODE_ATOMAND] = aos_unsupported,
+ [TGSI_OPCODE_ATOMOR] = aos_unsupported,
+ [TGSI_OPCODE_ATOMXOR] = aos_unsupported,
+ [TGSI_OPCODE_ATOMUMIN] = aos_unsupported,
+ [TGSI_OPCODE_ATOMUMAX] = aos_unsupported,
+ [TGSI_OPCODE_ATOMIMIN] = aos_unsupported,
+ [TGSI_OPCODE_ATOMIMAX] = aos_unsupported,
+ [TGSI_OPCODE_TEX2] = aos_tex,
+ [TGSI_OPCODE_TXB2] = aos_tex,
+ [TGSI_OPCODE_TXL2] = aos_tex,
+};
+
+/**
+ * Translate an instruction in SoA mode by handing it, with its operands
+ * untouched, to the AoS translator registered for the same opcode.
+ */
+static void
+soa_passthrough(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst_,
+                struct toy_src *src_)
+{
+   const unsigned opcode = tgsi_inst->Instruction.Opcode;
+
+   aos_translate_table[opcode](tc, tgsi_inst, dst_, src_);
+}
+
+/**
+ * Translate a component-wise instruction in SoA mode.
+ *
+ * Every destination and source operand is transposed into four per-channel
+ * operands, and the AoS translator for the opcode is then invoked once for
+ * each of the four channels.
+ */
+static void
+soa_per_channel(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst_,
+                struct toy_src *src_)
+{
+   const toy_tgsi_translate translate =
+      aos_translate_table[tgsi_inst->Instruction.Opcode];
+   const int num_dst = tgsi_inst->Instruction.NumDstRegs;
+   const int num_src = tgsi_inst->Instruction.NumSrcRegs;
+   struct toy_dst dst_chans[TGSI_FULL_MAX_DST_REGISTERS][4];
+   struct toy_src src_chans[TGSI_FULL_MAX_SRC_REGISTERS][4];
+   int reg, chan;
+
+   for (reg = 0; reg < num_dst; reg++)
+      tdst_transpose(dst_[reg], dst_chans[reg]);
+   for (reg = 0; reg < num_src; reg++)
+      tsrc_transpose(src_[reg], src_chans[reg]);
+
+   /* one AoS translation per channel */
+   for (chan = 0; chan < 4; chan++) {
+      struct toy_dst chan_dst[TGSI_FULL_MAX_DST_REGISTERS];
+      struct toy_src chan_src[TGSI_FULL_MAX_SRC_REGISTERS];
+
+      for (reg = 0; reg < num_dst; reg++)
+         chan_dst[reg] = dst_chans[reg][chan];
+      for (reg = 0; reg < num_src; reg++)
+         chan_src[reg] = src_chans[reg][chan];
+
+      translate(tc, tgsi_inst, chan_dst, chan_src);
+   }
+}
+
+/**
+ * Translate a scalar instruction in SoA mode: evaluate it once, using only
+ * the X channel of each source, and copy the result to all four destination
+ * channels.
+ *
+ * Handles RCP, RSQ, SQRT, EX2, LG2, COS, SIN (one source) and POW (two
+ * sources); any other opcode trips an assertion and emits nothing.
+ */
+static void
+soa_scalar_replicate(struct toy_compiler *tc,
+                     const struct tgsi_full_instruction *tgsi_inst,
+                     struct toy_dst *dst_,
+                     struct toy_src *src_)
+{
+   const int num_src = tgsi_inst->Instruction.NumSrcRegs;
+   struct toy_dst dst_chans[4], result;
+   struct toy_src src_x[TGSI_FULL_MAX_SRC_REGISTERS];
+   int toy_opcode, i;
+
+   assert(tgsi_inst->Instruction.NumDstRegs == 1);
+
+   tdst_transpose(dst_[0], dst_chans);
+
+   /* keep only the X channel of every source */
+   for (i = 0; i < num_src; i++) {
+      struct toy_src src_chans[4];
+
+      tsrc_transpose(src_[i], src_chans);
+      src_x[i] = src_chans[0];
+   }
+
+   result = tc_alloc_tmp(tc);
+
+   toy_opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
+   assert(toy_opcode);
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_POW:
+      tc_add2(tc, toy_opcode, result, src_x[0], src_x[1]);
+      break;
+   case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_SQRT:
+   case TGSI_OPCODE_EX2:
+   case TGSI_OPCODE_LG2:
+   case TGSI_OPCODE_COS:
+   case TGSI_OPCODE_SIN:
+      tc_add1(tc, toy_opcode, result, src_x[0]);
+      break;
+   default:
+      assert(!"invalid soa_scalar_replicate() call");
+      return;
+   }
+
+   /* broadcast the scalar result */
+   for (i = 0; i < 4; i++)
+      tc_MOV(tc, dst_chans[i], tsrc_from(result));
+}
+
+/**
+ * Translate the TGSI dot-product family (DP2, DP2A, DP3, DPH, DP4) in SoA
+ * mode.
+ *
+ * The per-channel products are accumulated into a temporary with MUL/MAC,
+ * starting from the highest channel involved, and the sum is then copied to
+ * all four destination channels.  DP2A seeds the sum with src2.x and DPH
+ * with src1.w.
+ */
+static void
+soa_dot_product(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst_,
+                struct toy_src *src_)
+{
+   const int num_src = tgsi_inst->Instruction.NumSrcRegs;
+   struct toy_dst dst_chans[4], sum;
+   struct toy_src s[TGSI_FULL_MAX_SRC_REGISTERS][4];
+   int i;
+
+   tdst_transpose(dst_[0], dst_chans);
+   for (i = 0; i < num_src; i++)
+      tsrc_transpose(src_[i], s[i]);
+
+   sum = tc_alloc_tmp(tc);
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_DP2:
+      tc_MUL(tc, sum, s[0][1], s[1][1]);
+      tc_MAC(tc, sum, s[0][0], s[1][0], tsrc_from(sum));
+      break;
+   case TGSI_OPCODE_DP2A:
+      tc_MAC(tc, sum, s[0][1], s[1][1], s[2][0]);
+      tc_MAC(tc, sum, s[0][0], s[1][0], tsrc_from(sum));
+      break;
+   case TGSI_OPCODE_DP3:
+      tc_MUL(tc, sum, s[0][2], s[1][2]);
+      tc_MAC(tc, sum, s[0][1], s[1][1], tsrc_from(sum));
+      tc_MAC(tc, sum, s[0][0], s[1][0], tsrc_from(sum));
+      break;
+   case TGSI_OPCODE_DPH:
+      tc_MAC(tc, sum, s[0][2], s[1][2], s[1][3]);
+      tc_MAC(tc, sum, s[0][1], s[1][1], tsrc_from(sum));
+      tc_MAC(tc, sum, s[0][0], s[1][0], tsrc_from(sum));
+      break;
+   case TGSI_OPCODE_DP4:
+      tc_MUL(tc, sum, s[0][3], s[1][3]);
+      tc_MAC(tc, sum, s[0][2], s[1][2], tsrc_from(sum));
+      tc_MAC(tc, sum, s[0][1], s[1][1], tsrc_from(sum));
+      tc_MAC(tc, sum, s[0][0], s[1][0], tsrc_from(sum));
+      break;
+   default:
+      assert(!"invalid soa_dot_product() call");
+      return;
+   }
+
+   /* every destination channel receives the same sum */
+   for (i = 0; i < 4; i++)
+      tc_MOV(tc, dst_chans[i], tsrc_from(sum));
+}
+
+/**
+ * Translate TGSI DDX/DDY in SoA mode by emitting a single TOY_OPCODE_DDX or
+ * TOY_OPCODE_DDY on the untransposed operands.  Any opcode other than
+ * TGSI_OPCODE_DDX is treated as DDY.
+ */
+static void
+soa_partial_derivative(struct toy_compiler *tc,
+                       const struct tgsi_full_instruction *tgsi_inst,
+                       struct toy_dst *dst_,
+                       struct toy_src *src_)
+{
+   const bool is_ddx = (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX);
+
+   if (is_ddx) {
+      tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]);
+      return;
+   }
+
+   tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]);
+}
+
+/**
+ * Translate TGSI IF/UIF in SoA mode.
+ *
+ * The X channel of the (single-swizzle) condition is compared for
+ * inequality against zero: a float 0.0f for IF, an integer 0 for any other
+ * opcode routed here (UIF).
+ */
+static void
+soa_if(struct toy_compiler *tc,
+       const struct tgsi_full_instruction *tgsi_inst,
+       struct toy_dst *dst_,
+       struct toy_src *src_)
+{
+   struct toy_src cond[4];
+   struct toy_src zero;
+
+   assert(tsrc_is_swizzle1(src_[0]));
+   tsrc_transpose(src_[0], cond);
+
+   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF)
+      zero = tsrc_imm_f(0.0f);
+   else
+      zero = tsrc_imm_d(0);
+
+   tc_IF(tc, tdst_null(), cond[0], zero, BRW_CONDITIONAL_NEQ);
+}
+
+/**
+ * Translate TGSI LIT in SoA mode.
+ *
+ * X and W are set to 1.0, Y to src.x and Z to POW(src.y, src.w).  Y and Z
+ * are then overwritten with 0.0 by predicated moves that follow a
+ * "src.x < 0.0" compare.
+ */
+static void
+soa_LIT(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src in[4];
+   struct toy_inst *mov;
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   /* unconditional results */
+   tc_MOV(tc, out[0], tsrc_imm_f(1.0f));
+   tc_MOV(tc, out[1], in[0]);
+   tc_POW(tc, out[2], in[1], in[3]);
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+
+   /*
+    * POW is calculated first because math with pred_ctrl is broken here.
+    * But, why?
+    */
+   tc_CMP(tc, tdst_null(), in[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_L);
+
+   /* predicated on the compare above: zero Y and Z */
+   mov = tc_MOV(tc, out[1], tsrc_imm_f(0.0f));
+   mov->pred_ctrl = BRW_PREDICATE_NORMAL;
+   mov = tc_MOV(tc, out[2], tsrc_imm_f(0.0f));
+   mov->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI EXP in SoA mode (marked untested: asserts in debug builds).
+ *
+ * X is built by hand as a float bit pattern from RNDD(src.x): 127 is added
+ * and the sum is shifted left by 23.  Y is FRC(src.x), Z is EXP(src.x) and
+ * W is 1.0.  The X computation is skipped when that channel is null.
+ */
+static void
+soa_EXP(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src in[4];
+
+   assert(!"SoA EXP untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   if (!tdst_is_null(out[0])) {
+      struct toy_dst biased_exp = tdst_d(tc_alloc_tmp(tc));
+
+      tc_RNDD(tc, biased_exp, in[0]);
+
+      /* construct the floating point number manually */
+      tc_ADD(tc, biased_exp, tsrc_from(biased_exp), tsrc_imm_d(127));
+      tc_SHL(tc, tdst_d(out[0]), tsrc_from(biased_exp), tsrc_imm_d(23));
+   }
+
+   tc_FRC(tc, out[1], in[0]);
+   tc_EXP(tc, out[2], in[0]);
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI LOG in SoA mode (marked untested: asserts in debug builds).
+ *
+ * When X or Y is written, the exponent and mantissa are extracted from the
+ * bits of src.x with integer shifts and masks.  Z is LOG(src.x) and W is
+ * 1.0.
+ */
+static void
+soa_LOG(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src in[4];
+
+   assert(!"SoA LOG untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   if (dst_[0].writemask & TOY_WRITEMASK_XY) {
+      struct toy_dst bits = tdst_d(tc_alloc_tmp(tc));
+
+      /* exponent */
+      tc_SHR(tc, bits, tsrc_absolute(tsrc_d(in[0])), tsrc_imm_d(23));
+      tc_ADD(tc, out[0], tsrc_from(bits), tsrc_imm_d(-127));
+
+      /* mantissa */
+      tc_AND(tc, bits, tsrc_d(in[0]), tsrc_imm_d((1 << 23) - 1));
+      tc_OR(tc, out[1], tsrc_from(bits), tsrc_imm_d(127 << 23));
+   }
+
+   tc_LOG(tc, out[2], in[0]);
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI DST in SoA mode:
+ *
+ *   dst.x = 1.0
+ *   dst.y = src0.y * src1.y
+ *   dst.z = src0.z
+ *   dst.w = src1.w
+ */
+static void
+soa_DST(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src a[4], b[4];
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], a);
+   tsrc_transpose(src_[1], b);
+
+   tc_MOV(tc, out[0], tsrc_imm_f(1.0f));
+   tc_MUL(tc, out[1], a[1], b[1]);
+   tc_MOV(tc, out[2], a[2]);
+   tc_MOV(tc, out[3], b[3]);
+}
+
+/**
+ * Translate TGSI XPD (cross product) in SoA mode.
+ *
+ * Each of X, Y and Z is computed with a MUL into the destination channel
+ * followed by a MAC that adds the other product to the negated first one.
+ * W is set to 1.0.
+ */
+static void
+soa_XPD(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src a[4], b[4];
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], a);
+   tsrc_transpose(src_[1], b);
+
+   /* out.x = a.y * b.z - b.y * a.z */
+   tc_MUL(tc, out[0], a[2], b[1]);
+   tc_MAC(tc, out[0], a[1], b[2], tsrc_negate(tsrc_from(out[0])));
+
+   /* out.y = a.z * b.x - b.z * a.x */
+   tc_MUL(tc, out[1], a[0], b[2]);
+   tc_MAC(tc, out[1], a[2], b[0], tsrc_negate(tsrc_from(out[1])));
+
+   /* out.z = a.x * b.y - b.x * a.y */
+   tc_MUL(tc, out[2], a[1], b[0]);
+   tc_MAC(tc, out[2], a[0], b[1], tsrc_negate(tsrc_from(out[2])));
+
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI PK2H in SoA mode (marked untested: asserts in debug
+ * builds).
+ *
+ * src.y shifted left by 16 is OR'ed with src.x in an unsigned temporary,
+ * and the packed value is copied to all four destination channels.
+ */
+static void
+soa_PK2H(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst_,
+         struct toy_src *src_)
+{
+   struct toy_dst packed = tdst_ud(tc_alloc_tmp(tc));
+   struct toy_dst out[4];
+   struct toy_src in[4];
+   int chan;
+
+   assert(!"SoA PK2H untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   /* packed = (in.y << 16) | in.x */
+   tc_SHL(tc, packed, in[1], tsrc_imm_ud(16));
+   tc_OR(tc, packed, in[0], tsrc_from(packed));
+
+   for (chan = 0; chan < 4; chan++)
+      tc_MOV(tc, out[chan], tsrc_from(packed));
+}
+
+/**
+ * Translate TGSI UP2H in SoA mode (marked untested: asserts in debug
+ * builds).
+ *
+ * X and Z take the low 16 bits of src.x and src.z respectively; Y and W
+ * take src.y and src.w shifted right by 16.  All operands are treated as
+ * unsigned dwords.
+ *
+ * NOTE(review): only integer AND/SHR are emitted here; no half-to-float
+ * conversion is visible in this function -- confirm that is intended.
+ */
+static void
+soa_UP2H(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst_,
+         struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src in[4];
+
+   assert(!"SoA UP2H untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   /* low halves */
+   tc_AND(tc, tdst_ud(out[0]), tsrc_ud(in[0]), tsrc_imm_ud(0xffff));
+   /* high halves */
+   tc_SHR(tc, tdst_ud(out[1]), tsrc_ud(in[1]), tsrc_imm_ud(16));
+   tc_AND(tc, tdst_ud(out[2]), tsrc_ud(in[2]), tsrc_imm_ud(0xffff));
+   tc_SHR(tc, tdst_ud(out[3]), tsrc_ud(in[3]), tsrc_imm_ud(16));
+}
+
+/**
+ * Translate TGSI SCS in SoA mode:
+ *
+ *   dst.x = COS(src.x)
+ *   dst.y = SIN(src.x)
+ *   dst.z = 0.0
+ *   dst.w = 1.0
+ */
+static void
+soa_SCS(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src in[4];
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   /* both trigonometric results come from the X channel */
+   tc_add1(tc, TOY_OPCODE_COS, out[0], in[0]);
+   tc_add1(tc, TOY_OPCODE_SIN, out[1], in[0]);
+
+   /* constant channels */
+   tc_MOV(tc, out[2], tsrc_imm_f(0.0f));
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI NRM in SoA mode (marked untested: asserts in debug builds).
+ *
+ * The sum of squares of src.xyz is accumulated in a temporary, INV is
+ * applied to it, and X/Y/Z of the destination receive the matching source
+ * channel multiplied by that factor.  W is set to 1.0.
+ *
+ * NOTE(review): if INV is a plain reciprocal, the scale factor is
+ * 1/dot(src, src) rather than 1/sqrt(dot(src, src)) -- confirm.
+ */
+static void
+soa_NRM(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   const struct toy_dst scale = tc_alloc_tmp(tc);
+   struct toy_dst out[4];
+   struct toy_src in[4];
+
+   assert(!"SoA NRM untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   /* scale = INV(z*z + y*y + x*x) */
+   tc_MUL(tc, scale, in[2], in[2]);
+   tc_MAC(tc, scale, in[1], in[1], tsrc_from(scale));
+   tc_MAC(tc, scale, in[0], in[0], tsrc_from(scale));
+   tc_INV(tc, scale, tsrc_from(scale));
+
+   tc_MUL(tc, out[0], in[0], tsrc_from(scale));
+   tc_MUL(tc, out[1], in[1], tsrc_from(scale));
+   tc_MUL(tc, out[2], in[2], tsrc_from(scale));
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI NRM4 in SoA mode (marked untested: asserts in debug
+ * builds).
+ *
+ * The sum of squares of all four source channels is accumulated in a
+ * temporary, INV is applied to it, and each destination channel receives
+ * the matching source channel multiplied by that factor.
+ *
+ * NOTE(review): if INV is a plain reciprocal, the scale factor is
+ * 1/dot(src, src) rather than 1/sqrt(dot(src, src)) -- confirm.
+ */
+static void
+soa_NRM4(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst_,
+         struct toy_src *src_)
+{
+   const struct toy_dst tmp = tc_alloc_tmp(tc);
+   struct toy_dst dst0[4];
+   struct toy_src src0[4];
+   int i;
+
+   assert(!"SoA NRM4 untested");
+
+   tdst_transpose(dst_[0], dst0);
+   tsrc_transpose(src_[0], src0);
+
+   /* tmp = INV(w*w + z*z + y*y + x*x) */
+   tc_MUL(tc, tmp, src0[3], src0[3]);
+   tc_MAC(tc, tmp, src0[2], src0[2], tsrc_from(tmp));
+   tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
+   tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
+   tc_INV(tc, tmp, tsrc_from(tmp));
+
+   /*
+    * Scale every channel by the common factor.  The source must be indexed
+    * by the channel; using src0[0] here would write the scaled X to all
+    * four outputs.
+    */
+   for (i = 0; i < 4; i++)
+      tc_MUL(tc, dst0[i], src0[i], tsrc_from(tmp));
+}
+
+/**
+ * Translator for opcodes that have no SoA implementation: log a warning
+ * naming the opcode and flag the compilation as failed.
+ */
+static void
+soa_unsupported(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst_,
+                struct toy_src *src_)
+{
+   const unsigned opcode = tgsi_inst->Instruction.Opcode;
+   const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
+
+   ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
+         opcode_info->mnemonic);
+
+   tc_fail(tc, "unsupported TGSI instruction in SoA form");
+}
+
+/**
+ * SoA translation table, indexed by TGSI opcode.
+ *
+ * Each entry is the function used to translate that opcode when the shader
+ * is compiled in SoA form.  Entries written with a bare number are opcode
+ * values that have no TGSI_OPCODE_* name in this table; they are routed to
+ * soa_unsupported explicitly.
+ */
+static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
+ [TGSI_OPCODE_ARL] = soa_per_channel,
+ [TGSI_OPCODE_MOV] = soa_per_channel,
+ [TGSI_OPCODE_LIT] = soa_LIT,
+ [TGSI_OPCODE_RCP] = soa_scalar_replicate,
+ [TGSI_OPCODE_RSQ] = soa_scalar_replicate,
+ [TGSI_OPCODE_EXP] = soa_EXP,
+ [TGSI_OPCODE_LOG] = soa_LOG,
+ [TGSI_OPCODE_MUL] = soa_per_channel,
+ [TGSI_OPCODE_ADD] = soa_per_channel,
+ [TGSI_OPCODE_DP3] = soa_dot_product,
+ [TGSI_OPCODE_DP4] = soa_dot_product,
+ [TGSI_OPCODE_DST] = soa_DST,
+ [TGSI_OPCODE_MIN] = soa_per_channel,
+ [TGSI_OPCODE_MAX] = soa_per_channel,
+ [TGSI_OPCODE_SLT] = soa_per_channel,
+ [TGSI_OPCODE_SGE] = soa_per_channel,
+ [TGSI_OPCODE_MAD] = soa_per_channel,
+ [TGSI_OPCODE_SUB] = soa_per_channel,
+ [TGSI_OPCODE_LRP] = soa_per_channel,
+ [TGSI_OPCODE_CND] = soa_per_channel,
+ [TGSI_OPCODE_SQRT] = soa_scalar_replicate,
+ [TGSI_OPCODE_DP2A] = soa_dot_product,
+ [22] = soa_unsupported,
+ [23] = soa_unsupported,
+ [TGSI_OPCODE_FRC] = soa_per_channel,
+ [TGSI_OPCODE_CLAMP] = soa_per_channel,
+ [TGSI_OPCODE_FLR] = soa_per_channel,
+ [TGSI_OPCODE_ROUND] = soa_per_channel,
+ [TGSI_OPCODE_EX2] = soa_scalar_replicate,
+ [TGSI_OPCODE_LG2] = soa_scalar_replicate,
+ [TGSI_OPCODE_POW] = soa_scalar_replicate,
+ [TGSI_OPCODE_XPD] = soa_XPD,
+ [32] = soa_unsupported,
+ [TGSI_OPCODE_ABS] = soa_per_channel,
+ [TGSI_OPCODE_RCC] = soa_unsupported,
+ [TGSI_OPCODE_DPH] = soa_dot_product,
+ [TGSI_OPCODE_COS] = soa_scalar_replicate,
+ [TGSI_OPCODE_DDX] = soa_partial_derivative,
+ [TGSI_OPCODE_DDY] = soa_partial_derivative,
+ [TGSI_OPCODE_KILP] = soa_passthrough,
+ [TGSI_OPCODE_PK2H] = soa_PK2H,
+ [TGSI_OPCODE_PK2US] = soa_unsupported,
+ [TGSI_OPCODE_PK4B] = soa_unsupported,
+ [TGSI_OPCODE_PK4UB] = soa_unsupported,
+ [TGSI_OPCODE_RFL] = soa_unsupported,
+ [TGSI_OPCODE_SEQ] = soa_per_channel,
+ [TGSI_OPCODE_SFL] = soa_per_channel,
+ [TGSI_OPCODE_SGT] = soa_per_channel,
+ [TGSI_OPCODE_SIN] = soa_scalar_replicate,
+ [TGSI_OPCODE_SLE] = soa_per_channel,
+ [TGSI_OPCODE_SNE] = soa_per_channel,
+ [TGSI_OPCODE_STR] = soa_per_channel,
+ [TGSI_OPCODE_TEX] = soa_passthrough,
+ [TGSI_OPCODE_TXD] = soa_passthrough,
+ [TGSI_OPCODE_TXP] = soa_passthrough,
+ [TGSI_OPCODE_UP2H] = soa_UP2H,
+ [TGSI_OPCODE_UP2US] = soa_unsupported,
+ [TGSI_OPCODE_UP4B] = soa_unsupported,
+ [TGSI_OPCODE_UP4UB] = soa_unsupported,
+ [TGSI_OPCODE_X2D] = soa_unsupported,
+ [TGSI_OPCODE_ARA] = soa_unsupported,
+ [TGSI_OPCODE_ARR] = soa_per_channel,
+ [TGSI_OPCODE_BRA] = soa_unsupported,
+ [TGSI_OPCODE_CAL] = soa_unsupported,
+ [TGSI_OPCODE_RET] = soa_unsupported,
+ [TGSI_OPCODE_SSG] = soa_per_channel,
+ [TGSI_OPCODE_CMP] = soa_per_channel,
+ [TGSI_OPCODE_SCS] = soa_SCS,
+ [TGSI_OPCODE_TXB] = soa_passthrough,
+ [TGSI_OPCODE_NRM] = soa_NRM,
+ [TGSI_OPCODE_DIV] = soa_per_channel,
+ [TGSI_OPCODE_DP2] = soa_dot_product,
+ [TGSI_OPCODE_TXL] = soa_passthrough,
+ [TGSI_OPCODE_BRK] = soa_passthrough,
+ [TGSI_OPCODE_IF] = soa_if,
+ [TGSI_OPCODE_UIF] = soa_if,
+ [76] = soa_unsupported,
+ [TGSI_OPCODE_ELSE] = soa_passthrough,
+ [TGSI_OPCODE_ENDIF] = soa_passthrough,
+ [79] = soa_unsupported,
+ [80] = soa_unsupported,
+ [TGSI_OPCODE_PUSHA] = soa_unsupported,
+ [TGSI_OPCODE_POPA] = soa_unsupported,
+ [TGSI_OPCODE_CEIL] = soa_per_channel,
+ [TGSI_OPCODE_I2F] = soa_per_channel,
+ [TGSI_OPCODE_NOT] = soa_per_channel,
+ [TGSI_OPCODE_TRUNC] = soa_per_channel,
+ [TGSI_OPCODE_SHL] = soa_per_channel,
+ [88] = soa_unsupported,
+ [TGSI_OPCODE_AND] = soa_per_channel,
+ [TGSI_OPCODE_OR] = soa_per_channel,
+ [TGSI_OPCODE_MOD] = soa_per_channel,
+ [TGSI_OPCODE_XOR] = soa_per_channel,
+ [TGSI_OPCODE_SAD] = soa_per_channel,
+ [TGSI_OPCODE_TXF] = soa_passthrough,
+ [TGSI_OPCODE_TXQ] = soa_passthrough,
+ [TGSI_OPCODE_CONT] = soa_passthrough,
+ [TGSI_OPCODE_EMIT] = soa_unsupported,
+ [TGSI_OPCODE_ENDPRIM] = soa_unsupported,
+ [TGSI_OPCODE_BGNLOOP] = soa_passthrough,
+ [TGSI_OPCODE_BGNSUB] = soa_unsupported,
+ [TGSI_OPCODE_ENDLOOP] = soa_passthrough,
+ [TGSI_OPCODE_ENDSUB] = soa_unsupported,
+ [TGSI_OPCODE_TXQ_LZ] = soa_passthrough,
+ [104] = soa_unsupported,
+ [105] = soa_unsupported,
+ [106] = soa_unsupported,
+ [TGSI_OPCODE_NOP] = soa_passthrough,
+ [108] = soa_unsupported,
+ [109] = soa_unsupported,
+ [110] = soa_unsupported,
+ [111] = soa_unsupported,
+ [TGSI_OPCODE_NRM4] = soa_NRM4,
+ [TGSI_OPCODE_CALLNZ] = soa_unsupported,
+ [TGSI_OPCODE_BREAKC] = soa_unsupported,
+ [TGSI_OPCODE_KIL] = soa_passthrough,
+ [TGSI_OPCODE_END] = soa_passthrough,
+ [118] = soa_unsupported,
+ [TGSI_OPCODE_F2I] = soa_per_channel,
+ [TGSI_OPCODE_IDIV] = soa_per_channel,
+ [TGSI_OPCODE_IMAX] = soa_per_channel,
+ [TGSI_OPCODE_IMIN] = soa_per_channel,
+ [TGSI_OPCODE_INEG] = soa_per_channel,
+ [TGSI_OPCODE_ISGE] = soa_per_channel,
+ [TGSI_OPCODE_ISHR] = soa_per_channel,
+ [TGSI_OPCODE_ISLT] = soa_per_channel,
+ [TGSI_OPCODE_F2U] = soa_per_channel,
+ [TGSI_OPCODE_U2F] = soa_per_channel,
+ [TGSI_OPCODE_UADD] = soa_per_channel,
+ [TGSI_OPCODE_UDIV] = soa_per_channel,
+ [TGSI_OPCODE_UMAD] = soa_per_channel,
+ [TGSI_OPCODE_UMAX] = soa_per_channel,
+ [TGSI_OPCODE_UMIN] = soa_per_channel,
+ [TGSI_OPCODE_UMOD] = soa_per_channel,
+ [TGSI_OPCODE_UMUL] = soa_per_channel,
+ [TGSI_OPCODE_USEQ] = soa_per_channel,
+ [TGSI_OPCODE_USGE] = soa_per_channel,
+ [TGSI_OPCODE_USHR] = soa_per_channel,
+ [TGSI_OPCODE_USLT] = soa_per_channel,
+ [TGSI_OPCODE_USNE] = soa_per_channel,
+ [TGSI_OPCODE_SWITCH] = soa_unsupported,
+ [TGSI_OPCODE_CASE] = soa_unsupported,
+ [TGSI_OPCODE_DEFAULT] = soa_unsupported,
+ [TGSI_OPCODE_ENDSWITCH] = soa_unsupported,
+ [TGSI_OPCODE_SAMPLE] = soa_passthrough,
+ [TGSI_OPCODE_SAMPLE_I] = soa_passthrough,
+ [TGSI_OPCODE_SAMPLE_I_MS] = soa_passthrough,
+ [TGSI_OPCODE_SAMPLE_B] = soa_passthrough,
+ [TGSI_OPCODE_SAMPLE_C] = soa_passthrough,
+ [TGSI_OPCODE_SAMPLE_C_LZ] = soa_passthrough,
+ [TGSI_OPCODE_SAMPLE_D] = soa_passthrough,
+ [TG​SI_OPCODE_SAMPLE_L] = soa_passthrough,
+ [TGSI_OPCODE_GATHER4] = soa_passthrough,
+ [TGSI_OPCODE_SVIEWINFO] = soa_passthrough,
+ [TGSI_OPCODE_SAMPLE_POS] = soa_passthrough,
+ [TGSI_OPCODE_SAMPLE_INFO] = soa_passthrough,
+ [TGSI_OPCODE_UARL] = soa_per_channel,
+ [TGSI_OPCODE_UCMP] = soa_per_channel,
+ [TGSI_OPCODE_IABS] = soa_per_channel,
+ [TGSI_OPCODE_ISSG] = soa_per_channel,
+ [TGSI_OPCODE_LOAD] = soa_unsupported,
+ [TGSI_OPCODE_STORE] = soa_unsupported,
+ [TGSI_OPCODE_MFENCE] = soa_unsupported,
+ [TGSI_OPCODE_LFENCE] = soa_unsupported,
+ [TGSI_OPCODE_SFENCE] = soa_unsupported,
+ [TGSI_OPCODE_BARRIER] = soa_unsupported,
+ [TGSI_OPCODE_ATOMUADD] = soa_unsupported,
+ [TGSI_OPCODE_ATOMXCHG] = soa_unsupported,
+ [TGSI_OPCODE_ATOMCAS] = soa_unsupported,
+ [TGSI_OPCODE_ATOMAND] = soa_unsupported,
+ [TGSI_OPCODE_ATOMOR] = soa_unsupported,
+ [TGSI_OPCODE_ATOMXOR] = soa_unsupported,
+ [TGSI_OPCODE_ATOMUMIN] = soa_unsupported,
+ [TGSI_OPCODE_ATOMUMAX] = soa_unsupported,
+ [TGSI_OPCODE_ATOMIMIN] = soa_unsupported,
+ [TGSI_OPCODE_ATOMIMAX] = soa_unsupported,
+ [TGSI_OPCODE_TEX2] = soa_unsupported,
+ [TGSI_OPCODE_TXB2] = soa_unsupported,
+ [TGSI_OPCODE_TXL2] = soa_unsupported,
+};
+
+/**
+ * Return true if the destination is addressed indirectly, either through
+ * its register index or through its dimension index.
+ */
+static bool
+ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
+{
+   if (d->Register.Indirect)
+      return true;
+
+   /* the dimension only counts when the register actually has one */
+   if (d->Register.Dimension && d->Dimension.Indirect)
+      return true;
+
+   return false;
+}
+
+/**
+ * Return the register index of a destination.  The destination must not be
+ * indirectly indexed.
+ */
+static int
+ra_dst_index(const struct tgsi_full_dst_register *d)
+{
+   const int index = d->Register.Index;
+
+   assert(!d->Register.Indirect);
+
+   return index;
+}
+
+/**
+ * Return the dimension index of a destination, or 0 when the destination
+ * has no dimension.  The dimension must not be indirectly indexed.
+ */
+static int
+ra_dst_dimension(const struct tgsi_full_dst_register *d)
+{
+   if (!d->Register.Dimension)
+      return 0;
+
+   assert(!d->Dimension.Indirect);
+
+   return d->Dimension.Index;
+}
+
+/**
+ * Return true if the source is addressed indirectly, either through its
+ * register index or through its dimension index.
+ */
+static bool
+ra_is_src_indirect(const struct tgsi_full_src_register *s)
+{
+   if (s->Register.Indirect)
+      return true;
+
+   /* the dimension only counts when the register actually has one */
+   if (s->Register.Dimension && s->Dimension.Indirect)
+      return true;
+
+   return false;
+}
+
+/**
+ * Return the register index of a source.  The source must not be
+ * indirectly indexed.
+ */
+static int
+ra_src_index(const struct tgsi_full_src_register *s)
+{
+   const int index = s->Register.Index;
+
+   assert(!s->Register.Indirect);
+
+   return index;
+}
+
+/**
+ * Return the dimension index of a source, or 0 when the source has no
+ * dimension.  The dimension must not be indirectly indexed.
+ */
+static int
+ra_src_dimension(const struct tgsi_full_src_register *s)
+{
+   if (!s->Register.Dimension)
+      return 0;
+
+   assert(!s->Dimension.Indirect);
+
+   return s->Dimension.Index;
+}
+
+/**
+ * Infer the operand type implied by a TGSI opcode.
+ *
+ * \param tgsi_opcode  the TGSI opcode
+ * \param is_dst       true to get the destination type, false to get the
+ *                     source type
+ *
+ * A handful of opcodes have a destination type of their own; for everything
+ * else the destination shares the source type, which defaults to float.
+ */
+static enum toy_type
+ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
+{
+   /* destinations whose type is listed separately from the sources */
+   if (is_dst) {
+      switch (tgsi_opcode) {
+      case TGSI_OPCODE_I2F:
+      case TGSI_OPCODE_U2F:
+      case TGSI_OPCODE_TXF:
+      case TGSI_OPCODE_TXQ:
+      case TGSI_OPCODE_TXQ_LZ:
+      case TGSI_OPCODE_SAMPLE_I:
+      case TGSI_OPCODE_SAMPLE_I_MS:
+      case TGSI_OPCODE_SAMPLE_POS:
+         return TOY_TYPE_F;
+      case TGSI_OPCODE_ARL:
+      case TGSI_OPCODE_ARR:
+      case TGSI_OPCODE_F2I:
+         return TOY_TYPE_D;
+      case TGSI_OPCODE_F2U:
+         return TOY_TYPE_UD;
+      default:
+         /* fall back to the shared table below */
+         break;
+      }
+   }
+
+   switch (tgsi_opcode) {
+   /* signed integer operands */
+   case TGSI_OPCODE_UIF:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_MOD:
+   case TGSI_OPCODE_XOR:
+   case TGSI_OPCODE_SAD: /* why? */
+   case TGSI_OPCODE_TXF:
+   case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_IDIV:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_INEG:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_UARL: /* why? */
+   case TGSI_OPCODE_IABS:
+   case TGSI_OPCODE_ISSG:
+   case TGSI_OPCODE_ATOMXCHG:
+   case TGSI_OPCODE_ATOMCAS:
+   case TGSI_OPCODE_ATOMAND:
+   case TGSI_OPCODE_ATOMOR:
+   case TGSI_OPCODE_ATOMXOR:
+   case TGSI_OPCODE_ATOMIMIN:
+   case TGSI_OPCODE_ATOMIMAX:
+      return TOY_TYPE_D;
+   /* unsigned integer operands */
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_UADD:
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_UMAD:
+   case TGSI_OPCODE_UMAX:
+   case TGSI_OPCODE_UMIN:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USHR:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_USNE:
+   case TGSI_OPCODE_SAMPLE_I:
+   case TGSI_OPCODE_SAMPLE_I_MS:
+   case TGSI_OPCODE_SVIEWINFO:
+   case TGSI_OPCODE_SAMPLE_POS:
+   case TGSI_OPCODE_SAMPLE_INFO:
+   case TGSI_OPCODE_UCMP:
+   case TGSI_OPCODE_LOAD:
+   case TGSI_OPCODE_STORE:
+   case TGSI_OPCODE_ATOMUADD:
+   case TGSI_OPCODE_ATOMUMIN:
+   case TGSI_OPCODE_ATOMUMAX:
+      return TOY_TYPE_UD;
+   default:
+      break;
+   }
+
+   /* everything else operates on floats */
+   return TOY_TYPE_F;
+}
+
+/**
+ * Return the type of an operand of the specified instruction.
+ *
+ * \param tgsi       translation state; its immediate types are consulted
+ *                   for MOV
+ * \param tgsi_inst  the instruction
+ * \param operand    index into Dst[] or Src[], depending on \p is_dst
+ * \param is_dst     true for a destination operand
+ *
+ * MOV carries no type of its own, so it is typed from the files of Dst[0]
+ * and Src[0].  All other opcodes use ra_infer_opcode_type(), after which
+ * sampler-like files are forced to TOY_TYPE_D.
+ */
+static enum toy_type
+ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
+            int operand, bool is_dst)
+{
+   const unsigned opcode = tgsi_inst->Instruction.Opcode;
+   enum tgsi_file_type file;
+   enum toy_type type;
+
+   /* we need to look at both src and dst for MOV */
+   /* XXX it should not be this complex */
+   if (opcode == TGSI_OPCODE_MOV) {
+      const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
+      const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;
+
+      if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS)
+         return TOY_TYPE_D;
+
+      /* a directly addressed immediate knows its own type */
+      if (src_file == TGSI_FILE_IMMEDIATE &&
+          !tgsi_inst->Src[0].Register.Indirect)
+         return tgsi->imm_data.types[tgsi_inst->Src[0].Register.Index];
+
+      /* this is the best we can do */
+      return TOY_TYPE_F;
+   }
+
+   type = ra_infer_opcode_type(opcode, is_dst);
+
+   /* fix the type */
+   if (is_dst)
+      file = tgsi_inst->Dst[operand].Register.File;
+   else
+      file = tgsi_inst->Src[operand].Register.File;
+
+   if (file == TGSI_FILE_SAMPLER ||
+       file == TGSI_FILE_RESOURCE ||
+       file == TGSI_FILE_SAMPLER_VIEW) {
+      type = TOY_TYPE_D;
+   }
+   else if (file == TGSI_FILE_ADDRESS) {
+      assert(type == TOY_TYPE_D);
+   }
+
+   return type;
+}
+
+/**
+ * Allocate a VRF register.
+ *
+ * One register is requested from the compiler in AoS mode and four
+ * otherwise.  \p file is currently not used to make the decision.
+ */
+static int
+ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
+{
+   if (tgsi->aos)
+      return tc_alloc_vrf(tgsi->tc, 1);
+
+   return tc_alloc_vrf(tgsi->tc, 4);
+}
+
+/**
+ * Construct the key for VRF mapping look-up.
+ *
+ * The key packs the file into bits 28..31, the dimension into bits 16..27
+ * and the index into bits 0..15, and is returned disguised as a pointer so
+ * that it can be used with the pointer-keyed hash table.
+ *
+ * \param file   TGSI register file; must be less than 16
+ * \param dim    dimension index; must be less than 4096
+ * \param index  register index; must be less than 65536
+ */
+static void *
+ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
+{
+   uint32_t packed;
+
+   /* this is ugly... */
+   assert(file < 1 << 4);
+   assert(dim < 1 << 12);
+   assert(index < 1 << 16);
+
+   /*
+    * Do the packing in an explicitly unsigned 32-bit type.  Shifting a file
+    * value of 8 or more left by 28 overflows if the enum is promoted to a
+    * signed int, which is undefined behavior.
+    */
+   packed = ((uint32_t) file << 28) | ((uint32_t) dim << 16) |
+            (uint32_t) index;
+
+   return intptr_to_pointer((intptr_t) packed);
+}
+
+/**
+ * Map a TGSI register to a VRF register.
+ *
+ * The mapping is looked up in tgsi->reg_mapping; when the TGSI register has
+ * not been seen before, a VRF register is allocated and recorded.
+ *
+ * \param is_new  if not NULL, set to true when the mapping was created by
+ *                this call and to false when it already existed
+ * \return the VRF register
+ */
+static int
+ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
+           int dim, int index, bool *is_new)
+{
+   void *const key = ra_get_map_key(file, dim, index);
+   void *mapped;
+   intptr_t vrf;
+   bool created;
+
+   /*
+    * because we allocate vrf from 1 and on, the looked-up value is never
+    * NULL as long as the key exists
+    */
+   mapped = util_hash_table_get(tgsi->reg_mapping, key);
+   created = (mapped == NULL);
+
+   if (created) {
+      vrf = (intptr_t) ra_alloc_reg(tgsi, file);
+
+      /* add to the mapping */
+      mapped = intptr_to_pointer(vrf);
+      util_hash_table_set(tgsi->reg_mapping, key, mapped);
+   }
+   else {
+      vrf = pointer_to_intptr(mapped);
+   }
+
+   if (is_new)
+      *is_new = created;
+
+   return (int) vrf;
+}
+
+/**
+ * Return true if the destination aliases any of the sources.
+ *
+ * The check is conservative: an indirect destination, or an indirect
+ * source in the same file as the destination, is always reported as
+ * aliasing.
+ */
+static bool
+ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
+{
+   const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
+   const int num_src = tgsi_inst->Instruction.NumSrcRegs;
+   int n;
+
+   /* we need a scratch register for indirect dst anyway */
+   if (ra_dst_is_indirect(d))
+      return true;
+
+   for (n = 0; n < num_src; n++) {
+      const struct tgsi_full_src_register *s = &tgsi_inst->Src[n];
+
+      /* only sources from the same file can alias */
+      if (s->Register.File == d->Register.File) {
+         /*
+          * we can go on to check dimension and index respectively, but
+          * keep it simple for now
+          */
+         if (ra_is_src_indirect(s))
+            return true;
+
+         if (ra_src_dimension(s) == ra_dst_dimension(d) &&
+             ra_src_index(s) == ra_dst_index(d))
+            return true;
+      }
+   }
+
+   return false;
+}
+
+/**
+ * Return the toy register for a TGSI destination operand.
+ *
+ * OUTPUT, TEMPORARY, ADDRESS and PREDICATE destinations are backed by a VRF
+ * register; a NULL destination (and, after an assertion, any other file)
+ * yields the null destination.
+ *
+ * \param is_scratch  if not NULL and a VRF register is used, set to whether
+ *                    that register is a freshly allocated scratch register
+ *                    instead of the one mapped to the TGSI register
+ */
+static struct toy_dst
+ra_get_dst(struct toy_tgsi *tgsi,
+           const struct tgsi_full_instruction *tgsi_inst, int dst_index,
+           bool *is_scratch)
+{
+   const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
+
+   switch (d->Register.File) {
+   case TGSI_FILE_OUTPUT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_PREDICATE:
+      /* VRF-backed; handled below */
+      break;
+   case TGSI_FILE_NULL:
+      return tdst_null();
+   default:
+      assert(!"unhandled dst file");
+      return tdst_null();
+   }
+
+   {
+      /* XXX we do not always need a scratch given the conditions... */
+      const bool use_scratch =
+         (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
+          tgsi_inst->Instruction.Saturate);
+      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
+      int vrf;
+
+      if (use_scratch)
+         vrf = ra_alloc_reg(tgsi, d->Register.File);
+      else
+         vrf = ra_map_reg(tgsi, d->Register.File,
+               ra_dst_dimension(d), ra_dst_index(d), NULL);
+
+      if (is_scratch)
+         *is_scratch = use_scratch;
+
+      return tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
+            false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
+   }
+}
+
+/**
+ * Build a toy source operand that reads VRF register \p vrf with type
+ * \p type, carrying over the swizzle and the absolute/negate modifiers of
+ * the TGSI source \p s.
+ */
+static struct toy_src
+ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
+                   enum toy_type type, int vrf)
+{
+   struct toy_src src;
+
+   src = tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
+         false, 0,
+         s->Register.SwizzleX,
+         s->Register.SwizzleY,
+         s->Register.SwizzleZ,
+         s->Register.SwizzleW,
+         s->Register.Absolute,
+         s->Register.Negate,
+         vrf * TOY_REG_WIDTH);
+
+   return src;
+}
+
+/**
+ * Fill the sources of \p inst with the description of a TGSI register.
+ *
+ * The sources are written in this order:
+ *
+ *   src[0]: TGSI file (immediate)
+ *   src[1]: TGSI dimension (immediate; 0 when \p dimension is NULL)
+ *   src[2]: TGSI dimension indirection (VRF operand, or immediate 0 when
+ *           \p dim_indirect is NULL)
+ *   src[3]: TGSI index (immediate)
+ *   src[4]: TGSI index indirection (VRF operand, or immediate 0 when
+ *           \p indirect is NULL)
+ *
+ * \param indirect      indirection of the register index, or NULL
+ * \param dimension     dimension of the register, or NULL
+ * \param dim_indirect  indirection of the dimension index, or NULL
+ * \return the number of sources written
+ */
+static int
+init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
+              enum tgsi_file_type file, int index,
+              const struct tgsi_ind_register *indirect,
+              const struct tgsi_dimension *dimension,
+              const struct tgsi_ind_register *dim_indirect)
+{
+   struct toy_src src;
+   int num_src = 0;
+
+   /* src[0]: TGSI file */
+   inst->src[num_src++] = tsrc_imm_d(file);
+
+   /* src[1]: TGSI dimension */
+   inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);
+
+   /* src[2]: TGSI dimension indirection */
+   if (dim_indirect) {
+      const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
+            dim_indirect->Index, NULL);
+
+      src = tsrc(TOY_FILE_VRF, vrf, 0);
+      /*
+       * The swizzle must come from dim_indirect itself: "indirect" belongs
+       * to the index and may be NULL when only the dimension is indirect.
+       */
+      src = tsrc_swizzle1(tsrc_d(src), dim_indirect->Swizzle);
+   }
+   else {
+      src = tsrc_imm_d(0);
+   }
+
+   inst->src[num_src++] = src;
+
+   /* src[3]: TGSI index */
+   inst->src[num_src++] = tsrc_imm_d(index);
+
+   /* src[4]: TGSI index indirection */
+   if (indirect) {
+      const int vrf = ra_map_reg(tgsi, indirect->File, 0,
+            indirect->Index, NULL);
+
+      src = tsrc(TOY_FILE_VRF, vrf, 0);
+      src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
+   }
+   else {
+      src = tsrc_imm_d(0);
+   }
+
+   inst->src[num_src++] = src;
+
+   return num_src;
+}
+
+/**
+ * Return the toy register for an indirectly addressed TGSI source operand.
+ *
+ * For sampler-like files (SAMPLER, RESOURCE, SAMPLER_VIEW) the returned
+ * operand reads the VRF register mapped to the indirection register
+ * itself.  For the other supported files a new VRF register is allocated
+ * and a TOY_OPCODE_TGSI_INDIRECT_FETCH instruction writing all four
+ * channels of it is emitted, with the TGSI register described by
+ * init_tgsi_reg().
+ */
+static struct toy_src
+ra_get_src_indirect(struct toy_tgsi *tgsi,
+                    const struct tgsi_full_instruction *tgsi_inst,
+                    int src_index)
+{
+   const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
+   bool need_vrf = false, is_resource = false;
+   struct toy_src src;
+
+   switch (s->Register.File) {
+   case TGSI_FILE_NULL:
+      src = tsrc_null();
+      break;
+   case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_RESOURCE:
+   case TGSI_FILE_SAMPLER_VIEW:
+      is_resource = true;
+      /* fall through */
+   case TGSI_FILE_CONSTANT:
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_SYSTEM_VALUE:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_IMMEDIATE:
+   case TGSI_FILE_PREDICATE:
+      need_vrf = true;
+      break;
+   default:
+      assert(!"unhandled src file");
+      src = tsrc_null();
+      break;
+   }
+
+   if (need_vrf) {
+      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
+      int vrf;
+
+      if (is_resource) {
+         assert(!s->Register.Dimension);
+         assert(s->Register.Indirect);
+
+         vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
+      }
+      else {
+         vrf = ra_alloc_reg(tgsi, s->Register.File);
+      }
+
+      src = ra_get_src_for_vrf(s, type, vrf);
+
+      /* emit indirect fetch */
+      if (!is_resource) {
+         const struct tgsi_ind_register *indirect =
+            (s->Register.Indirect) ? &s->Indirect : NULL;
+         const struct tgsi_dimension *dimension =
+            (s->Register.Dimension) ? &s->Dimension : NULL;
+         /*
+          * The dimension indirection is only meaningful when the register
+          * has a dimension at all, matching ra_is_src_indirect().
+          */
+         const struct tgsi_ind_register *dim_indirect =
+            (dimension && dimension->Indirect) ? &s->DimIndirect : NULL;
+         struct toy_inst *inst;
+
+         inst = tc_add(tgsi->tc);
+         inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
+         inst->dst = tdst_from(src);
+         inst->dst.writemask = TOY_WRITEMASK_XYZW;
+
+         init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
+               indirect, dimension, dim_indirect);
+      }
+   }
+
+   return src;
+}
+
+/**
+ * Return the toy register for a TGSI source operand.
+ *
+ * Indirectly addressed sources are delegated to ra_get_src_indirect().
+ * Sampler-like files become an immediate holding the register index.  An
+ * immediate whose four swizzled channels are equal becomes a scalar
+ * immediate.  Everything else is read from the VRF register mapped to the
+ * TGSI register; TEMPORARY, ADDRESS and PREDICATE registers are zeroed the
+ * first time they are mapped here.
+ */
+static struct toy_src
+ra_get_src(struct toy_tgsi *tgsi,
+           const struct tgsi_full_instruction *tgsi_inst,
+           int src_index)
+{
+   const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
+   bool map_to_vrf = false;
+   struct toy_src src;
+
+   if (ra_is_src_indirect(s))
+      return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
+
+   switch (s->Register.File) {
+   case TGSI_FILE_NULL:
+      src = tsrc_null();
+      break;
+   case TGSI_FILE_CONSTANT:
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_SYSTEM_VALUE:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_PREDICATE:
+      map_to_vrf = true;
+      break;
+   case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_RESOURCE:
+   case TGSI_FILE_SAMPLER_VIEW:
+      assert(!s->Register.Dimension);
+      src = tsrc_imm_d(s->Register.Index);
+      break;
+   case TGSI_FILE_IMMEDIATE:
+      {
+         enum toy_type imm_type;
+         const uint32_t *imm =
+            toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
+         const uint32_t x = imm[s->Register.SwizzleX];
+
+         /* all four swizzled channels read the same value? */
+         if (x == imm[s->Register.SwizzleY] &&
+             x == imm[s->Register.SwizzleZ] &&
+             x == imm[s->Register.SwizzleW]) {
+            const enum toy_type type =
+               ra_get_type(tgsi, tgsi_inst, src_index, false);
+
+            /* ignore imm_type */
+            src = tsrc_imm_ud(x);
+            src.type = type;
+            src.absolute = s->Register.Absolute;
+            src.negate = s->Register.Negate;
+         }
+         else {
+            map_to_vrf = true;
+         }
+      }
+      break;
+   default:
+      assert(!"unhandled src file");
+      src = tsrc_null();
+      break;
+   }
+
+   if (map_to_vrf) {
+      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
+      const enum tgsi_file_type file = s->Register.File;
+      bool first_use;
+      int vrf;
+
+      vrf = ra_map_reg(tgsi, file,
+            ra_src_dimension(s), ra_src_index(s), &first_use);
+
+      src = ra_get_src_for_vrf(s, type, vrf);
+
+      if (first_use &&
+          (file == TGSI_FILE_TEMPORARY ||
+           file == TGSI_FILE_ADDRESS ||
+           file == TGSI_FILE_PREDICATE)) {
+         struct toy_dst zeroed = tdst_from(src);
+
+         zeroed.writemask = TOY_WRITEMASK_XYZW;
+
+         /*
+          * Always initialize registers.  Otherwise, if the random value
+          * ends up in a VUE, FS may fail to interpolate correctly.
+          */
+         tc_MOV(tgsi->tc, zeroed, tsrc_type(tsrc_imm_d(0), type));
+      }
+   }
+
+   return src;
+}
+
+/**
+ * Translate one TGSI instruction into toy instructions.
+ *
+ * The TGSI operands are converted to toy registers first and the per-opcode
+ * translator from tgsi->translate_table is invoked on them.  Results that
+ * were computed into a scratch destination are then moved (or indirectly
+ * stored) to the real destination, with the instruction's saturate modifier
+ * applied to that final write.  The function also records whether KIL/KILP
+ * is used and which channels of the declared outputs have been written.
+ */
+static void
+parse_instruction(struct toy_tgsi *tgsi,
+                  const struct tgsi_full_instruction *tgsi_inst)
+{
+   struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
+   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
+   bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS];
+   toy_tgsi_translate translate;
+   int i, slot;
+
+   /* convert TGSI registers to toy registers */
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
+      src[i] = ra_get_src(tgsi, tgsi_inst, i);
+   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
+      dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);
+
+   /* translate the instruction */
+   translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
+   translate(tgsi->tc, tgsi_inst, dst, src);
+
+   /* write the result to the real destinations if needed */
+   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
+      const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
+
+      if (!dst_is_scratch[i])
+         continue;
+
+      if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE)
+         tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled");
+
+      /* saturation is applied by the instructions emitted below */
+      tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
+
+      /* emit indirect store */
+      if (ra_dst_is_indirect(d)) {
+         struct toy_inst *inst;
+
+         inst = tc_add(tgsi->tc);
+         inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
+         inst->dst = dst[i];
+
+         init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
+               (d->Register.Indirect) ? &d->Indirect : NULL,
+               (d->Register.Dimension) ? &d->Dimension : NULL,
+               (d->Dimension.Indirect) ? &d->DimIndirect : NULL);
+      }
+      else {
+         const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
+         struct toy_dst real_dst;
+         int vrf;
+
+         vrf = ra_map_reg(tgsi, d->Register.File,
+               ra_dst_dimension(d), ra_dst_index(d), NULL);
+         real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
+               false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
+
+         if (tgsi->aos) {
+            tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
+         }
+         else {
+            /* SoA: copy the four transposed channels one by one */
+            struct toy_dst tdst[4];
+            struct toy_src tsrc[4];
+            int j;
+
+            tdst_transpose(real_dst, tdst);
+            tsrc_transpose(tsrc_from(dst[i]), tsrc);
+
+            for (j = 0; j < 4; j++)
+               tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
+         }
+      }
+
+      tgsi->tc->templ.saturate = false;
+   }
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_KIL:
+   case TGSI_OPCODE_KILP:
+      tgsi->uses_kill = true;
+      break;
+   }
+
+   /* remember channels written */
+   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
+      const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
+
+      if (d->Register.File != TGSI_FILE_OUTPUT)
+         continue;
+
+      /*
+       * The search over the declared outputs needs its own counter.  Reusing
+       * i here would clobber the counter of the enclosing loop and make it
+       * skip or revisit destination registers.
+       */
+      for (slot = 0; slot < tgsi->num_outputs; slot++) {
+         if (tgsi->outputs[slot].index == d->Register.Index) {
+            tgsi->outputs[slot].undefined_mask &= ~d->Register.WriteMask;
+            break;
+         }
+      }
+   }
+}
+
+/**
+ * Record the inputs described by a TGSI input declaration.
+ *
+ * One slot of tgsi->inputs is appended for every index of the declared
+ * range.  A declaration without interpolation info is treated as perspective
+ * interpolated, and one without a semantic as GENERIC with the register
+ * index as the semantic index.
+ */
+static void
+decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
+{
+   static const struct tgsi_declaration_interp default_interp = {
+      TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
+   };
+   const struct tgsi_declaration_interp *interp = &default_interp;
+   int index;
+
+   if (decl->Declaration.Interpolate)
+      interp = &decl->Interp;
+
+   if (decl->Range.Last >= Elements(tgsi->inputs)) {
+      assert(!"invalid IN");
+      return;
+   }
+
+   index = decl->Range.First;
+   while (index <= decl->Range.Last) {
+      const int slot = tgsi->num_inputs++;
+      unsigned sem_name, sem_index;
+
+      if (decl->Declaration.Semantic) {
+         sem_name = decl->Semantic.Name;
+         sem_index = decl->Semantic.Index;
+      }
+      else {
+         sem_name = TGSI_SEMANTIC_GENERIC;
+         sem_index = index;
+      }
+
+      tgsi->inputs[slot].index = index;
+      tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
+      tgsi->inputs[slot].semantic_name = sem_name;
+      tgsi->inputs[slot].semantic_index = sem_index;
+      tgsi->inputs[slot].interp = interp->Interpolate;
+      tgsi->inputs[slot].centroid = interp->Centroid;
+
+      index++;
+   }
+}
+
+/**
+ * Record the outputs described by a TGSI output declaration.
+ *
+ * One slot of tgsi->outputs is appended for every index of the declared
+ * range.  All four channels of a new output start out as undefined; they are
+ * cleared from undefined_mask as instructions write to the output.
+ */
+static void
+decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
+{
+   int index = decl->Range.First;
+
+   if (decl->Range.Last >= Elements(tgsi->outputs)) {
+      assert(!"invalid OUT");
+      return;
+   }
+
+   /* outputs are expected to always carry a semantic */
+   assert(decl->Declaration.Semantic);
+
+   while (index <= decl->Range.Last) {
+      const int slot = tgsi->num_outputs++;
+
+      tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
+      tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
+      tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
+      tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
+      tgsi->outputs[slot].index = index;
+
+      index++;
+   }
+}
+
+/**
+ * Record the system values described by a TGSI system value declaration.
+ *
+ * One slot of tgsi->system_values is appended for every index of the
+ * declared range.  A declaration without a semantic is recorded as GENERIC
+ * with the register index as the semantic index.
+ */
+static void
+decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
+{
+   int index = decl->Range.First;
+
+   if (decl->Range.Last >= Elements(tgsi->system_values)) {
+      assert(!"invalid SV");
+      return;
+   }
+
+   while (index <= decl->Range.Last) {
+      const int slot = tgsi->num_system_values++;
+      unsigned sem_name, sem_index;
+
+      if (decl->Declaration.Semantic) {
+         sem_name = decl->Semantic.Name;
+         sem_index = decl->Semantic.Index;
+      }
+      else {
+         sem_name = TGSI_SEMANTIC_GENERIC;
+         sem_index = index;
+      }
+
+      tgsi->system_values[slot].index = index;
+      tgsi->system_values[slot].semantic_name = sem_name;
+      tgsi->system_values[slot].semantic_index = sem_index;
+
+      index++;
+   }
+}
+
+/**
+ * Emit the instruction that loads a TGSI register into its VRF register.
+ *
+ * Only inputs, constants, system values and immediates need such a fetch;
+ * nothing is emitted for any other register file.  The destination is typed
+ * as float, except for immediates which use the type they were declared
+ * with.
+ */
+static void
+fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
+{
+   enum toy_type type = TOY_TYPE_F;
+   enum toy_opcode opcode;
+   struct toy_dst dst;
+   int vrf;
+
+   switch (file) {
+   case TGSI_FILE_IMMEDIATE:
+      opcode = TOY_OPCODE_TGSI_IMM;
+      /* only the type is of interest, the data is fetched by the opcode */
+      toy_tgsi_get_imm(tgsi, idx, &type);
+      break;
+   case TGSI_FILE_SYSTEM_VALUE:
+      opcode = TOY_OPCODE_TGSI_SV;
+      break;
+   case TGSI_FILE_CONSTANT:
+      opcode = TOY_OPCODE_TGSI_CONST;
+      break;
+   case TGSI_FILE_INPUT:
+      opcode = TOY_OPCODE_TGSI_IN;
+      break;
+   default:
+      /* no need to fetch */
+      return;
+   }
+
+   vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
+   dst = tdst_type(tdst(TOY_FILE_VRF, vrf, 0), type);
+
+   tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
+}
+
+/**
+ * Handle a TGSI declaration token.
+ *
+ * Inputs, outputs and system values are recorded in tgsi.  Afterwards a
+ * fetch is requested for every register of the declared range;
+ * fetch_source() ignores the files that need none.
+ */
+static void
+parse_declaration(struct toy_tgsi *tgsi,
+                  const struct tgsi_full_declaration *decl)
+{
+   /* the second dimension is the same for the whole range */
+   const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
+   int idx;
+
+   switch (decl->Declaration.File) {
+   case TGSI_FILE_SYSTEM_VALUE:
+      decl_add_sv(tgsi, decl);
+      break;
+   case TGSI_FILE_OUTPUT:
+      decl_add_out(tgsi, decl);
+      break;
+   case TGSI_FILE_INPUT:
+      decl_add_in(tgsi, decl);
+      break;
+   case TGSI_FILE_IMMEDIATE:
+      /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
+      assert(!"unexpected immediate declaration");
+      break;
+   case TGSI_FILE_NULL:
+   case TGSI_FILE_CONSTANT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_PREDICATE:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_RESOURCE:
+   case TGSI_FILE_SAMPLER_VIEW:
+      /* nothing to do */
+      break;
+   default:
+      assert(!"unhandled TGSI file");
+      break;
+   }
+
+   /* fetch the registers now */
+   idx = decl->Range.First;
+   while (idx <= decl->Range.Last) {
+      fetch_source(tgsi, decl->Declaration.File, dim, idx);
+      idx++;
+   }
+}
+
+/**
+ * Append an immediate of four 32-bit channels to tgsi->imm_data.
+ *
+ * The type and data arrays are grown geometrically when they are full.
+ *
+ * \param tgsi  the TGSI translator owning the immediate storage
+ * \param type  the toy type of the immediate
+ * \param buf   the four channels of raw data to copy
+ * \return the index of the new immediate, or -1 when memory could not be
+ *         allocated; the immediates stored so far remain valid in that case
+ */
+static int
+add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
+{
+   /* reallocate the buffers if necessary */
+   if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
+      const int cur_size = tgsi->imm_data.size;
+      int new_size;
+      enum toy_type *new_types;
+      uint32_t (*new_buf)[4];
+
+      new_size = (cur_size) ? cur_size << 1 : 16;
+      while (new_size <= tgsi->imm_data.cur)
+         new_size <<= 1;
+
+      /*
+       * Commit each array as soon as its reallocation succeeds.  A
+       * successful REALLOC may release the old block, so the pointer stored
+       * in tgsi must be replaced right away.  Freeing the new block on a
+       * later failure would leave tgsi pointing at released memory, and
+       * toy_tgsi_cleanup() would free it a second time.
+       */
+      new_buf = REALLOC(tgsi->imm_data.buf,
+                        cur_size * sizeof(new_buf[0]),
+                        new_size * sizeof(new_buf[0]));
+      if (!new_buf)
+         return -1;
+      tgsi->imm_data.buf = new_buf;
+
+      new_types = REALLOC(tgsi->imm_data.types,
+                          cur_size * sizeof(new_types[0]),
+                          new_size * sizeof(new_types[0]));
+      if (!new_types) {
+         /*
+          * buf is now larger than what size claims, which is harmless; it
+          * is simply reallocated again by the next call
+          */
+         return -1;
+      }
+      tgsi->imm_data.types = new_types;
+
+      tgsi->imm_data.size = new_size;
+   }
+
+   tgsi->imm_data.types[tgsi->imm_data.cur] = type;
+   memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
+          buf, sizeof(tgsi->imm_data.buf[0]));
+
+   return tgsi->imm_data.cur++;
+}
+
+/**
+ * Handle a TGSI immediate token.
+ *
+ * The four channels are stored as raw 32-bit words together with the toy
+ * type matching the TGSI data type, and a fetch of the new immediate is
+ * emitted.  The compilation is marked as failed when the immediate cannot be
+ * stored.
+ */
+static void
+parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
+{
+   uint32_t imm_buf[4];
+   enum toy_type type;
+   int ch, idx;
+
+   switch (imm->Immediate.DataType) {
+   case TGSI_IMM_FLOAT32:
+      type = TOY_TYPE_F;
+      /* keep the bit pattern of the floats */
+      for (ch = 0; ch < 4; ch++)
+         imm_buf[ch] = fui(imm->u[ch].Float);
+      break;
+   case TGSI_IMM_INT32:
+      type = TOY_TYPE_D;
+      for (ch = 0; ch < 4; ch++)
+         imm_buf[ch] = (uint32_t) imm->u[ch].Int;
+      break;
+   case TGSI_IMM_UINT32:
+      type = TOY_TYPE_UD;
+      for (ch = 0; ch < 4; ch++)
+         imm_buf[ch] = imm->u[ch].Uint;
+      break;
+   default:
+      assert(!"unhandled TGSI imm type");
+      type = TOY_TYPE_F;
+      memset(imm_buf, 0, sizeof(imm_buf));
+      break;
+   }
+
+   idx = add_imm(tgsi, type, imm_buf);
+   if (idx < 0) {
+      tc_fail(tgsi->tc, "failed to add TGSI imm");
+      return;
+   }
+
+   fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
+}
+
+/**
+ * Handle a TGSI property token by copying its first data word into the
+ * matching member of tgsi->props.
+ */
+static void
+parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
+{
+   /* every property handled here consists of a single data word */
+   const unsigned value = prop->u[0].Data;
+
+   switch (prop->Property.PropertyName) {
+   case TGSI_PROPERTY_GS_INPUT_PRIM:
+      tgsi->props.gs_input_prim = value;
+      break;
+   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+      tgsi->props.gs_output_prim = value;
+      break;
+   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+      tgsi->props.gs_max_output_vertices = value;
+      break;
+   case TGSI_PROPERTY_FS_COORD_ORIGIN:
+      tgsi->props.fs_coord_origin = value;
+      break;
+   case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
+      tgsi->props.fs_coord_pixel_center = value;
+      break;
+   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
+      tgsi->props.fs_color0_writes_all_cbufs = value;
+      break;
+   case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
+      tgsi->props.fs_depth_layout = value;
+      break;
+   case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
+      tgsi->props.vs_prohibit_ucps = value;
+      break;
+   default:
+      assert(!"unhandled TGSI property");
+      break;
+   }
+}
+
+/**
+ * Dispatch a parsed TGSI token to the handler of its token type.
+ */
+static void
+parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
+{
+   const unsigned token_type = token->Token.Type;
+
+   switch (token_type) {
+   case TGSI_TOKEN_TYPE_INSTRUCTION:
+      parse_instruction(tgsi, &token->FullInstruction);
+      break;
+   case TGSI_TOKEN_TYPE_DECLARATION:
+      parse_declaration(tgsi, &token->FullDeclaration);
+      break;
+   case TGSI_TOKEN_TYPE_IMMEDIATE:
+      parse_immediate(tgsi, &token->FullImmediate);
+      break;
+   case TGSI_TOKEN_TYPE_PROPERTY:
+      parse_property(tgsi, &token->FullProperty);
+      break;
+   default:
+      assert(!"unhandled TGSI token type");
+      break;
+   }
+}
+
+/**
+ * Hash table iteration callback that prints one TGSI-to-VRF mapping.
+ *
+ * The key is unpacked into the TGSI file, dimension and index, and the value
+ * is the VRF register.  The dimension is printed only when it is non-zero.
+ * The data argument is unused.
+ */
+static enum pipe_error
+dump_reg_mapping(void *key, void *val, void *data)
+{
+   const uint32_t sig = (uint32_t) pointer_to_intptr(key);
+   const uint32_t vrf = (uint32_t) pointer_to_intptr(val);
+
+   /* see ra_get_map_key() */
+   const int tgsi_file = (sig >> 28) & 0xf;
+   const int tgsi_dim = (sig >> 16) & 0xfff;
+   const int tgsi_index = (sig >> 0) & 0xffff;
+
+   if (!tgsi_dim) {
+      ilo_printf(" v%d:\t%s[%d]\n", vrf,
+                 tgsi_file_names[tgsi_file], tgsi_index);
+   }
+   else {
+      ilo_printf(" v%d:\t%s[%d][%d]\n", vrf,
+                 tgsi_file_names[tgsi_file], tgsi_dim, tgsi_index);
+   }
+
+   return PIPE_OK;
+}
+
+/**
+ * Print the state of the TGSI translator.  Only the mapping of TGSI
+ * registers to VRF registers is printed at the moment.
+ */
+void
+toy_tgsi_dump(const struct toy_tgsi *tgsi)
+{
+   struct util_hash_table *mapping = tgsi->reg_mapping;
+
+   /* dump_reg_mapping() prints one line per entry and takes no user data */
+   util_hash_table_foreach(mapping, dump_reg_mapping, NULL);
+}
+
+/**
+ * Release what the TGSI translator owns: the register mapping and the
+ * arrays holding the immediates.  The toy_tgsi itself is not freed.
+ */
+void
+toy_tgsi_cleanup(struct toy_tgsi *tgsi)
+{
+   util_hash_table_destroy(tgsi->reg_mapping);
+
+   FREE(tgsi->imm_data.types);
+   FREE(tgsi->imm_data.buf);
+}
+
+/**
+ * Hash function of the register mapping.  The key is an integer stored in a
+ * pointer, and that integer is used as the hash.
+ */
+static unsigned
+reg_mapping_hash(void *key)
+{
+   const intptr_t sig = pointer_to_intptr(key);
+
+   return (unsigned) sig;
+}
+
+/**
+ * Key comparison function of the register mapping.  Return 0 when the two
+ * keys are the same pointer value and 1 otherwise.
+ */
+static int
+reg_mapping_compare(void *key1, void *key2)
+{
+   return (key1 == key2) ? 0 : 1;
+}
+
+/**
+ * Reset the TGSI translator and bind it to a compiler.
+ *
+ * The translate table is picked according to aos, and the table that maps
+ * TGSI registers to VRF registers is created.  Return false when that table
+ * cannot be created.
+ */
+static bool
+init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
+{
+   memset(tgsi, 0, sizeof(*tgsi));
+
+   tgsi->tc = tc;
+   tgsi->aos = aos;
+
+   if (aos)
+      tgsi->translate_table = aos_translate_table;
+   else
+      tgsi->translate_table = soa_translate_table;
+
+   /* create a mapping of TGSI registers to VRF registers */
+   tgsi->reg_mapping =
+      util_hash_table_create(reg_mapping_hash, reg_mapping_compare);
+   if (!tgsi->reg_mapping)
+      return false;
+
+   return true;
+}
+
+/**
+ * Translate TGSI tokens into toy instructions.
+ *
+ * \param tc      the compiler receiving the instructions
+ * \param tokens  the TGSI tokens of the shader
+ * \param aos     selects the AoS translate table when true and the SoA one
+ *                otherwise
+ * \param tgsi    the translator state to initialize and fill; the caller
+ *                releases it with toy_tgsi_cleanup()
+ *
+ * Failures are reported through tc_fail() on tc.
+ */
+void
+toy_compiler_translate_tgsi(struct toy_compiler *tc,
+                            const struct tgsi_token *tokens, bool aos,
+                            struct toy_tgsi *tgsi)
+{
+   struct tgsi_parse_context parse;
+
+   if (!init_tgsi(tgsi, tc, aos)) {
+      tc_fail(tc, "failed to initialize TGSI translator");
+      return;
+   }
+
+   /*
+    * tgsi_parse_init() reports a malformed token header.  The parse context
+    * is not usable in that case, so the token loop must not be entered.
+    */
+   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
+      tc_fail(tc, "failed to parse TGSI tokens");
+      return;
+   }
+
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+      parse_token(tgsi, &parse.FullToken);
+   }
+   tgsi_parse_free(&parse);
+}
+
+/**
+ * Look up the VRF register that a TGSI register was mapped to.
+ *
+ * Return -1 when the lookup in the register mapping finds no value.
+ */
+int
+toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
+                 enum tgsi_file_type file, int dimension, int index)
+{
+   void *key = ra_get_map_key(file, dimension, index);
+   void *val = util_hash_table_get(tgsi->reg_mapping, key);
+
+   if (!val)
+      return -1;
+
+   return pointer_to_intptr(val);
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.h b/src/gallium/drivers/ilo/shader/toy_tgsi.h
new file mode 100644
index 00000000000..1bfb57f6c7f
--- /dev/null
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.h
@@ -0,0 +1,253 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <[email protected]>
+ */
+
+#ifndef TOY_TGSI_H
+#define TOY_TGSI_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "toy_compiler.h"
+
+struct tgsi_token;
+struct tgsi_full_instruction;
+struct util_hash_table;
+
+/**
+ * Signature of the per-opcode translators stored in a translate table.  A
+ * translator is called with the compiler to emit to, the TGSI instruction
+ * being translated, and the arrays of destination and source operands
+ * already converted to toy registers.
+ */
+typedef void (*toy_tgsi_translate)(struct toy_compiler *tc,
+ const struct tgsi_full_instruction *tgsi_inst,
+ struct toy_dst *dst,
+ struct toy_src *src);
+
+/**
+ * State of the TGSI translator.  It is filled by
+ * toy_compiler_translate_tgsi() and released with toy_tgsi_cleanup().
+ */
+struct toy_tgsi {
+ /* the compiler that the translated instructions are added to */
+ struct toy_compiler *tc;
+ /* true to translate with the AoS translate table, false for SoA */
+ bool aos;
+ /* translators indexed by TGSI opcode */
+ const toy_tgsi_translate *translate_table;
+
+ /* mapping of TGSI registers to VRF registers */
+ struct util_hash_table *reg_mapping;
+
+ /* values of the TGSI_PROPERTY_x tokens seen */
+ struct {
+ bool vs_prohibit_ucps;
+ int fs_coord_origin;
+ int fs_coord_pixel_center;
+ bool fs_color0_writes_all_cbufs;
+ int fs_depth_layout;
+ int gs_input_prim;
+ int gs_output_prim;
+ int gs_max_output_vertices;
+ } props;
+
+ /*
+ * Immediates in declaration order: types[i] and buf[i] describe the i-th
+ * immediate.  cur is the number of immediates stored and size is the
+ * number of elements allocated for each array.
+ */
+ struct {
+ enum toy_type *types;
+ uint32_t (*buf)[4];
+ int cur, size;
+ } imm_data;
+
+ /* one entry per declared input register */
+ struct {
+ int index:16;
+ unsigned usage_mask:4; /* TGSI_WRITEMASK_x */
+ unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
+ unsigned semantic_index:8;
+ unsigned interp:4; /* TGSI_INTERPOLATE_x */
+ unsigned centroid:1;
+ } inputs[PIPE_MAX_SHADER_INPUTS];
+ int num_inputs;
+
+ /*
+ * One entry per declared output register.  undefined_mask starts with
+ * all four channels set; a channel is cleared once an instruction
+ * writes it.
+ */
+ struct {
+ int index:16;
+ unsigned undefined_mask:4;
+ unsigned usage_mask:4; /* TGSI_WRITEMASK_x */
+ unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
+ unsigned semantic_index:8;
+ } outputs[PIPE_MAX_SHADER_OUTPUTS];
+ int num_outputs;
+
+ /* one entry per declared system value register */
+ struct {
+ int index:16;
+ unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
+ unsigned semantic_index:8;
+ } system_values[8];
+ int num_system_values;
+
+ /* set when a KIL or KILP instruction has been translated */
+ bool uses_kill;
+};
+
+/**
+ * Return the slot in tgsi->inputs that holds the TGSI input register with
+ * the given index, or -1 when no such input was declared.
+ */
+static inline int
+toy_tgsi_find_input(const struct toy_tgsi *tgsi, int index)
+{
+   int slot = 0;
+
+   /* stop at the first slot that matches */
+   while (slot < tgsi->num_inputs && tgsi->inputs[slot].index != index)
+      slot++;
+
+   return (slot < tgsi->num_inputs) ? slot : -1;
+}
+
+/**
+ * Return the slot in tgsi->system_values that holds the TGSI system value
+ * register with the given index, or -1 when no such system value was
+ * declared.
+ */
+static inline int
+toy_tgsi_find_system_value(const struct toy_tgsi *tgsi, int index)
+{
+   int found = -1;
+   int slot;
+
+   for (slot = 0; slot < tgsi->num_system_values; slot++) {
+      if (tgsi->system_values[slot].index == index) {
+         /* the first match wins */
+         found = slot;
+         break;
+      }
+   }
+
+   return found;
+}
+
+/**
+ * Return the four data words of the TGSI immediate with the given index, or
+ * NULL when no immediate with that index has been added.  When type is not
+ * NULL, the toy type of the immediate is stored in it.
+ */
+static inline const uint32_t *
+toy_tgsi_get_imm(const struct toy_tgsi *tgsi, unsigned index,
+                 enum toy_type *type)
+{
+   if (index < tgsi->imm_data.cur) {
+      if (type)
+         *type = tgsi->imm_data.types[index];
+
+      return tgsi->imm_data.buf[index];
+   }
+
+   return NULL;
+}
+
+/**
+ * Return the number of texture coordinates that a TGSI texture target
+ * takes.  When shadow_or_sample is not NULL, it receives the position of the
+ * shadow reference value or of the sample index, or -1 when the target has
+ * neither.
+ */
+static inline int
+toy_tgsi_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
+{
+   int extra = -1;
+   int dim;
+
+   /*
+    * Depending on the texture target, (src0, src1.x) is interpreted
+    * differently:
+    *
+    *   (s, *, *, *, *),              for 1D
+    *   (s, t, *, *, *),              for 2D, RECT
+    *   (s, t, r, *, *),              for 3D, CUBE
+    *
+    *   (s, layer, *, *, *),          for 1D_ARRAY
+    *   (s, t, layer, *, *),          for 2D_ARRAY
+    *   (s, t, r, layer, *),          for CUBE_ARRAY
+    *
+    *   (s, *, shadow, *, *),         for SHADOW1D
+    *   (s, t, shadow, *, *),         for SHADOW2D, SHADOWRECT
+    *   (s, t, r, shadow, *),         for SHADOWCUBE
+    *
+    *   (s, layer, shadow, *, *),     for SHADOW1D_ARRAY
+    *   (s, t, layer, shadow, *),     for SHADOW2D_ARRAY
+    *   (s, t, r, layer, shadow),     for SHADOWCUBE_ARRAY
+    *
+    *   (s, t, sample, *, *),         for 2D_MSAA
+    *   (s, t, layer, sample, *),     for 2D_ARRAY_MSAA
+    */
+   switch (tgsi_tex) {
+   case TGSI_TEXTURE_1D:
+      dim = 1;
+      break;
+   case TGSI_TEXTURE_SHADOW1D:
+      dim = 1;
+      /* there is a gap between the coordinate and the reference value */
+      extra = 2;
+      break;
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_1D_ARRAY:
+      dim = 2;
+      break;
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+   case TGSI_TEXTURE_2D_MSAA:
+      /* the extra value directly follows the coordinates */
+      dim = 2;
+      extra = 2;
+      break;
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+   case TGSI_TEXTURE_2D_ARRAY:
+      dim = 3;
+      break;
+   case TGSI_TEXTURE_SHADOWCUBE:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+   case TGSI_TEXTURE_2D_ARRAY_MSAA:
+      dim = 3;
+      extra = 3;
+      break;
+   case TGSI_TEXTURE_CUBE_ARRAY:
+      dim = 4;
+      break;
+   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+      dim = 4;
+      extra = 4;
+      break;
+   default:
+      assert(!"unknown texture target");
+      dim = 0;
+      break;
+   }
+
+   if (shadow_or_sample)
+      *shadow_or_sample = extra;
+
+   return dim;
+}
+
+/* Translate TGSI tokens into toy instructions; tgsi is initialized here. */
+void
+toy_compiler_translate_tgsi(struct toy_compiler *tc,
+ const struct tgsi_token *tokens, bool aos,
+ struct toy_tgsi *tgsi);
+
+/* Clean up the TGSI translator. */
+void
+toy_tgsi_cleanup(struct toy_tgsi *tgsi);
+
+/*
+ * Map the TGSI register to its VRF register.  Return -1 when the register
+ * is not found in the mapping.
+ */
+int
+toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
+ enum tgsi_file_type file, int dimension, int index);
+
+/* Dump the TGSI translator, currently only the register mapping. */
+void
+toy_tgsi_dump(const struct toy_tgsi *tgsi);
+
+#endif /* TOY_TGSI_H */