diff options
author | Chia-I Wu <olv@lunarg.com> | 2012-12-13 05:48:28 +0800 |
---|---|---|
committer | Chia-I Wu <olvaffe@gmail.com> | 2013-04-26 16:20:52 +0800 |
commit | 7118ff8bb02046bb2f440e2a5c48d9a41bb057b1 (patch) | |
tree | facfe5b880db82818b95521da5edc5803de749be /src | |
parent | 0fa2d0e98aec1e864f7edc225eb9cda449051733 (diff) |
ilo: add a toy shader compiler
This is a simple shader compiler that performs almost zero optimizations. The
generated code is usually much larger than that generated by i965.
The generated code also requires many more registers.
Function-wise, it lacks register spilling and does not support most TGSI
indirections. Other than those, it works alright.
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/ilo/Makefile.sources | 9 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_compiler.c | 556 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_compiler.h | 473 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_compiler_asm.c | 750 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_compiler_disasm.c | 1385 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_compiler_reg.h | 800 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_helpers.h | 289 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_legalize.c | 632 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_legalize.h | 52 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_legalize_ra.c | 628 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_optimize.c | 71 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_optimize.h | 36 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_tgsi.c | 2736 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/shader/toy_tgsi.h | 253 |
14 files changed, 8669 insertions, 1 deletions
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 448f1d19736..c19801e7281 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -16,4 +16,11 @@ C_SOURCES := \ ilo_screen.c \ ilo_shader.c \ ilo_state.c \ - ilo_video.c + ilo_video.c \ + shader/toy_compiler.c \ + shader/toy_compiler_asm.c \ + shader/toy_compiler_disasm.c \ + shader/toy_legalize.c \ + shader/toy_legalize_ra.c \ + shader/toy_optimize.c \ + shader/toy_tgsi.c diff --git a/src/gallium/drivers/ilo/shader/toy_compiler.c b/src/gallium/drivers/ilo/shader/toy_compiler.c new file mode 100644 index 00000000000..73b03e62594 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_compiler.c @@ -0,0 +1,556 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "toy_compiler.h" + +/** + * Dump an operand. + */ +static void +tc_dump_operand(struct toy_compiler *tc, + enum toy_file file, enum toy_type type, enum toy_rect rect, + bool indirect, unsigned indirect_subreg, uint32_t val32, + bool is_dst) +{ + static const char *toy_file_names[TOY_FILE_COUNT] = { + [TOY_FILE_VRF] = "v", + [TOY_FILE_ARF] = "NOT USED", + [TOY_FILE_GRF] = "r", + [TOY_FILE_MRF] = "m", + [TOY_FILE_IMM] = "NOT USED", + }; + const char *name = toy_file_names[file]; + int reg, subreg; + + if (file != TOY_FILE_IMM) { + reg = val32 / TOY_REG_WIDTH; + subreg = (val32 % TOY_REG_WIDTH) / toy_type_size(type); + } + + switch (file) { + case TOY_FILE_GRF: + if (indirect) { + const int addr_subreg = indirect_subreg / toy_type_size(TOY_TYPE_UW); + + ilo_printf("%s[a0.%d", name, addr_subreg); + if (val32) + ilo_printf("%+d", (int) val32); + ilo_printf("]"); + break; + } + /* fall through */ + case TOY_FILE_VRF: + case TOY_FILE_MRF: + ilo_printf("%s%d", name, reg); + if (subreg) + ilo_printf(".%d", subreg); + break; + case TOY_FILE_ARF: + switch (reg) { + case BRW_ARF_NULL: + ilo_printf("null"); + break; + case BRW_ARF_ADDRESS: + ilo_printf("a0.%d", subreg); + break; + case BRW_ARF_ACCUMULATOR: + case BRW_ARF_ACCUMULATOR + 1: + ilo_printf("acc%d.%d", (reg & 1), subreg); + break; + case BRW_ARF_FLAG: + ilo_printf("f0.%d", subreg); + break; + case BRW_ARF_STATE: + ilo_printf("sr0.%d", subreg); + break; + case BRW_ARF_CONTROL: + ilo_printf("cr0.%d", subreg); + break; + case BRW_ARF_NOTIFICATION_COUNT: + case BRW_ARF_NOTIFICATION_COUNT + 1: + ilo_printf("n%d.%d", (reg & 1), subreg); + break; + case BRW_ARF_IP: + ilo_printf("ip"); + break; + } + break; + case TOY_FILE_IMM: + switch (type) { + case TOY_TYPE_F: + { + union fi fi = { .ui = val32 }; + ilo_printf("%f", fi.f); + } + break; + case TOY_TYPE_D: + ilo_printf("%d", (int32_t) val32); + break; + case TOY_TYPE_UD: + ilo_printf("%u", val32); + break; + 
case TOY_TYPE_W: + ilo_printf("%d", (int16_t) (val32 & 0xffff)); + break; + case TOY_TYPE_UW: + ilo_printf("%u", val32 & 0xffff); + break; + case TOY_TYPE_V: + ilo_printf("0x%08x", val32); + break; + default: + assert(!"unknown imm type"); + break; + } + break; + default: + assert(!"unexpected file"); + break; + } + + /* dump the region parameter */ + if (file != TOY_FILE_IMM) { + int vert_stride, width, horz_stride; + + switch (rect) { + case TOY_RECT_LINEAR: + vert_stride = tc->rect_linear_width; + width = tc->rect_linear_width; + horz_stride = 1; + break; + case TOY_RECT_041: + vert_stride = 0; + width = 4; + horz_stride = 1; + break; + case TOY_RECT_010: + vert_stride = 0; + width = 1; + horz_stride = 0; + break; + case TOY_RECT_220: + vert_stride = 2; + width = 2; + horz_stride = 0; + break; + case TOY_RECT_440: + vert_stride = 4; + width = 4; + horz_stride = 0; + break; + case TOY_RECT_240: + vert_stride = 2; + width = 4; + horz_stride = 0; + break; + default: + assert(!"unknown rect parameter"); + vert_stride = 0; + width = 0; + horz_stride = 0; + break; + } + + if (is_dst) + ilo_printf("<%d>", horz_stride); + else + ilo_printf("<%d;%d,%d>", vert_stride, width, horz_stride); + } + + switch (type) { + case TOY_TYPE_F: + ilo_printf(":f"); + break; + case TOY_TYPE_D: + ilo_printf(":d"); + break; + case TOY_TYPE_UD: + ilo_printf(":ud"); + break; + case TOY_TYPE_W: + ilo_printf(":w"); + break; + case TOY_TYPE_UW: + ilo_printf(":uw"); + break; + case TOY_TYPE_V: + ilo_printf(":v"); + break; + default: + assert(!"unexpected type"); + break; + } +} + +/** + * Dump a source operand. 
+ */ +static void +tc_dump_src(struct toy_compiler *tc, struct toy_src src) +{ + if (src.negate) + ilo_printf("-"); + if (src.absolute) + ilo_printf("|"); + + tc_dump_operand(tc, src.file, src.type, src.rect, + src.indirect, src.indirect_subreg, src.val32, false); + + if (tsrc_is_swizzled(src)) { + const char xyzw[] = "xyzw"; + ilo_printf(".%c%c%c%c", + xyzw[src.swizzle_x], + xyzw[src.swizzle_y], + xyzw[src.swizzle_z], + xyzw[src.swizzle_w]); + } + + if (src.absolute) + ilo_printf("|"); +} + +/** + * Dump a destination operand. + */ +static void +tc_dump_dst(struct toy_compiler *tc, struct toy_dst dst) +{ + tc_dump_operand(tc, dst.file, dst.type, dst.rect, + dst.indirect, dst.indirect_subreg, dst.val32, true); + + if (dst.writemask != TOY_WRITEMASK_XYZW) { + ilo_printf("."); + if (dst.writemask & TOY_WRITEMASK_X) + ilo_printf("x"); + if (dst.writemask & TOY_WRITEMASK_Y) + ilo_printf("y"); + if (dst.writemask & TOY_WRITEMASK_Z) + ilo_printf("z"); + if (dst.writemask & TOY_WRITEMASK_W) + ilo_printf("w"); + } +} + +static const char * +get_opcode_name(unsigned opcode) +{ + switch (opcode) { + case BRW_OPCODE_MOV: return "mov"; + case BRW_OPCODE_SEL: return "sel"; + case BRW_OPCODE_NOT: return "not"; + case BRW_OPCODE_AND: return "and"; + case BRW_OPCODE_OR: return "or"; + case BRW_OPCODE_XOR: return "xor"; + case BRW_OPCODE_SHR: return "shr"; + case BRW_OPCODE_SHL: return "shl"; + case BRW_OPCODE_RSR: return "rsr"; + case BRW_OPCODE_RSL: return "rsl"; + case BRW_OPCODE_ASR: return "asr"; + case BRW_OPCODE_CMP: return "cmp"; + case BRW_OPCODE_CMPN: return "cmpn"; + case BRW_OPCODE_JMPI: return "jmpi"; + case BRW_OPCODE_IF: return "if"; + case BRW_OPCODE_IFF: return "iff"; + case BRW_OPCODE_ELSE: return "else"; + case BRW_OPCODE_ENDIF: return "endif"; + case BRW_OPCODE_DO: return "do"; + case BRW_OPCODE_WHILE: return "while"; + case BRW_OPCODE_BREAK: return "break"; + case BRW_OPCODE_CONTINUE: return "continue"; + case BRW_OPCODE_HALT: return "halt"; + case 
BRW_OPCODE_MSAVE: return "msave"; + case BRW_OPCODE_MRESTORE: return "mrestore"; + case BRW_OPCODE_PUSH: return "push"; + case BRW_OPCODE_POP: return "pop"; + case BRW_OPCODE_WAIT: return "wait"; + case BRW_OPCODE_SEND: return "send"; + case BRW_OPCODE_SENDC: return "sendc"; + case BRW_OPCODE_MATH: return "math"; + case BRW_OPCODE_ADD: return "add"; + case BRW_OPCODE_MUL: return "mul"; + case BRW_OPCODE_AVG: return "avg"; + case BRW_OPCODE_FRC: return "frc"; + case BRW_OPCODE_RNDU: return "rndu"; + case BRW_OPCODE_RNDD: return "rndd"; + case BRW_OPCODE_RNDE: return "rnde"; + case BRW_OPCODE_RNDZ: return "rndz"; + case BRW_OPCODE_MAC: return "mac"; + case BRW_OPCODE_MACH: return "mach"; + case BRW_OPCODE_LZD: return "lzd"; + case BRW_OPCODE_SAD2: return "sad2"; + case BRW_OPCODE_SADA2: return "sada2"; + case BRW_OPCODE_DP4: return "dp4"; + case BRW_OPCODE_DPH: return "dph"; + case BRW_OPCODE_DP3: return "dp3"; + case BRW_OPCODE_DP2: return "dp2"; + case BRW_OPCODE_DPA2: return "dpa2"; + case BRW_OPCODE_LINE: return "line"; + case BRW_OPCODE_PLN: return "pln"; + case BRW_OPCODE_MAD: return "mad"; + case BRW_OPCODE_NOP: return "nop"; + /* TGSI */ + case TOY_OPCODE_TGSI_IN: return "tgsi.in"; + case TOY_OPCODE_TGSI_CONST: return "tgsi.const"; + case TOY_OPCODE_TGSI_SV: return "tgsi.sv"; + case TOY_OPCODE_TGSI_IMM: return "tgsi.imm"; + case TOY_OPCODE_TGSI_INDIRECT_FETCH: return "tgsi.indirect_fetch"; + case TOY_OPCODE_TGSI_INDIRECT_STORE: return "tgsi.indirect_store"; + case TOY_OPCODE_TGSI_TEX: return "tgsi.tex"; + case TOY_OPCODE_TGSI_TXB: return "tgsi.txb"; + case TOY_OPCODE_TGSI_TXD: return "tgsi.txd"; + case TOY_OPCODE_TGSI_TXL: return "tgsi.txl"; + case TOY_OPCODE_TGSI_TXP: return "tgsi.txp"; + case TOY_OPCODE_TGSI_TXF: return "tgsi.txf"; + case TOY_OPCODE_TGSI_TXQ: return "tgsi.txq"; + case TOY_OPCODE_TGSI_TXQ_LZ: return "tgsi.txq_lz"; + case TOY_OPCODE_TGSI_TEX2: return "tgsi.tex2"; + case TOY_OPCODE_TGSI_TXB2: return "tgsi.txb2"; + case TOY_OPCODE_TGSI_TXL2: 
return "tgsi.txl2"; + case TOY_OPCODE_TGSI_SAMPLE: return "tgsi.sample"; + case TOY_OPCODE_TGSI_SAMPLE_I: return "tgsi.sample_i"; + case TOY_OPCODE_TGSI_SAMPLE_I_MS: return "tgsi.sample_i_ms"; + case TOY_OPCODE_TGSI_SAMPLE_B: return "tgsi.sample_b"; + case TOY_OPCODE_TGSI_SAMPLE_C: return "tgsi.sample_c"; + case TOY_OPCODE_TGSI_SAMPLE_C_LZ: return "tgsi.sample_c_lz"; + case TOY_OPCODE_TGSI_SAMPLE_D: return "tgsi.sample_d"; + case TOY_OPCODE_TGSI_SAMPLE_L: return "tgsi.sample_l"; + case TOY_OPCODE_TGSI_GATHER4: return "tgsi.gather4"; + case TOY_OPCODE_TGSI_SVIEWINFO: return "tgsi.sviewinfo"; + case TOY_OPCODE_TGSI_SAMPLE_POS: return "tgsi.sample_pos"; + case TOY_OPCODE_TGSI_SAMPLE_INFO: return "tgsi.sample_info"; + /* math */ + case TOY_OPCODE_INV: return "math.inv"; + case TOY_OPCODE_LOG: return "math.log"; + case TOY_OPCODE_EXP: return "math.exp"; + case TOY_OPCODE_SQRT: return "math.sqrt"; + case TOY_OPCODE_RSQ: return "math.rsq"; + case TOY_OPCODE_SIN: return "math.sin"; + case TOY_OPCODE_COS: return "math.cos"; + case TOY_OPCODE_FDIV: return "math.fdiv"; + case TOY_OPCODE_POW: return "math.pow"; + case TOY_OPCODE_INT_DIV_QUOTIENT: return "math.int_div_quotient"; + case TOY_OPCODE_INT_DIV_REMAINDER: return "math.int_div_remainer"; + /* urb */ + case TOY_OPCODE_URB_WRITE: return "urb.urb_write"; + /* gs */ + case TOY_OPCODE_EMIT: return "gs.emit"; + case TOY_OPCODE_ENDPRIM: return "gs.endprim"; + /* fs */ + case TOY_OPCODE_DDX: return "fs.ddx"; + case TOY_OPCODE_DDY: return "fs.ddy"; + case TOY_OPCODE_FB_WRITE: return "fs.fb_write"; + case TOY_OPCODE_KIL: return "fs.kil"; + default: return "unk"; + } +} + +static const char * +get_cond_modifier_name(unsigned opcode, unsigned cond_modifier) +{ + switch (opcode) { + case BRW_OPCODE_SEND: + case BRW_OPCODE_SENDC: + /* SFID */ + switch (cond_modifier) { + case BRW_SFID_NULL: return "Null"; + case BRW_SFID_SAMPLER: return "Sampling Engine"; + case BRW_SFID_MESSAGE_GATEWAY: return "Message Gateway"; + case 
GEN6_SFID_DATAPORT_SAMPLER_CACHE: return "Data Port Sampler Cache"; + case GEN6_SFID_DATAPORT_RENDER_CACHE: return "Data Port Render Cache"; + case BRW_SFID_URB: return "URB"; + case BRW_SFID_THREAD_SPAWNER: return "Thread Spawner"; + case GEN6_SFID_DATAPORT_CONSTANT_CACHE: return "Constant Cache"; + default: return "Unknown"; + } + break; + case BRW_OPCODE_MATH: + /* FC */ + switch (cond_modifier) { + case BRW_MATH_FUNCTION_INV: return "INV"; + case BRW_MATH_FUNCTION_LOG: return "LOG"; + case BRW_MATH_FUNCTION_EXP: return "EXP"; + case BRW_MATH_FUNCTION_SQRT: return "SQRT"; + case BRW_MATH_FUNCTION_RSQ: return "RSQ"; + case BRW_MATH_FUNCTION_SIN: return "SIN"; + case BRW_MATH_FUNCTION_COS: return "COS"; + case BRW_MATH_FUNCTION_FDIV: return "FDIV"; + case BRW_MATH_FUNCTION_POW: return "POW"; + case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: return "INT DIV (quotient)"; + case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: return "INT DIV (remainder)"; + default: return "UNK"; + } + break; + default: + switch (cond_modifier) { + case BRW_CONDITIONAL_NONE: return NULL; + case BRW_CONDITIONAL_Z: return "z"; + case BRW_CONDITIONAL_NZ: return "nz"; + case BRW_CONDITIONAL_G: return "g"; + case BRW_CONDITIONAL_GE: return "ge"; + case BRW_CONDITIONAL_L: return "l"; + case BRW_CONDITIONAL_LE: return "le"; + default: return "unk"; + } + break; + } +} + +/** + * Dump an instruction. 
+ */ +static void +tc_dump_inst(struct toy_compiler *tc, const struct toy_inst *inst) +{ + const char *name; + int i; + + name = get_opcode_name(inst->opcode); + + ilo_printf(" %s", name); + + if (inst->opcode == BRW_OPCODE_NOP) { + ilo_printf("\n"); + return; + } + + if (inst->saturate) + ilo_printf(".sat"); + + name = get_cond_modifier_name(inst->opcode, inst->cond_modifier); + if (name) + ilo_printf(".%s", name); + + ilo_printf(" "); + + tc_dump_dst(tc, inst->dst); + + for (i = 0; i < Elements(inst->src); i++) { + if (tsrc_is_null(inst->src[i])) + break; + + ilo_printf(", "); + tc_dump_src(tc, inst->src[i]); + } + + ilo_printf("\n"); +} + +/** + * Dump the instructions added to the compiler. + */ +void +toy_compiler_dump(struct toy_compiler *tc) +{ + struct toy_inst *inst; + int pc; + + pc = 0; + tc_head(tc); + while ((inst = tc_next_no_skip(tc)) != NULL) { + /* we do not generate code for markers */ + if (inst->marker) + ilo_printf("marker:"); + else + ilo_printf("%6d:", pc++); + + tc_dump_inst(tc, inst); + } +} + +/** + * Clean up the toy compiler. + */ +void +toy_compiler_cleanup(struct toy_compiler *tc) +{ + struct toy_inst *inst, *next; + + LIST_FOR_EACH_ENTRY_SAFE(inst, next, &tc->instructions, list) + util_slab_free(&tc->mempool, inst); + + util_slab_destroy(&tc->mempool); +} + +/** + * Initialize the instruction template, from which tc_add() initializes the + * newly added instructions. 
+ */ +static void +tc_init_inst_templ(struct toy_compiler *tc) +{ + struct toy_inst *templ = &tc->templ; + int i; + + templ->opcode = BRW_OPCODE_NOP; + templ->access_mode = BRW_ALIGN_1; + templ->mask_ctrl = BRW_MASK_ENABLE; + templ->dep_ctrl = BRW_DEPENDENCY_NORMAL; + templ->qtr_ctrl = GEN6_COMPRESSION_1Q; + templ->thread_ctrl = BRW_THREAD_NORMAL; + templ->pred_ctrl = BRW_PREDICATE_NONE; + templ->pred_inv = false; + templ->exec_size = BRW_EXECUTE_1; + templ->cond_modifier = BRW_CONDITIONAL_NONE; + templ->acc_wr_ctrl = false; + templ->saturate = false; + + templ->marker = false; + + templ->dst = tdst_null(); + for (i = 0; i < Elements(templ->src); i++) + templ->src[i] = tsrc_null(); + + for (i = 0; i < Elements(templ->tex.offsets); i++) + templ->tex.offsets[i] = tsrc_null(); + + list_inithead(&templ->list); +} + +/** + * Initialize the toy compiler. + */ +void +toy_compiler_init(struct toy_compiler *tc, int gen) +{ + memset(tc, 0, sizeof(*tc)); + + tc->gen = gen; + + tc_init_inst_templ(tc); + + util_slab_create(&tc->mempool, sizeof(struct toy_inst), + 64, UTIL_SLAB_SINGLETHREADED); + + list_inithead(&tc->instructions); + /* instructions are added to the tail */ + tc_tail(tc); + + tc->rect_linear_width = 1; + + /* skip 0 so that util_hash_table_get() never returns NULL */ + tc->next_vrf = 1; +} diff --git a/src/gallium/drivers/ilo/shader/toy_compiler.h b/src/gallium/drivers/ilo/shader/toy_compiler.h new file mode 100644 index 00000000000..a6413ead066 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_compiler.h @@ -0,0 +1,473 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#ifndef TOY_COMPILER_H +#define TOY_COMPILER_H + +#include "brw_defines.h" + +#include "util/u_slab.h" +#include "ilo_common.h" +#include "toy_compiler_reg.h" + +/** + * Toy opcodes. 
+ */
+enum toy_opcode {
+   /*
+    * 0..127 are reserved for BRW_OPCODE_x.  The enumerators below are
+    * numbered implicitly, starting at 128, and are stored in the 8-bit
+    * opcode field of struct toy_inst, so their order and count matter.
+    */
+   TOY_OPCODE_LAST_HW = 127,
+
+   /* TGSI register functions (dumped with a "tgsi." prefix) */
+   TOY_OPCODE_TGSI_IN,
+   TOY_OPCODE_TGSI_CONST,
+   TOY_OPCODE_TGSI_SV,
+   TOY_OPCODE_TGSI_IMM,
+   TOY_OPCODE_TGSI_INDIRECT_FETCH,
+   TOY_OPCODE_TGSI_INDIRECT_STORE,
+
+   /* TGSI sampling functions (dumped with a "tgsi." prefix) */
+   TOY_OPCODE_TGSI_TEX,
+   TOY_OPCODE_TGSI_TXB,
+   TOY_OPCODE_TGSI_TXD,
+   TOY_OPCODE_TGSI_TXL,
+   TOY_OPCODE_TGSI_TXP,
+   TOY_OPCODE_TGSI_TXF,
+   TOY_OPCODE_TGSI_TXQ,
+   TOY_OPCODE_TGSI_TXQ_LZ,
+   TOY_OPCODE_TGSI_TEX2,
+   TOY_OPCODE_TGSI_TXB2,
+   TOY_OPCODE_TGSI_TXL2,
+   TOY_OPCODE_TGSI_SAMPLE,
+   TOY_OPCODE_TGSI_SAMPLE_I,
+   TOY_OPCODE_TGSI_SAMPLE_I_MS,
+   TOY_OPCODE_TGSI_SAMPLE_B,
+   TOY_OPCODE_TGSI_SAMPLE_C,
+   TOY_OPCODE_TGSI_SAMPLE_C_LZ,
+   TOY_OPCODE_TGSI_SAMPLE_D,
+   TOY_OPCODE_TGSI_SAMPLE_L,
+   TOY_OPCODE_TGSI_GATHER4,
+   TOY_OPCODE_TGSI_SVIEWINFO,
+   TOY_OPCODE_TGSI_SAMPLE_POS,
+   TOY_OPCODE_TGSI_SAMPLE_INFO,
+
+   /* math functions (dumped with a "math." prefix) */
+   TOY_OPCODE_INV,
+   TOY_OPCODE_LOG,
+   TOY_OPCODE_EXP,
+   TOY_OPCODE_SQRT,
+   TOY_OPCODE_RSQ,
+   TOY_OPCODE_SIN,
+   TOY_OPCODE_COS,
+   TOY_OPCODE_FDIV,
+   TOY_OPCODE_POW,
+   TOY_OPCODE_INT_DIV_QUOTIENT,
+   TOY_OPCODE_INT_DIV_REMAINDER,
+
+   /* URB functions (dumped with a "urb." prefix) */
+   TOY_OPCODE_URB_WRITE,
+
+   /* GS-specific functions (dumped with a "gs." prefix) */
+   TOY_OPCODE_EMIT,
+   TOY_OPCODE_ENDPRIM,
+
+   /* FS-specific functions (dumped with an "fs." prefix) */
+   TOY_OPCODE_DDX,
+   TOY_OPCODE_DDY,
+   TOY_OPCODE_FB_WRITE,
+   TOY_OPCODE_KIL,
+};
+
+/**
+ * Toy instruction.
+ */
+struct toy_inst {
+   unsigned opcode:8;            /* enum toy_opcode */
+   unsigned access_mode:1;       /* BRW_ALIGN_x */
+   unsigned mask_ctrl:1;         /* BRW_MASK_x */
+   unsigned dep_ctrl:2;          /* BRW_DEPENDENCY_x */
+   unsigned qtr_ctrl:2;          /* GEN6_COMPRESSION_x */
+   unsigned thread_ctrl:2;       /* BRW_THREAD_x */
+   unsigned pred_ctrl:4;         /* BRW_PREDICATE_x */
+   unsigned pred_inv:1;          /* true or false */
+   unsigned exec_size:3;         /* BRW_EXECUTE_x */
+   unsigned cond_modifier:4;     /* BRW_CONDITIONAL_x; dumped as an SFID for
+                                  * SEND/SENDC and as a math function for
+                                  * MATH */
+   unsigned acc_wr_ctrl:1;       /* true or false */
+   unsigned saturate:1;          /* true or false */
+
+   /* true if the instruction should be ignored for instruction iteration */
+   unsigned marker:1;
+
+   unsigned pad:1;               /* brings the bit-fields to 32 bits */
+
+   struct toy_dst dst;
+   struct toy_src src[5];        /* match TGSI_FULL_MAX_SRC_REGISTERS */
+
+   struct {
+      int target;                /* TGSI_TEXTURE_x */
+      struct toy_src offsets[1]; /* need to be 4 when GATHER4 is supported */
+   } tex;
+
+   struct list_head list;        /* node in toy_compiler::instructions */
+};
+
+/**
+ * Toy compiler.
+ *
+ * Instructions are allocated from mempool and linked into the instructions
+ * list.  iter is the current location; iter_next is the node that new
+ * instructions are passed to list_addtail() with, and the node the
+ * iteration advances to.
+ */
+struct toy_compiler {
+   int gen;                      /* as given to toy_compiler_init() */
+
+   struct toy_inst templ;        /* copied by tc_add() into new instructions */
+   struct util_slab_mempool mempool;
+   struct list_head instructions;
+   struct list_head *iter, *iter_next;
+
+   /* this is not set until toy_compiler_legalize_for_asm() */
+   int num_instructions;
+
+   int rect_linear_width;        /* strides printed for TOY_RECT_LINEAR */
+   int next_vrf;                 /* next VRF number handed out; starts at 1 */
+
+   bool fail;                    /* set by the first tc_fail() call */
+   const char *reason;           /* reason given to that first tc_fail() */
+};
+
+/**
+ * Allocate the given number of VRF registers.
+ *
+ * Returns the number of the first register; the remaining ones follow it
+ * consecutively.  No upper bound is enforced here.
+ */
+static inline int
+tc_alloc_vrf(struct toy_compiler *tc, int count)
+{
+   const int vrf = tc->next_vrf;
+
+   tc->next_vrf += count;
+
+   return vrf;
+}
+
+/**
+ * Allocate a temporary register.
+ *
+ * The temporary is a destination operand in the VRF file backed by one
+ * freshly allocated VRF register.
+ */
+static inline struct toy_dst
+tc_alloc_tmp(struct toy_compiler *tc)
+{
+   return tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, 1), 0);
+}
+
+/**
+ * Allocate four temporary registers.
+ */ +static inline void +tc_alloc_tmp4(struct toy_compiler *tc, struct toy_dst *tmp) +{ + tmp[0] = tc_alloc_tmp(tc); + tmp[1] = tc_alloc_tmp(tc); + tmp[2] = tc_alloc_tmp(tc); + tmp[3] = tc_alloc_tmp(tc); +} + +/** + * Duplicate an instruction at the current location. + */ +static inline struct toy_inst * +tc_duplicate_inst(struct toy_compiler *tc, const struct toy_inst *inst) +{ + struct toy_inst *new_inst; + + new_inst = util_slab_alloc(&tc->mempool); + if (!new_inst) + return NULL; + + *new_inst = *inst; + list_addtail(&new_inst->list, tc->iter_next); + + return new_inst; +} + +/** + * Move an instruction to the current location. + */ +static inline void +tc_move_inst(struct toy_compiler *tc, struct toy_inst *inst) +{ + list_del(&inst->list); + list_addtail(&inst->list, tc->iter_next); +} + +/** + * Discard an instruction. + */ +static inline void +tc_discard_inst(struct toy_compiler *tc, struct toy_inst *inst) +{ + list_del(&inst->list); + util_slab_free(&tc->mempool, inst); +} + +/** + * Add a new instruction at the current location, using tc->templ as the + * template. + */ +static inline struct toy_inst * +tc_add(struct toy_compiler *tc) +{ + return tc_duplicate_inst(tc, &tc->templ); +} + +/** + * A convenient version of tc_add() for instructions with 3 source operands. + */ +static inline struct toy_inst * +tc_add3(struct toy_compiler *tc, unsigned opcode, + struct toy_dst dst, + struct toy_src src0, + struct toy_src src1, + struct toy_src src2) +{ + struct toy_inst *inst; + + inst = tc_add(tc); + if (!inst) + return NULL; + + inst->opcode = opcode; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + + return inst; +} + +/** + * A convenient version of tc_add() for instructions with 2 source operands. 
+ */ +static inline struct toy_inst * +tc_add2(struct toy_compiler *tc, int opcode, + struct toy_dst dst, + struct toy_src src0, + struct toy_src src1) +{ + return tc_add3(tc, opcode, dst, src0, src1, tsrc_null()); +} + +/** + * A convenient version of tc_add() for instructions with 1 source operand. + */ +static inline struct toy_inst * +tc_add1(struct toy_compiler *tc, unsigned opcode, + struct toy_dst dst, + struct toy_src src0) +{ + return tc_add2(tc, opcode, dst, src0, tsrc_null()); +} + +/** + * A convenient version of tc_add() for instructions without source or + * destination operands. + */ +static inline struct toy_inst * +tc_add0(struct toy_compiler *tc, unsigned opcode) +{ + return tc_add1(tc, opcode, tdst_null(), tsrc_null()); +} + +#define TC_ALU0(func, opcode) \ +static inline struct toy_inst * \ +func(struct toy_compiler *tc) \ +{ \ + return tc_add0(tc, opcode); \ +} + +#define TC_ALU1(func, opcode) \ +static inline struct toy_inst * \ +func(struct toy_compiler *tc, \ + struct toy_dst dst, \ + struct toy_src src) \ +{ \ + return tc_add1(tc, opcode, dst, src); \ +} + +#define TC_ALU2(func, opcode) \ +static inline struct toy_inst * \ +func(struct toy_compiler *tc, \ + struct toy_dst dst, \ + struct toy_src src0, \ + struct toy_src src1) \ +{ \ + return tc_add2(tc, opcode, \ + dst, src0, src1); \ +} + +#define TC_ALU3(func, opcode) \ +static inline struct toy_inst * \ +func(struct toy_compiler *tc, \ + struct toy_dst dst, \ + struct toy_src src0, \ + struct toy_src src1, \ + struct toy_src src2) \ +{ \ + return tc_add3(tc, opcode, \ + dst, src0, src1, src2); \ +} + +#define TC_CND2(func, opcode) \ +static inline struct toy_inst * \ +func(struct toy_compiler *tc, \ + struct toy_dst dst, \ + struct toy_src src0, \ + struct toy_src src1, \ + unsigned cond_modifier) \ +{ \ + struct toy_inst *inst; \ + inst = tc_add2(tc, opcode, \ + dst, src0, src1); \ + inst->cond_modifier = cond_modifier; \ + return inst; \ +} + +TC_ALU0(tc_NOP, BRW_OPCODE_NOP) 
+TC_ALU0(tc_ELSE, BRW_OPCODE_ELSE) +TC_ALU0(tc_ENDIF, BRW_OPCODE_ENDIF) +TC_ALU1(tc_MOV, BRW_OPCODE_MOV) +TC_ALU1(tc_RNDD, BRW_OPCODE_RNDD) +TC_ALU1(tc_INV, TOY_OPCODE_INV) +TC_ALU1(tc_FRC, BRW_OPCODE_FRC) +TC_ALU1(tc_EXP, TOY_OPCODE_EXP) +TC_ALU1(tc_LOG, TOY_OPCODE_LOG) +TC_ALU2(tc_ADD, BRW_OPCODE_ADD) +TC_ALU2(tc_MUL, BRW_OPCODE_MUL) +TC_ALU2(tc_AND, BRW_OPCODE_AND) +TC_ALU2(tc_OR, BRW_OPCODE_OR) +TC_ALU2(tc_DP2, BRW_OPCODE_DP2) +TC_ALU2(tc_DP3, BRW_OPCODE_DP3) +TC_ALU2(tc_DP4, BRW_OPCODE_DP4) +TC_ALU2(tc_SHL, BRW_OPCODE_SHL) +TC_ALU2(tc_SHR, BRW_OPCODE_SHR) +TC_ALU2(tc_POW, TOY_OPCODE_POW) +TC_ALU3(tc_MAC, BRW_OPCODE_MAC) +TC_CND2(tc_SEL, BRW_OPCODE_SEL) +TC_CND2(tc_CMP, BRW_OPCODE_CMP) +TC_CND2(tc_IF, BRW_OPCODE_IF) +TC_CND2(tc_SEND, BRW_OPCODE_SEND) + +/** + * Upcast a list_head to an instruction. + */ +static inline struct toy_inst * +tc_list_to_inst(struct toy_compiler *tc, struct list_head *item) +{ + return container_of(item, (struct toy_inst *) NULL, list); +} + +/** + * Return the instruction at the current location. + */ +static inline struct toy_inst * +tc_current(struct toy_compiler *tc) +{ + return (tc->iter != &tc->instructions) ? + tc_list_to_inst(tc, tc->iter) : NULL; +} + +/** + * Set the current location to the head. + */ +static inline void +tc_head(struct toy_compiler *tc) +{ + tc->iter = &tc->instructions; + tc->iter_next = tc->iter->next; +} + +/** + * Set the current location to the tail. + */ +static inline void +tc_tail(struct toy_compiler *tc) +{ + tc->iter = &tc->instructions; + tc->iter_next = tc->iter; +} + +/** + * Advance the current location. 
+ */ +static inline struct toy_inst * +tc_next_no_skip(struct toy_compiler *tc) +{ + /* stay at the tail so that new instructions are added there */ + if (tc->iter_next == &tc->instructions) { + tc_tail(tc); + return NULL; + } + + tc->iter = tc->iter_next; + tc->iter_next = tc->iter_next->next; + + return tc_list_to_inst(tc, tc->iter); +} + +/** + * Advance the current location, skipping markers. + */ +static inline struct toy_inst * +tc_next(struct toy_compiler *tc) +{ + struct toy_inst *inst; + + do { + inst = tc_next_no_skip(tc); + } while (inst && inst->marker); + + return inst; +} + +static inline void +tc_fail(struct toy_compiler *tc, const char *reason) +{ + if (!tc->fail) { + tc->fail = true; + tc->reason = reason; + } +} + +void +toy_compiler_init(struct toy_compiler *tc, int gen); + +void +toy_compiler_cleanup(struct toy_compiler *tc); + +void +toy_compiler_dump(struct toy_compiler *tc); + +void * +toy_compiler_assemble(struct toy_compiler *tc, int *size); + +void +toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size); + +#endif /* TOY_COMPILER_H */ diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_asm.c b/src/gallium/drivers/ilo/shader/toy_compiler_asm.c new file mode 100644 index 00000000000..09a00dd2211 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_compiler_asm.c @@ -0,0 +1,750 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "toy_compiler.h"
+
+#define CG_REG_SHIFT 5 /* an origin is (RegNum << 5 | SubRegNumInBytes) */
+#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT) /* RegNum of an origin */
+
+struct codegen { /* an instruction and its operands, as consumed by the translate_x() helpers */
+   const struct toy_inst *inst; /* read for access_mode and opcode */
+   int pc;
+
+   unsigned flag_sub_reg_num; /* placed at bit 25 of the src0 dword */
+
+   struct codegen_dst {
+      unsigned file; /* compared against BRW_x register files */
+      unsigned type;
+      bool indirect;
+      unsigned indirect_subreg;
+      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
+
+      unsigned horz_stride;
+
+      unsigned writemask;
+   } dst;
+
+   struct codegen_src {
+      unsigned file; /* compared against BRW_x register files */
+      unsigned type;
+      bool indirect;
+      unsigned indirect_subreg;
+      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes); the raw value
+                        * for an immediate, the offset when indirect */
+
+      unsigned vert_stride;
+      unsigned width;
+      unsigned horz_stride;
+
+      unsigned swizzle[4]; /* x, y, z, w */
+      bool absolute;
+      bool negate;
+   } src[3];
+};
+
+/**
+ * Return true if the source operand is null.
+ */
+static bool
+src_is_null(const struct codegen *cg, int idx)
+{
+   const struct codegen_src *src = &cg->src[idx];
+
+   return (src->file == BRW_ARCHITECTURE_REGISTER_FILE &&
+           src->origin == BRW_ARF_NULL << CG_REG_SHIFT); /* register BRW_ARF_NULL, subregister 0 */
+}
+
+/**
+ * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
+ *
+ * When src0 is an immediate, src1 must be null: idx 0 then yields only the
+ * flag subregister number at bit 25, and any other idx yields the immediate
+ * value itself.  When src1 is an immediate, a non-zero idx yields that
+ * value.
+ *
+ * Otherwise a register description is built: the vertical stride goes to
+ * bit 21, the address mode to bit 15, negate to bit 14 and absolute to
+ * bit 13.  In Align16 mode the z/w swizzles go to bits 16/18 and the x/y
+ * swizzles to bits 0/2; in Align1 mode the width goes to bit 18 and the
+ * horizontal stride to bit 16.  An indirect operand additionally carries
+ * the address subregister at bit 10 and the offset in the low 10 bits.
+ * For idx 0, the flag subregister number is OR'ed in at bit 25.
+ */
+static uint32_t
+translate_src(const struct codegen *cg, int idx)
+{
+   const struct codegen_src *src = &cg->src[idx];
+   uint32_t dw;
+
+   /* special treatment may be needed if any of the operand is immediate */
+   if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
+      assert(!cg->src[0].absolute && !cg->src[0].negate);
+      /* only the last src operand can be an immediate */
+      assert(src_is_null(cg, 1));
+
+      if (idx == 0)
+         return cg->flag_sub_reg_num << 25;
+      else
+         return cg->src[0].origin; /* the immediate value */
+   }
+   else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
+      assert(!cg->src[1].absolute && !cg->src[1].negate);
+      return cg->src[1].origin; /* the immediate value */
+   }
+
+   assert(src->file != BRW_IMMEDIATE_VALUE);
+
+   if (src->indirect) {
+      const int offset = (int) src->origin; /* origin is a signed offset here */
+
+      assert(src->file == BRW_GENERAL_REGISTER_FILE);
+      assert(offset < 512 && offset >= -512); /* must fit the 10-bit field */
+
+      if (cg->inst->access_mode == BRW_ALIGN_16) {
+         assert(src->width == BRW_WIDTH_4);
+         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+
+         /* the lower 4 bits are reserved for the swizzle_[xy] */
+         assert(!(src->origin & 0xf));
+
+         dw = src->vert_stride << 21 |
+              src->swizzle[3] << 18 |
+              src->swizzle[2] << 16 |
+              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+              src->negate << 14 |
+              src->absolute << 13 |
+              src->indirect_subreg << 10 |
+              (src->origin & 0x3f0) |
+              src->swizzle[1] << 2 |
+              src->swizzle[0];
+      }
+      else {
+         /* Align1 has no swizzle field; only the identity is accepted */
+         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
+                src->swizzle[1] == TOY_SWIZZLE_Y &&
+                src->swizzle[2] == TOY_SWIZZLE_Z &&
+                src->swizzle[3] == TOY_SWIZZLE_W);
+
+         dw = src->vert_stride << 21 |
+              src->width << 18 |
+              src->horz_stride << 16 |
+              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+              src->negate << 14 |
+              src->absolute << 13 |
+              src->indirect_subreg << 10 |
+              (src->origin & 0x3ff);
+      }
+   }
+   else {
+      /* validate the register number against the file, in debug builds */
+      switch (src->file) {
+      case BRW_ARCHITECTURE_REGISTER_FILE:
+         break;
+      case BRW_GENERAL_REGISTER_FILE:
+         assert(CG_REG_NUM(src->origin) < 128);
+         break;
+      case BRW_MESSAGE_REGISTER_FILE:
+         assert(cg->inst->opcode == BRW_OPCODE_SEND ||
+                cg->inst->opcode == BRW_OPCODE_SENDC);
+         assert(CG_REG_NUM(src->origin) < 16);
+         break;
+      case BRW_IMMEDIATE_VALUE:
+      default:
+         assert(!"invalid src file");
+         break;
+      }
+
+      if (cg->inst->access_mode == BRW_ALIGN_16) {
+         assert(src->width == BRW_WIDTH_4);
+         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+
+         /* the lower 4 bits are reserved for the swizzle_[xy] */
+         assert(!(src->origin & 0xf));
+
+         dw = src->vert_stride << 21 |
+              src->swizzle[3] << 18 |
+              src->swizzle[2] << 16 |
+              BRW_ADDRESS_DIRECT << 15 |
+              src->negate << 14 |
+              src->absolute << 13 |
+              src->origin |
+              src->swizzle[1] << 2 |
+              src->swizzle[0];
+      }
+      else {
+         /* Align1 has no swizzle field; only the identity is accepted */
+         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
+                src->swizzle[1] == TOY_SWIZZLE_Y &&
+                src->swizzle[2] == TOY_SWIZZLE_Z &&
+                src->swizzle[3] == TOY_SWIZZLE_W);
+
+         dw = src->vert_stride << 21 |
+              src->width << 18 |
+              src->horz_stride << 16 |
+              BRW_ADDRESS_DIRECT << 15 |
+              src->negate << 14 |
+              src->absolute << 13 |
+              src->origin;
+      }
+   }
+
+   if (idx == 0)
+      dw |= cg->flag_sub_reg_num << 25; /* only the src0 dword carries it */
+
+   return dw;
+}
+
+/**
+ * Translate the destination operand to the higher 16 bits of DW1 of the
+ * 1-src/2-src format.
+ */ +static uint16_t +translate_dst_region(const struct codegen *cg) +{ + const struct codegen_dst *dst = &cg->dst; + uint16_t dw1_region; + + if (dst->file == BRW_IMMEDIATE_VALUE) { + /* dst is immediate (JIP) when the opcode is a conditional branch */ + switch (cg->inst->opcode) { + case BRW_OPCODE_IF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_WHILE: + assert(dst->type == BRW_REGISTER_TYPE_W); + dw1_region = (dst->origin & 0xffff); + break; + default: + assert(!"dst cannot be immediate"); + dw1_region = 0; + break; + } + + return dw1_region; + } + + if (dst->indirect) { + const int offset = (int) dst->origin; + + assert(dst->file == BRW_GENERAL_REGISTER_FILE); + assert(offset < 512 && offset >= -512); + + if (cg->inst->access_mode == BRW_ALIGN_16) { + /* + * From the Sandy Bridge PRM, volume 4 part 2, page 144: + * + * "Allthough Dst.HorzStride is a don't care for Align16, HW + * needs this to be programmed as 01." + */ + assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1); + /* the lower 4 bits are reserved for the writemask */ + assert(!(dst->origin & 0xf)); + + dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 | + dst->horz_stride << 13 | + dst->indirect_subreg << 10 | + (dst->origin & 0x3f0) | + dst->writemask; + } + else { + assert(dst->writemask == TOY_WRITEMASK_XYZW); + + dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 | + dst->horz_stride << 13 | + dst->indirect_subreg << 10 | + (dst->origin & 0x3ff); + } + } + else { + assert((dst->file == BRW_GENERAL_REGISTER_FILE && + CG_REG_NUM(dst->origin) < 128) || + (dst->file == BRW_MESSAGE_REGISTER_FILE && + CG_REG_NUM(dst->origin) < 16) || + (dst->file == BRW_ARCHITECTURE_REGISTER_FILE)); + + if (cg->inst->access_mode == BRW_ALIGN_16) { + /* similar to the indirect case */ + assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1); + assert(!(dst->origin & 0xf)); + + dw1_region = BRW_ADDRESS_DIRECT << 15 | + dst->horz_stride << 13 | + dst->origin | + dst->writemask; + } + 
else { + assert(dst->writemask == TOY_WRITEMASK_XYZW); + + dw1_region = BRW_ADDRESS_DIRECT << 15 | + dst->horz_stride << 13 | + dst->origin; + } + } + + return dw1_region; +} + +/** + * Translate the destination operand to DW1 of the 1-src/2-src format. + */ +static uint32_t +translate_dst(const struct codegen *cg) +{ + return translate_dst_region(cg) << 16 | + cg->src[1].type << 12 | + cg->src[1].file << 10 | + cg->src[0].type << 7 | + cg->src[0].file << 5 | + cg->dst.type << 2 | + cg->dst.file; +} + +/** + * Translate the instruction to DW0 of the 1-src/2-src format. + */ +static uint32_t +translate_inst(const struct codegen *cg) +{ + const bool debug_ctrl = false; + const bool cmpt_ctrl = false; + + assert(cg->inst->opcode < 128); + + return cg->inst->saturate << 31 | + debug_ctrl << 30 | + cmpt_ctrl << 29 | + cg->inst->acc_wr_ctrl << 28 | + cg->inst->cond_modifier << 24 | + cg->inst->exec_size << 21 | + cg->inst->pred_inv << 20 | + cg->inst->pred_ctrl << 16 | + cg->inst->thread_ctrl << 14 | + cg->inst->qtr_ctrl << 12 | + cg->inst->dep_ctrl << 10 | + cg->inst->mask_ctrl << 9 | + cg->inst->access_mode << 8 | + cg->inst->opcode; +} + +/** + * Codegen an instruction in 1-src/2-src format. + */ +static void +codegen_inst(const struct codegen *cg, uint32_t *code) +{ + code[0] = translate_inst(cg); + code[1] = translate_dst(cg); + code[2] = translate_src(cg, 0); + code[3] = translate_src(cg, 1); + assert(src_is_null(cg, 2)); +} + +/** + * Codegen an instruction in 3-src format. 
+ */ +static void +codegen_inst_3src(const struct codegen *cg, uint32_t *code) +{ + const struct codegen_dst *dst = &cg->dst; + uint32_t dw0, dw1, dw_src[3]; + int i; + + dw0 = translate_inst(cg); + + /* + * 3-src instruction restrictions + * + * - align16 with direct addressing + * - GRF or MRF dst + * - GRF src + * - sub_reg_num is DWORD aligned + * - no regioning except replication control + * (vert_stride == 0 && horz_stride == 0) + */ + assert(cg->inst->access_mode == BRW_ALIGN_16); + + assert(!dst->indirect); + assert((dst->file == BRW_GENERAL_REGISTER_FILE && + CG_REG_NUM(dst->origin) < 128) || + (dst->file == BRW_MESSAGE_REGISTER_FILE && + CG_REG_NUM(dst->origin) < 16)); + assert(!(dst->origin & 0x3)); + assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1); + + dw1 = dst->origin << 19 | + dst->writemask << 17 | + cg->src[2].negate << 9 | + cg->src[2].absolute << 8 | + cg->src[1].negate << 7 | + cg->src[1].absolute << 6 | + cg->src[0].negate << 5 | + cg->src[0].absolute << 4 | + cg->flag_sub_reg_num << 1 | + (dst->file == BRW_MESSAGE_REGISTER_FILE); + + for (i = 0; i < 3; i++) { + const struct codegen_src *src = &cg->src[i]; + + assert(!src->indirect); + assert(src->file == BRW_GENERAL_REGISTER_FILE && + CG_REG_NUM(src->origin) < 128); + assert(!(src->origin & 0x3)); + + assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 && + src->horz_stride == BRW_HORIZONTAL_STRIDE_1) || + (src->vert_stride == BRW_VERTICAL_STRIDE_0 && + src->horz_stride == BRW_HORIZONTAL_STRIDE_0)); + assert(src->width == BRW_WIDTH_4); + + dw_src[i] = src->origin << 7 | + src->swizzle[3] << 7 | + src->swizzle[2] << 5 | + src->swizzle[1] << 3 | + src->swizzle[0] << 1 | + (src->vert_stride == BRW_VERTICAL_STRIDE_0 && + src->horz_stride == BRW_HORIZONTAL_STRIDE_0); + + /* only the lower 20 bits are used */ + assert((dw_src[i] & 0xfffff) == dw_src[i]); + } + + code[0] = dw0; + code[1] = dw1; + /* concatenate the bits of dw_src */ + code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0]; + code[3] = 
dw_src[2] << 10 | (dw_src[1] >> 11); +} + +/** + * Sanity check the region parameters of the operands. + */ +static void +codegen_validate_region_restrictions(const struct codegen *cg) +{ + const int exec_size_map[] = { + [BRW_EXECUTE_1] = 1, + [BRW_EXECUTE_2] = 2, + [BRW_EXECUTE_4] = 4, + [BRW_EXECUTE_8] = 8, + [BRW_EXECUTE_16] = 16, + [BRW_EXECUTE_32] = 32, + }; + const int width_map[] = { + [BRW_WIDTH_1] = 1, + [BRW_WIDTH_2] = 2, + [BRW_WIDTH_4] = 4, + [BRW_WIDTH_8] = 8, + [BRW_WIDTH_16] = 16, + }; + const int horz_stride_map[] = { + [BRW_HORIZONTAL_STRIDE_0] = 0, + [BRW_HORIZONTAL_STRIDE_1] = 1, + [BRW_HORIZONTAL_STRIDE_2] = 2, + [BRW_HORIZONTAL_STRIDE_4] = 4, + }; + const int vert_stride_map[] = { + [BRW_VERTICAL_STRIDE_0] = 0, + [BRW_VERTICAL_STRIDE_1] = 1, + [BRW_VERTICAL_STRIDE_2] = 2, + [BRW_VERTICAL_STRIDE_4] = 4, + [BRW_VERTICAL_STRIDE_8] = 8, + [BRW_VERTICAL_STRIDE_16] = 16, + [BRW_VERTICAL_STRIDE_32] = 32, + [BRW_VERTICAL_STRIDE_64] = 64, + [BRW_VERTICAL_STRIDE_128] = 128, + [BRW_VERTICAL_STRIDE_256] = 256, + [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0, + }; + const int exec_size = exec_size_map[cg->inst->exec_size]; + int i; + + /* Sandy Bridge PRM, volume 4 part 2, page 94 */ + + /* 1. (we don't do 32 anyway) */ + assert(exec_size <= 16); + + for (i = 0; i < Elements(cg->src); i++) { + const int width = width_map[cg->src[i].width]; + const int horz_stride = horz_stride_map[cg->src[i].horz_stride]; + const int vert_stride = vert_stride_map[cg->src[i].vert_stride]; + + if (src_is_null(cg, i)) + break; + + /* 3. */ + assert(exec_size >= width); + + if (exec_size == width) { + /* 4. & 5. */ + if (horz_stride) + assert(vert_stride == width * horz_stride); + } + + if (width == 1) { + /* 6. */ + assert(horz_stride == 0); + + /* 7. */ + if (exec_size == 1) + assert(vert_stride == 0); + } + + /* 8. */ + if (!vert_stride && !horz_stride) + assert(width == 1); + } + + /* derived from 10.1.2. & 10.2. 
*/ + assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0); +} + +static unsigned +translate_vfile(enum toy_file file) +{ + switch (file) { + case TOY_FILE_ARF: return BRW_ARCHITECTURE_REGISTER_FILE; + case TOY_FILE_GRF: return BRW_GENERAL_REGISTER_FILE; + case TOY_FILE_MRF: return BRW_MESSAGE_REGISTER_FILE; + case TOY_FILE_IMM: return BRW_IMMEDIATE_VALUE; + default: + assert(!"unhandled toy file"); + return BRW_GENERAL_REGISTER_FILE; + } +} + +static unsigned +translate_vtype(enum toy_type type) +{ + switch (type) { + case TOY_TYPE_F: return BRW_REGISTER_TYPE_F; + case TOY_TYPE_D: return BRW_REGISTER_TYPE_D; + case TOY_TYPE_UD: return BRW_REGISTER_TYPE_UD; + case TOY_TYPE_W: return BRW_REGISTER_TYPE_W; + case TOY_TYPE_UW: return BRW_REGISTER_TYPE_UW; + case TOY_TYPE_V: return BRW_REGISTER_TYPE_V; + default: + assert(!"unhandled toy type"); + return BRW_REGISTER_TYPE_F; + } +} + +static unsigned +translate_writemask(enum toy_writemask writemask) +{ + /* TOY_WRITEMASK_* are compatible with the hardware definitions */ + assert(writemask <= 0xf); + return writemask; +} + +static unsigned +translate_swizzle(enum toy_swizzle swizzle) +{ + /* TOY_SWIZZLE_* are compatible with the hardware definitions */ + assert(swizzle <= 3); + return swizzle; +} + +/** + * Prepare for generating an instruction. 
+ */ +static void +codegen_prepare(struct codegen *cg, const struct toy_inst *inst, + int pc, int rect_linear_width) +{ + int i; + + cg->inst = inst; + cg->pc = pc; + + cg->flag_sub_reg_num = 0; + + cg->dst.file = translate_vfile(inst->dst.file); + cg->dst.type = translate_vtype(inst->dst.type); + cg->dst.indirect = inst->dst.indirect; + cg->dst.indirect_subreg = inst->dst.indirect_subreg; + cg->dst.origin = inst->dst.val32; + + /* + * From the Sandy Bridge PRM, volume 4 part 2, page 81: + * + * "For a word or an unsigned word immediate data, software must + * replicate the same 16-bit immediate value to both the lower word + * and the high word of the 32-bit immediate field in an instruction." + */ + if (inst->dst.file == TOY_FILE_IMM) { + switch (inst->dst.type) { + case TOY_TYPE_W: + case TOY_TYPE_UW: + cg->dst.origin &= 0xffff; + cg->dst.origin |= cg->dst.origin << 16; + break; + default: + break; + } + } + + cg->dst.writemask = translate_writemask(inst->dst.writemask); + + switch (inst->dst.rect) { + case TOY_RECT_LINEAR: + cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1; + break; + default: + assert(!"unsupported dst region"); + cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1; + break; + } + + for (i = 0; i < Elements(cg->src); i++) { + struct codegen_src *src = &cg->src[i]; + + src->file = translate_vfile(inst->src[i].file); + src->type = translate_vtype(inst->src[i].type); + src->indirect = inst->src[i].indirect; + src->indirect_subreg = inst->src[i].indirect_subreg; + src->origin = inst->src[i].val32; + + /* do the same for src */ + if (inst->dst.file == TOY_FILE_IMM) { + switch (inst->src[i].type) { + case TOY_TYPE_W: + case TOY_TYPE_UW: + src->origin &= 0xffff; + src->origin |= src->origin << 16; + break; + default: + break; + } + } + + src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x); + src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y); + src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z); + src->swizzle[3] = 
translate_swizzle(inst->src[i].swizzle_w); + src->absolute = inst->src[i].absolute; + src->negate = inst->src[i].negate; + + switch (inst->src[i].rect) { + case TOY_RECT_LINEAR: + switch (rect_linear_width) { + case 1: + src->vert_stride = BRW_VERTICAL_STRIDE_1; + src->width = BRW_WIDTH_1; + break; + case 2: + src->vert_stride = BRW_VERTICAL_STRIDE_2; + src->width = BRW_WIDTH_2; + break; + case 4: + src->vert_stride = BRW_VERTICAL_STRIDE_4; + src->width = BRW_WIDTH_4; + break; + case 8: + src->vert_stride = BRW_VERTICAL_STRIDE_8; + src->width = BRW_WIDTH_8; + break; + case 16: + src->vert_stride = BRW_VERTICAL_STRIDE_16; + src->width = BRW_WIDTH_16; + break; + default: + assert(!"unsupported TOY_RECT_LINEAR width"); + src->vert_stride = BRW_VERTICAL_STRIDE_1; + src->width = BRW_WIDTH_1; + break; + } + src->horz_stride = BRW_HORIZONTAL_STRIDE_1; + break; + case TOY_RECT_041: + src->vert_stride = BRW_VERTICAL_STRIDE_0; + src->width = BRW_WIDTH_4; + src->horz_stride = BRW_HORIZONTAL_STRIDE_1; + break; + case TOY_RECT_010: + src->vert_stride = BRW_VERTICAL_STRIDE_0; + src->width = BRW_WIDTH_1; + src->horz_stride = BRW_HORIZONTAL_STRIDE_0; + break; + case TOY_RECT_220: + src->vert_stride = BRW_VERTICAL_STRIDE_2; + src->width = BRW_WIDTH_2; + src->horz_stride = BRW_HORIZONTAL_STRIDE_0; + break; + case TOY_RECT_440: + src->vert_stride = BRW_VERTICAL_STRIDE_4; + src->width = BRW_WIDTH_4; + src->horz_stride = BRW_HORIZONTAL_STRIDE_0; + break; + case TOY_RECT_240: + src->vert_stride = BRW_VERTICAL_STRIDE_2; + src->width = BRW_WIDTH_4; + src->horz_stride = BRW_HORIZONTAL_STRIDE_0; + break; + default: + assert(!"unsupported src region"); + src->vert_stride = BRW_VERTICAL_STRIDE_1; + src->width = BRW_WIDTH_1; + src->horz_stride = BRW_HORIZONTAL_STRIDE_1; + break; + } + } +} + +/** + * Generate HW shader code. The instructions should have been legalized. 
+ */ +void * +toy_compiler_assemble(struct toy_compiler *tc, int *size) +{ + const struct toy_inst *inst; + uint32_t *code; + int pc; + + code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t)); + if (!code) + return NULL; + + pc = 0; + tc_head(tc); + while ((inst = tc_next(tc)) != NULL) { + uint32_t *dw = &code[pc * 4]; + struct codegen cg; + + if (pc >= tc->num_instructions) { + tc_fail(tc, "wrong instructoun count"); + break; + } + + codegen_prepare(&cg, inst, pc, tc->rect_linear_width); + codegen_validate_region_restrictions(&cg); + + switch (inst->opcode) { + case BRW_OPCODE_MAD: + codegen_inst_3src(&cg, dw); + break; + default: + codegen_inst(&cg, dw); + break; + } + + pc++; + } + + /* never return an invalid kernel */ + if (tc->fail) { + FREE(code); + return NULL; + } + + if (size) + *size = pc * 4 * sizeof(uint32_t); + + return code; +} diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c b/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c new file mode 100644 index 00000000000..bedbc3d53c8 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c @@ -0,0 +1,1385 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <stdarg.h> + +typedef short GLshort; +typedef int GLint; +typedef unsigned char GLubyte; +typedef unsigned int GLuint; +typedef float GLfloat; +#include <stdint.h> +#include "brw_defines.h" +#include "brw_structs.h" +static int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); + +#include "toy_compiler.h" + +void +toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size) +{ + /* set this to true to dump the hex */ + const bool dump_hex = false; + const struct brw_instruction *instructions = kernel; + int i; + + for (i = 0; i < size / sizeof(*instructions); i++) { + if (dump_hex) { + const uint32_t *dwords = (const uint32_t *) &instructions[i]; + ilo_printf("0x%08x 0x%08x 0x%08x 0x%08x ", + dwords[3], dwords[2], dwords[1], dwords[0]); + } + + brw_disasm(stderr, (struct brw_instruction *) &instructions[i], + ILO_GEN_GET_MAJOR(tc->gen)); + } +} + +static const struct opcode_desc { + char *name; + int nsrc; + int ndst; +} opcode_descs[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + 
[BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 }, + [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", 
.nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; +static const struct opcode_desc *opcode = opcode_descs; + +static const char * const conditional_modifier[16] = { + [BRW_CONDITIONAL_NONE] = "", + [BRW_CONDITIONAL_Z] = ".e", + [BRW_CONDITIONAL_NZ] = ".ne", + [BRW_CONDITIONAL_G] = ".g", + [BRW_CONDITIONAL_GE] = ".ge", + [BRW_CONDITIONAL_L] = ".l", + [BRW_CONDITIONAL_LE] = ".le", + [BRW_CONDITIONAL_R] = ".r", + [BRW_CONDITIONAL_O] = ".o", + [BRW_CONDITIONAL_U] = ".u", +}; + +static const char * const negate[2] = { + [0] = "", + [1] = "-", +}; + +static const char * const _abs[2] = { + [0] = "", + [1] = "(abs)", +}; + +static const char * const vert_stride[16] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4", + [4] = "8", + [5] = "16", + [6] = "32", + [15] = "VxH", +}; + +static const char * const width[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", +}; + +static const char * const horiz_stride[4] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4" +}; + +static const char * const chan_sel[4] = { + [0] = 
"x", + [1] = "y", + [2] = "z", + [3] = "w", +}; + +static const char * const debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +static const char * const saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +static const char * const accwr[2] = { + [0] = "", + [1] = "AccWrEnable" +}; + +static const char * const wectrl[2] = { + [0] = "WE_normal", + [1] = "WE_all" +}; + +static const char * const exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +static const char * const pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +static const char * const pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +static const char * const pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = ".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +static const char * const thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +static const char * const compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", + [3] = "compr4", +}; + +static const char * const dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +static const char * const mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +static const char * const access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +static const char * const reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +const int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + +static const char * const reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +static const char * const writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] 
= ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +static const char * const end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +static const char * const target_function[16] = { + [BRW_SFID_NULL] = "null", + [BRW_SFID_MATH] = "math", + [BRW_SFID_SAMPLER] = "sampler", + [BRW_SFID_MESSAGE_GATEWAY] = "gateway", + [BRW_SFID_DATAPORT_READ] = "read", + [BRW_SFID_DATAPORT_WRITE] = "write", + [BRW_SFID_URB] = "urb", + [BRW_SFID_THREAD_SPAWNER] = "thread_spawner" +}; + +static const char * const target_function_gen6[16] = { + [BRW_SFID_NULL] = "null", + [BRW_SFID_MATH] = "math", + [BRW_SFID_SAMPLER] = "sampler", + [BRW_SFID_MESSAGE_GATEWAY] = "gateway", + [BRW_SFID_URB] = "urb", + [BRW_SFID_THREAD_SPAWNER] = "thread_spawner", + [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler", + [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render", + [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const", + [GEN7_SFID_DATAPORT_DATA_CACHE] = "data" +}; + +static const char * const dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT 
write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + +static const char * const math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_FDIV] = "fdiv", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod", +}; + +static const char * const math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +static const char * const math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +static const char * const math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +static const char * const math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +static const char * const urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + +static const char * const urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +static const char * const urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +static const char * const urb_used[2] = { + [0] = "", + [1] = "used" +}; + +static const char * const urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +static const char * const sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + + +static int column; + +static int string (FILE *file, const char *string) +{ + fputs (string, file); + column += strlen (string); + return 0; +} + +static int format (FILE *f, const char *format, ...) 
+{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + va_end (args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, const char *name, const char * const ctrl[], + GLuint id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) +{ + int err = 0; + + /* Clear the Compr4 instruction compression bit. 
*/ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_FLAG: + format (file, "f%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, "%d", _reg_nr); + } + return err; +} + +static int dest (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); + string (file, "<"); + err |= control (file, "horiz stride", horiz_stride, inst->bits1.da1.dest_horiz_stride, NULL); + string (file, ">"); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); + if (inst->bits1.ia1.dest_indirect_offset) + format 
(file, " %d", inst->bits1.ia1.dest_indirect_offset); + string (file, "]<"); + err |= control (file, "horiz stride", horiz_stride, inst->bits1.ia1.dest_horiz_stride, NULL); + string (file, ">"); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + return 0; +} + +static int dest_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + uint32_t reg_file; + + if (inst->bits1.da3src.dest_reg_file) + reg_file = BRW_MESSAGE_REGISTER_FILE; + else + reg_file = BRW_GENERAL_REGISTER_FILE; + + err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da3src.dest_subreg_nr) + format (file, ".%d", inst->bits1.da3src.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL); + + return 0; +} + +static int src_align1_region (FILE *file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, 
NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride, + GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_ia1 (FILE *file, + GLuint type, + GLuint _reg_file, + GLint _addr_imm, + GLuint _addr_subreg_nr, + GLuint _negate, + GLuint __abs, + GLuint _addr_mode, + GLuint _horiz_stride, + GLuint _width, + GLuint _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + GLuint _reg_type, + GLuint _reg_file, + GLuint _vert_stride, + GLuint _reg_nr, + GLuint _subreg_nr, + GLuint __abs, + GLuint _negate, + GLuint swz_x, + GLuint swz_y, + GLuint swz_z, + GLuint swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. 
*/ + format (file, ".%d", 16 / reg_type_size[_reg_type]); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",4,1>"); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + return err; +} + +static int src0_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + GLuint swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3; + GLuint swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3; + GLuint swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3; + GLuint swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3; + + err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr); + if (err == -1) + return 0; + if (inst->bits2.da3src.src0_subreg_nr) + format (file, ".%d", inst->bits2.da3src.src0_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && 
+ swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + +static int src1_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + GLuint swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3; + GLuint swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3; + GLuint swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3; + GLuint swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3; + GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low | + (inst->bits3.da3src.src1_subreg_nr_high << 2)); + + err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate, + NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, + inst->bits3.da3src.src1_reg_nr); + if (err == -1) + return 0; + if (src1_subreg_nr) + format (file, ".%d", src1_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, 
NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +static int src2_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + GLuint swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3; + GLuint swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3; + GLuint swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3; + GLuint swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3; + + err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate, + NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, + inst->bits3.da3src.src2_reg_nr); + if (err == -1) + return 0; + if (inst->bits3.da3src.src2_subreg_nr) + format (file, ".%d", inst->bits3.da3src.src2_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", inst->bits3.ud); + break; + case 
BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format (file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + } + return 0; +} + +static int src0 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + 
inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int esize[6] = { + [0] = 1, + [1] = 2, + [2] = 4, + [3] = 8, + [4] = 16, + [5] = 32, +}; + +static int qtr_ctrl(FILE *file, struct brw_instruction *inst) +{ + int qtr_ctl = inst->header.compression_control; + int exec_size = 
esize[inst->header.execution_size]; + + if (exec_size == 8) { + switch (qtr_ctl) { + case 0: + string (file, " 1Q"); + break; + case 1: + string (file, " 2Q"); + break; + case 2: + string (file, " 3Q"); + break; + case 3: + string (file, " 4Q"); + break; + } + } else if (exec_size == 16){ + if (qtr_ctl < 2) + string (file, " 1H"); + else + string (file, " 2H"); + } + return 0; +} + +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0); + if (inst->bits2.da1.flag_subreg_nr) + format (file, ".%d", inst->bits2.da1.flag_subreg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode == BRW_OPCODE_MATH) { + string (file, " "); + err |= control (file, "function", math_function, + inst->header.destreg__conditionalmod, NULL); + } else if (inst->header.opcode != BRW_OPCODE_SEND && + inst->header.opcode != BRW_OPCODE_SENDC) { + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + /* If we're using the conditional modifier, print which flags reg is + * used for it. Note that on gen6+, the embedded-condition SEL and + * control flow doesn't update flags. 
+ */ + if (inst->header.destreg__conditionalmod && + (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL && + inst->header.opcode != BRW_OPCODE_IF && + inst->header.opcode != BRW_OPCODE_WHILE))) { /* the flag register number is read from the instruction only when gen >= 7; 0 is printed otherwise */ + format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0); + if (inst->bits2.da1.flag_subreg_nr) + format (file, ".%d", inst->bits2.da1.flag_subreg_nr); + } + } + + if (inst->header.opcode != BRW_OPCODE_NOP) { /* every opcode except NOP gets its execution size in parentheses */ + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + string (file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].nsrc == 3) { /* three-source form: operands are padded to columns 16, 32, 48 and 64 */ + pad (file, 16); + err |= dest_3src (file, inst); + + pad (file, 32); + err |= src0_3src (file, inst); + + pad (file, 48); + err |= src1_3src (file, inst); + + pad (file, 64); + err |= src2_3src (file, inst); + } else { /* opcodes without a destination print their jump fields instead, chosen by gen and opcode */ + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { + format (file, " %d", inst->bits3.break_cont.jip); + } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF || + inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { + format (file, " %d", inst->bits1.branch_gen6.jump_count); + } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK || + inst->header.opcode == BRW_OPCODE_CONTINUE || + inst->header.opcode == BRW_OPCODE_HALT)) || + (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) { + format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip); + } else if (inst->header.opcode == BRW_OPCODE_JMPI) { + format (file, " %d", inst->bits3.d); + } + + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file,
inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + } + + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) { /* SEND/SENDC: print the message descriptor on a second line */ + enum brw_message_target target; + /* the target is read from the header on gen6+, from bits2 on gen5 and from bits3 on older gens */ + if (gen >= 6) + target = inst->header.destreg__conditionalmod; + else if (gen == 5) + target = inst->bits2.send_gen5.sfid; + else + target = inst->bits3.generic.msg_target; + + newline (file); + pad (file, 16); + space = 0; + + if (gen >= 6) { + err |= control (file, "target function", target_function_gen6, + target, &space); + } else { + err |= control (file, "target function", target_function, + target, &space); + } + + switch (target) { + case BRW_SFID_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_SFID_SAMPLER: + if (gen >= 7) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen7.binding_table_index, + inst->bits3.sampler_gen7.sampler, + inst->bits3.sampler_gen7.msg_type, + inst->bits3.sampler_gen7.simd_mode); + } else if (gen >= 5) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (0 /* FINISHME: is_g4x */) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", + sampler_target_format, +
inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } + break; + case BRW_SFID_DATAPORT_READ: + if (gen >= 6) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else if (gen >= 5 /* FINISHME: || is_g4x */) { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read_gen5.binding_table_index, + inst->bits3.dp_read_gen5.msg_control, + inst->bits3.dp_read_gen5.msg_type); + } else { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read.binding_table_index, + inst->bits3.dp_read.msg_control, + inst->bits3.dp_read.msg_type); + } + break; + + case BRW_SFID_DATAPORT_WRITE: + if (gen >= 7) { /* NOTE(review): msg_type is printed twice here, once through the name table and once as a number */ + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen7_dp.msg_type, &space); + + format (file, ", %d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + } else if (gen == 6) { + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else { + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.last_render_target << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + } + break; + + case BRW_SFID_URB: + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } + + space = 1; + if (gen >= 5) { + err |= control (file, "urb opcode", urb_opcode, + inst->bits3.urb_gen5.opcode, &space); + } + err |= control (file, "urb swizzle", urb_swizzle, +
inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_SFID_THREAD_SPAWNER: /* no target-specific fields are printed for this target */ + break; + case GEN7_SFID_DATAPORT_DATA_CACHE: + format (file, " (%d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + break; + + + default: + format (file, "unsupported target %d", target); + break; + } + if (space) + string (file, " "); + if (gen >= 5) { /* message and response lengths come from the gen5 layout on gen5+ and the generic layout before that */ + format (file, "mlen %d", + inst->bits3.generic_gen5.msg_length); + format (file, " rlen %d", + inst->bits3.generic_gen5.response_length); + } else { + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { /* trailing "{ ... }" list of instruction options; not printed for NOP */ + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + if (gen >= 6) + err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space); + else + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + + if (gen >= 6) + err |= qtr_ctrl (file, inst); + else { /* before gen6: a compressed instruction writing an MRF whose register number has bit 7 set is shown as "compr4" */ + if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED && + opcode[inst->header.opcode].ndst > 0 && + inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE && + inst->bits1.da1.dest_reg_nr & (1 << 7)) { + format (file, " compr4"); + } else { + err |= control (file, "compression control", compr_ctrl, + inst->header.compression_control, &space); + } + } + + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (gen >= 6) + err
|= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; /* OR of every helper status collected above */ +} diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_reg.h b/src/gallium/drivers/ilo/shader/toy_compiler_reg.h new file mode 100644 index 00000000000..8c11b3a3275 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_compiler_reg.h @@ -0,0 +1,800 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#ifndef TOY_REG_H +#define TOY_REG_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" /* for assert() */ +#include "util/u_math.h" /* for union fi */ + +/* a toy reg is 256-bit wide; TOY_REG_WIDTH is that width in bytes */ +#define TOY_REG_WIDTH 32 + +/** + * Register files. + */ +enum toy_file { + /* virtual register file */ + TOY_FILE_VRF, + + TOY_FILE_ARF, + TOY_FILE_GRF, + TOY_FILE_MRF, + TOY_FILE_IMM, /* operand is an immediate kept in val32 */ + + TOY_FILE_COUNT, +}; + +/** + * Register types. + */ +enum toy_type { + TOY_TYPE_F, + TOY_TYPE_D, + TOY_TYPE_UD, /* F, D and UD are 4 bytes wide, see toy_type_size() */ + TOY_TYPE_W, + TOY_TYPE_UW, /* W and UW are 2 bytes wide, see toy_type_size() */ + TOY_TYPE_V, /* only valid for immediates */ + + TOY_TYPE_COUNT, +}; + +/** + * Register rectangles. The three numbers stand for vertical stride, width, + * and horizontal stride respectively. + */ +enum toy_rect { + TOY_RECT_LINEAR, + TOY_RECT_041, /* e.g. vertical stride 0, width 4, horizontal stride 1 */ + TOY_RECT_010, + TOY_RECT_220, + TOY_RECT_440, + TOY_RECT_240, + + TOY_RECT_COUNT, +}; + +/** + * Source swizzles. They are compatible with TGSI_SWIZZLE_x and hardware + * values. + */ +enum toy_swizzle { + TOY_SWIZZLE_X = 0, + TOY_SWIZZLE_Y = 1, + TOY_SWIZZLE_Z = 2, + TOY_SWIZZLE_W = 3, +}; + +/** + * Destination writemasks. They are compatible with TGSI_WRITEMASK_x and + * hardware values.
+ */ +enum toy_writemask { /* one bit per channel, at the bit position given by the matching TOY_SWIZZLE_x value */ + TOY_WRITEMASK_X = (1 << TOY_SWIZZLE_X), + TOY_WRITEMASK_Y = (1 << TOY_SWIZZLE_Y), + TOY_WRITEMASK_Z = (1 << TOY_SWIZZLE_Z), + TOY_WRITEMASK_W = (1 << TOY_SWIZZLE_W), + TOY_WRITEMASK_XY = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y), + TOY_WRITEMASK_XZ = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z), + TOY_WRITEMASK_XW = (TOY_WRITEMASK_X | TOY_WRITEMASK_W), + TOY_WRITEMASK_YZ = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z), + TOY_WRITEMASK_YW = (TOY_WRITEMASK_Y | TOY_WRITEMASK_W), + TOY_WRITEMASK_ZW = (TOY_WRITEMASK_Z | TOY_WRITEMASK_W), + TOY_WRITEMASK_XYZ = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_Z), + TOY_WRITEMASK_XYW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_W), + TOY_WRITEMASK_XZW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z | TOY_WRITEMASK_W), + TOY_WRITEMASK_YZW = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z | TOY_WRITEMASK_W), + TOY_WRITEMASK_XYZW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | + TOY_WRITEMASK_Z | TOY_WRITEMASK_W), +}; + +/** + * Destination operand. + */ +struct toy_dst { + unsigned file:3; /* TOY_FILE_x */ + unsigned type:3; /* TOY_TYPE_x */ + unsigned rect:3; /* TOY_RECT_x */ + unsigned indirect:1; /* true or false */ + unsigned indirect_subreg:6; /* which subreg of a0? */ + + unsigned writemask:4; /* TOY_WRITEMASK_x */ + unsigned pad:12; /* brings the bitfields above to 32 bits in total */ + + uint32_t val32; /* byte offset into the file (reg * TOY_REG_WIDTH + subreg bytes), or the immediate value for TOY_FILE_IMM */ +}; + +/** + * Source operand. + */ +struct toy_src { + unsigned file:3; /* TOY_FILE_x */ + unsigned type:3; /* TOY_TYPE_x */ + unsigned rect:3; /* TOY_RECT_x */ + unsigned indirect:1; /* true or false */ + unsigned indirect_subreg:6; /* which subreg of a0? */ + + unsigned swizzle_x:2; /* TOY_SWIZZLE_x */ + unsigned swizzle_y:2; /* TOY_SWIZZLE_x */ + unsigned swizzle_z:2; /* TOY_SWIZZLE_x */ + unsigned swizzle_w:2; /* TOY_SWIZZLE_x */ + unsigned absolute:1; /* true or false */ + unsigned negate:1; /* true or false */ + unsigned pad:6; /* brings the bitfields above to 32 bits in total */ + + uint32_t val32; /* byte offset into the file, as in toy_dst; presumably the immediate value for TOY_FILE_IMM - TODO confirm against the source constructors */ +}; + +/** + * Return true if the file is virtual.
+ */ +static inline bool +toy_file_is_virtual(enum toy_file file) +{ + return (file == TOY_FILE_VRF); +} + +/** + * Return true if the file is a hardware one. + */ +static inline bool +toy_file_is_hw(enum toy_file file) +{ + return !toy_file_is_virtual(file); +} + +/** + * Return the size of the file. + */ +static inline uint32_t +toy_file_size(enum toy_file file) +{ + switch (file) { + case TOY_FILE_GRF: + return 256 * TOY_REG_WIDTH; + case TOY_FILE_MRF: + /* there is no MRF on GEN7+ */ + return 256 * TOY_REG_WIDTH; + default: + assert(!"invalid toy file"); + return 0; + } +} + +/** + * Return the size of the type. + */ +static inline int +toy_type_size(enum toy_type type) +{ + switch (type) { + case TOY_TYPE_F: + case TOY_TYPE_D: + case TOY_TYPE_UD: + return 4; + case TOY_TYPE_W: + case TOY_TYPE_UW: + return 2; + case TOY_TYPE_V: + default: + assert(!"invalid toy type"); + return 0; + } +} + +/** + * Return true if the destination operand is null. + */ +static inline bool +tdst_is_null(struct toy_dst dst) +{ + /* BRW_ARF_NULL happens to be 0 */ + return (dst.file == TOY_FILE_ARF && dst.val32 == 0); +} + +/** + * Validate the destination operand. 
+ */ +static inline struct toy_dst +tdst_validate(struct toy_dst dst) +{ + switch (dst.file) { + case TOY_FILE_VRF: + case TOY_FILE_ARF: + case TOY_FILE_MRF: + assert(!dst.indirect); + if (dst.file == TOY_FILE_MRF) + assert(dst.val32 < toy_file_size(dst.file)); + break; + case TOY_FILE_GRF: + if (!dst.indirect) + assert(dst.val32 < toy_file_size(dst.file)); + break; + case TOY_FILE_IMM: + /* yes, dst can be IMM of type W (for IF/ELSE/ENDIF/WHILE) */ + assert(!dst.indirect); + assert(dst.type == TOY_TYPE_W); + break; + default: + assert(!"invalid dst file"); + break; + } + + switch (dst.type) { + case TOY_TYPE_V: + assert(!"invalid dst type"); + break; + default: + break; + } + + assert(dst.rect == TOY_RECT_LINEAR); + if (dst.file != TOY_FILE_IMM) + assert(dst.val32 % toy_type_size(dst.type) == 0); + + assert(dst.writemask <= TOY_WRITEMASK_XYZW); + + return dst; +} + +/** + * Change the type of the destination operand. + */ +static inline struct toy_dst +tdst_type(struct toy_dst dst, enum toy_type type) +{ + dst.type = type; + return tdst_validate(dst); +} + +/** + * Change the type of the destination operand to TOY_TYPE_D. + */ +static inline struct toy_dst +tdst_d(struct toy_dst dst) +{ + return tdst_type(dst, TOY_TYPE_D); +} + +/** + * Change the type of the destination operand to TOY_TYPE_UD. + */ +static inline struct toy_dst +tdst_ud(struct toy_dst dst) +{ + return tdst_type(dst, TOY_TYPE_UD); +} + +/** + * Change the type of the destination operand to TOY_TYPE_W. + */ +static inline struct toy_dst +tdst_w(struct toy_dst dst) +{ + return tdst_type(dst, TOY_TYPE_W); +} + +/** + * Change the type of the destination operand to TOY_TYPE_UW. + */ +static inline struct toy_dst +tdst_uw(struct toy_dst dst) +{ + return tdst_type(dst, TOY_TYPE_UW); +} + +/** + * Change the rectangle of the destination operand. 
+ */ +static inline struct toy_dst +tdst_rect(struct toy_dst dst, enum toy_rect rect) +{ + dst.rect = rect; + return tdst_validate(dst); +} + +/** + * Apply writemask to the destination operand. Note that the current + * writemask is honored. + */ +static inline struct toy_dst +tdst_writemask(struct toy_dst dst, enum toy_writemask writemask) +{ + dst.writemask &= writemask; + return tdst_validate(dst); +} + +/** + * Offset the destination operand. + */ +static inline struct toy_dst +tdst_offset(struct toy_dst dst, int reg, int subreg) +{ + dst.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(dst.type); + return tdst_validate(dst); +} + +/** + * Construct a destination operand. + */ +static inline struct toy_dst +tdst_full(enum toy_file file, enum toy_type type, enum toy_rect rect, + bool indirect, unsigned indirect_subreg, + enum toy_writemask writemask, uint32_t val32) +{ + struct toy_dst dst; + + dst.file = file; + dst.type = type; + dst.rect = rect; + dst.indirect = indirect; + dst.indirect_subreg = indirect_subreg; + dst.writemask = writemask; + dst.pad = 0; + + dst.val32 = val32; + + return tdst_validate(dst); +} + +/** + * Construct a null destination operand. + */ +static inline struct toy_dst +tdst_null(void) +{ + static const struct toy_dst null_dst = { + .file = TOY_FILE_ARF, + .type = TOY_TYPE_F, + .rect = TOY_RECT_LINEAR, + .indirect = false, + .indirect_subreg = 0, + .writemask = TOY_WRITEMASK_XYZW, + .pad = 0, + .val32 = 0, + }; + + return null_dst; +} + +/** + * Construct a destination operand from a source operand. 
+ */ +static inline struct toy_dst +tdst_from(struct toy_src src) +{ + const enum toy_writemask writemask = + (1 << src.swizzle_x) | + (1 << src.swizzle_y) | + (1 << src.swizzle_z) | + (1 << src.swizzle_w); + + return tdst_full(src.file, src.type, src.rect, + src.indirect, src.indirect_subreg, writemask, src.val32); +} + +/** + * Construct a destination operand, assuming the type is TOY_TYPE_F, the + * rectangle is TOY_RECT_LINEAR, and the writemask is TOY_WRITEMASK_XYZW. + */ +static inline struct toy_dst +tdst(enum toy_file file, unsigned reg, unsigned subreg_in_bytes) +{ + const enum toy_type type = TOY_TYPE_F; + const enum toy_rect rect = TOY_RECT_LINEAR; + const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes; + + return tdst_full(file, type, rect, + false, 0, TOY_WRITEMASK_XYZW, val32); +} + +/** + * Construct an immediate destination operand of type TOY_TYPE_W. + */ +static inline struct toy_dst +tdst_imm_w(int16_t w) +{ + const union fi fi = { .i = w }; + + return tdst_full(TOY_FILE_IMM, TOY_TYPE_W, TOY_RECT_LINEAR, + false, 0, TOY_WRITEMASK_XYZW, fi.ui); +} + +/** + * Return true if the source operand is null. + */ +static inline bool +tsrc_is_null(struct toy_src src) +{ + /* BRW_ARF_NULL happens to be 0 */ + return (src.file == TOY_FILE_ARF && src.val32 == 0); +} + +/** + * Return true if the source operand is swizzled. + */ +static inline bool +tsrc_is_swizzled(struct toy_src src) +{ + return (src.swizzle_x != TOY_SWIZZLE_X || + src.swizzle_y != TOY_SWIZZLE_Y || + src.swizzle_z != TOY_SWIZZLE_Z || + src.swizzle_w != TOY_SWIZZLE_W); +} + +/** + * Return true if the source operand is swizzled to the same channel. + */ +static inline bool +tsrc_is_swizzle1(struct toy_src src) +{ + return (src.swizzle_x == src.swizzle_y && + src.swizzle_x == src.swizzle_z && + src.swizzle_x == src.swizzle_w); +} + +/** + * Validate the source operand. 
+ */ +static inline struct toy_src +tsrc_validate(struct toy_src src) +{ + switch (src.file) { + case TOY_FILE_VRF: + case TOY_FILE_ARF: + case TOY_FILE_MRF: + assert(!src.indirect); + if (src.file == TOY_FILE_MRF) + assert(src.val32 < toy_file_size(src.file)); + break; + case TOY_FILE_GRF: + if (!src.indirect) + assert(src.val32 < toy_file_size(src.file)); + break; + case TOY_FILE_IMM: + assert(!src.indirect); + break; + default: + assert(!"invalid src file"); + break; + } + + switch (src.type) { + case TOY_TYPE_V: + assert(src.file == TOY_FILE_IMM); + break; + default: + break; + } + + if (src.file != TOY_FILE_IMM) + assert(src.val32 % toy_type_size(src.type) == 0); + + assert(src.swizzle_x < 4 && src.swizzle_y < 4 && + src.swizzle_z < 4 && src.swizzle_w < 4); + + return src; +} + +/** + * Change the type of the source operand. + */ +static inline struct toy_src +tsrc_type(struct toy_src src, enum toy_type type) +{ + src.type = type; + return tsrc_validate(src); +} + +/** + * Change the type of the source operand to TOY_TYPE_D. + */ +static inline struct toy_src +tsrc_d(struct toy_src src) +{ + return tsrc_type(src, TOY_TYPE_D); +} + +/** + * Change the type of the source operand to TOY_TYPE_UD. + */ +static inline struct toy_src +tsrc_ud(struct toy_src src) +{ + return tsrc_type(src, TOY_TYPE_UD); +} + +/** + * Change the type of the source operand to TOY_TYPE_W. + */ +static inline struct toy_src +tsrc_w(struct toy_src src) +{ + return tsrc_type(src, TOY_TYPE_W); +} + +/** + * Change the type of the source operand to TOY_TYPE_UW. + */ +static inline struct toy_src +tsrc_uw(struct toy_src src) +{ + return tsrc_type(src, TOY_TYPE_UW); +} + +/** + * Change the rectangle of the source operand. + */ +static inline struct toy_src +tsrc_rect(struct toy_src src, enum toy_rect rect) +{ + src.rect = rect; + return tsrc_validate(src); +} + +/** + * Swizzle the source operand. Note that the current swizzles are honored. 
+ */ +static inline struct toy_src +tsrc_swizzle(struct toy_src src, + enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y, + enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w) +{ + const enum toy_swizzle current[4] = { + src.swizzle_x, src.swizzle_y, + src.swizzle_z, src.swizzle_w, + }; + + src.swizzle_x = current[swizzle_x]; + src.swizzle_y = current[swizzle_y]; + src.swizzle_z = current[swizzle_z]; + src.swizzle_w = current[swizzle_w]; + + return tsrc_validate(src); +} + +/** + * Swizzle the source operand to the same channel. Note that the current + * swizzles are honored. + */ +static inline struct toy_src +tsrc_swizzle1(struct toy_src src, enum toy_swizzle swizzle) +{ + return tsrc_swizzle(src, swizzle, swizzle, swizzle, swizzle); +} + +/** + * Set absolute and unset negate of the source operand. + */ +static inline struct toy_src +tsrc_absolute(struct toy_src src) +{ + src.absolute = true; + src.negate = false; + return tsrc_validate(src); +} + +/** + * Negate the source operand. + */ +static inline struct toy_src +tsrc_negate(struct toy_src src) +{ + src.negate = !src.negate; + return tsrc_validate(src); +} + +/** + * Offset the source operand. + */ +static inline struct toy_src +tsrc_offset(struct toy_src src, int reg, int subreg) +{ + src.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(src.type); + return tsrc_validate(src); +} + +/** + * Construct a source operand. 
+ */ +static inline struct toy_src +tsrc_full(enum toy_file file, enum toy_type type, + enum toy_rect rect, bool indirect, unsigned indirect_subreg, + enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y, + enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w, + bool absolute, bool negate, + uint32_t val32) +{ + struct toy_src src; + + src.file = file; + src.type = type; + src.rect = rect; + src.indirect = indirect; + src.indirect_subreg = indirect_subreg; + src.swizzle_x = swizzle_x; + src.swizzle_y = swizzle_y; + src.swizzle_z = swizzle_z; + src.swizzle_w = swizzle_w; + src.absolute = absolute; + src.negate = negate; + src.pad = 0; + + src.val32 = val32; + + return tsrc_validate(src); +} + +/** + * Construct a null source operand. + */ +static inline struct toy_src +tsrc_null(void) +{ + static const struct toy_src null_src = { + .file = TOY_FILE_ARF, + .type = TOY_TYPE_F, + .rect = TOY_RECT_LINEAR, + .indirect = false, + .indirect_subreg = 0, + .swizzle_x = TOY_SWIZZLE_X, + .swizzle_y = TOY_SWIZZLE_Y, + .swizzle_z = TOY_SWIZZLE_Z, + .swizzle_w = TOY_SWIZZLE_W, + .absolute = false, + .negate = false, + .pad = 0, + .val32 = 0, + }; + + return null_src; +} + +/** + * Construct a source operand from a destination operand. + */ +static inline struct toy_src +tsrc_from(struct toy_dst dst) +{ + enum toy_swizzle swizzle[4]; + + if (dst.writemask == TOY_WRITEMASK_XYZW) { + swizzle[0] = TOY_SWIZZLE_X; + swizzle[1] = TOY_SWIZZLE_Y; + swizzle[2] = TOY_SWIZZLE_Z; + swizzle[3] = TOY_SWIZZLE_W; + } + else { + const enum toy_swizzle first = + (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X : + (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y : + (dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z : + (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W : + TOY_SWIZZLE_X; + + swizzle[0] = (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X : first; + swizzle[1] = (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y : first; + swizzle[2] = (dst.writemask & TOY_WRITEMASK_Z) ? 
TOY_SWIZZLE_Z : first; + swizzle[3] = (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W : first; + } + + return tsrc_full(dst.file, dst.type, dst.rect, + dst.indirect, dst.indirect_subreg, + swizzle[0], swizzle[1], swizzle[2], swizzle[3], + false, false, dst.val32); +} + +/** + * Construct a source operand, assuming the type is TOY_TYPE_F, the + * rectangle is TOY_RECT_LINEAR, and no swizzles/absolute/negate. + */ +static inline struct toy_src +tsrc(enum toy_file file, unsigned reg, unsigned subreg_in_bytes) +{ + const enum toy_type type = TOY_TYPE_F; + const enum toy_rect rect = TOY_RECT_LINEAR; + const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes; + + return tsrc_full(file, type, rect, false, 0, + TOY_SWIZZLE_X, TOY_SWIZZLE_Y, + TOY_SWIZZLE_Z, TOY_SWIZZLE_W, + false, false, val32); +} + +/** + * Construct an immediate source operand. + */ +static inline struct toy_src +tsrc_imm(enum toy_type type, uint32_t val32) +{ + return tsrc_full(TOY_FILE_IMM, type, TOY_RECT_LINEAR, false, 0, + TOY_SWIZZLE_X, TOY_SWIZZLE_Y, + TOY_SWIZZLE_Z, TOY_SWIZZLE_W, + false, false, val32); +} + +/** + * Construct an immediate source operand of type TOY_TYPE_F. + */ +static inline struct toy_src +tsrc_imm_f(float f) +{ + const union fi fi = { .f = f }; + return tsrc_imm(TOY_TYPE_F, fi.ui); +} + +/** + * Construct an immediate source operand of type TOY_TYPE_D. + */ +static inline struct toy_src +tsrc_imm_d(int32_t d) +{ + const union fi fi = { .i = d }; + return tsrc_imm(TOY_TYPE_D, fi.ui); +} + +/** + * Construct an immediate source operand of type TOY_TYPE_UD. + */ +static inline struct toy_src +tsrc_imm_ud(uint32_t ud) +{ + const union fi fi = { .ui = ud }; + return tsrc_imm(TOY_TYPE_UD, fi.ui); +} + +/** + * Construct an immediate source operand of type TOY_TYPE_W. + */ +static inline struct toy_src +tsrc_imm_w(int16_t w) +{ + const union fi fi = { .i = w }; + return tsrc_imm(TOY_TYPE_W, fi.ui); +} + +/** + * Construct an immediate source operand of type TOY_TYPE_UW. 
+ */ +static inline struct toy_src +tsrc_imm_uw(uint16_t uw) +{ + const union fi fi = { .ui = uw }; + return tsrc_imm(TOY_TYPE_UW, fi.ui); +} + +/** + * Construct an immediate source operand of type TOY_TYPE_V. + */ +static inline struct toy_src +tsrc_imm_v(uint32_t v) +{ + return tsrc_imm(TOY_TYPE_V, v); +} + +#endif /* TOY_REG_H */ diff --git a/src/gallium/drivers/ilo/shader/toy_helpers.h b/src/gallium/drivers/ilo/shader/toy_helpers.h new file mode 100644 index 00000000000..dca9fd7b4c3 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_helpers.h @@ -0,0 +1,289 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#ifndef TOY_HELPERS_H +#define TOY_HELPERS_H + +#include "toy_compiler.h" + +/** + * Transpose a dst operand. 
+ * + * Instead of processing a single vertex with each of its attributes in one + * register, such as + * + * r0 = [x0, y0, z0, w0] + * + * we want to process four vertices at a time + * + * r0 = [x0, y0, z0, w0] + * r1 = [x1, y1, z1, w1] + * r2 = [x2, y2, z2, w2] + * r3 = [x3, y3, z3, w3] + * + * but with the attribute data "transposed" + * + * r0 = [x0, x1, x2, x3] + * r1 = [y0, y1, y2, y3] + * r2 = [z0, z1, z2, z3] + * r3 = [w0, w1, w2, w3] + * + * This is also known as the SoA form. + */ +static inline void +tdst_transpose(struct toy_dst dst, struct toy_dst *trans) +{ + int i; + + switch (dst.file) { + case TOY_FILE_VRF: + assert(!dst.indirect); + for (i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + trans[i] = tdst_offset(dst, i, 0); + trans[i].writemask = TOY_WRITEMASK_XYZW; + } + else { + trans[i] = tdst_null(); + } + } + break; + case TOY_FILE_ARF: + assert(tdst_is_null(dst)); + for (i = 0; i < 4; i++) + trans[i] = dst; + break; + case TOY_FILE_GRF: + case TOY_FILE_MRF: + case TOY_FILE_IMM: + default: + assert(!"unexpected file in dst transposition"); + for (i = 0; i < 4; i++) + trans[i] = tdst_null(); + break; + } +} + +/** + * Transpose a src operand. 
+ */ +static inline void +tsrc_transpose(struct toy_src src, struct toy_src *trans) +{ + const enum toy_swizzle swizzle[4] = { + src.swizzle_x, src.swizzle_y, + src.swizzle_z, src.swizzle_w, + }; + int i; + + switch (src.file) { + case TOY_FILE_VRF: + assert(!src.indirect); + for (i = 0; i < 4; i++) { + trans[i] = tsrc_offset(src, swizzle[i], 0); + trans[i].swizzle_x = TOY_SWIZZLE_X; + trans[i].swizzle_y = TOY_SWIZZLE_Y; + trans[i].swizzle_z = TOY_SWIZZLE_Z; + trans[i].swizzle_w = TOY_SWIZZLE_W; + } + break; + case TOY_FILE_ARF: + assert(tsrc_is_null(src)); + /* fall through */ + case TOY_FILE_IMM: + for (i = 0; i < 4; i++) + trans[i] = src; + break; + case TOY_FILE_GRF: + case TOY_FILE_MRF: + default: + assert(!"unexpected file in src transposition"); + for (i = 0; i < 4; i++) + trans[i] = tsrc_null(); + break; + } +} + +static inline struct toy_src +tsrc_imm_mdesc(const struct toy_compiler *tc, + bool eot, + unsigned message_length, + unsigned response_length, + bool header_present, + uint32_t function_control) +{ + uint32_t desc; + + assert(message_length >= 1 && message_length <= 15); + assert(response_length >= 0 && response_length <= 16); + assert(function_control < 1 << 19); + + desc = eot << 31 | + message_length << 25 | + response_length << 20 | + header_present << 19 | + function_control; + + return tsrc_imm_ud(desc); +} + +static inline struct toy_src +tsrc_imm_mdesc_sampler(const struct toy_compiler *tc, + unsigned message_length, + unsigned response_length, + bool header_present, + unsigned simd_mode, + unsigned message_type, + unsigned sampler_index, + unsigned binding_table_index) +{ + const bool eot = false; + uint32_t ctrl; + + assert(simd_mode < 4); + assert(sampler_index < 16); + assert(binding_table_index < 256); + + if (tc->gen >= ILO_GEN(7)) { + ctrl = simd_mode << 17 | + message_type << 12 | + sampler_index << 8 | + binding_table_index; + } + else { + ctrl = simd_mode << 16 | + message_type << 12 | + sampler_index << 8 | + 
binding_table_index; + } + + return tsrc_imm_mdesc(tc, eot, message_length, + response_length, header_present, ctrl); +} + +static inline struct toy_src +tsrc_imm_mdesc_data_port(const struct toy_compiler *tc, + bool eot, + unsigned message_length, + unsigned response_length, + bool header_present, + bool send_write_commit_message, + unsigned message_type, + unsigned message_specific_control, + unsigned binding_table_index) +{ + uint32_t ctrl; + + if (tc->gen >= ILO_GEN(7)) { + assert(!send_write_commit_message); + assert((message_specific_control & 0x3f00) == message_specific_control); + + ctrl = message_type << 14 | + (message_specific_control & 0x3f00) | + binding_table_index; + } + else { + assert(!send_write_commit_message || + message_type == GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE); + assert((message_specific_control & 0x1f00) == message_specific_control); + + ctrl = send_write_commit_message << 17 | + message_type << 13 | + (message_specific_control & 0x1f00) | + binding_table_index; + } + + return tsrc_imm_mdesc(tc, eot, message_length, + response_length, header_present, ctrl); +} + +static inline struct toy_src +tsrc_imm_mdesc_data_port_scratch(const struct toy_compiler *tc, + unsigned message_length, + unsigned response_length, + bool write_type, + bool dword_mode, + bool invalidate_after_read, + int num_registers, + int hword_offset) +{ + const bool eot = false; + const bool header_present = true; + uint32_t ctrl; + + assert(tc->gen >= ILO_GEN(7)); + assert(num_registers == 1 || num_registers == 2 || num_registers == 4); + + ctrl = 1 << 18 | + write_type << 17 | + dword_mode << 16 | + invalidate_after_read << 15 | + (num_registers - 1) << 12 | + hword_offset; + + return tsrc_imm_mdesc(tc, eot, message_length, + response_length, header_present, ctrl); +} + +static inline struct toy_src +tsrc_imm_mdesc_urb(const struct toy_compiler *tc, + bool eot, + unsigned message_length, + unsigned response_length, + bool complete, + bool used, + bool allocate, 
+ unsigned swizzle_control, + unsigned global_offset, + unsigned urb_opcode) +{ + const bool header_present = true; + uint32_t ctrl; + + if (tc->gen >= ILO_GEN(7)) { + const bool per_slot_offset = false; + + ctrl = per_slot_offset << 16 | + complete << 15 | + swizzle_control << 14 | + global_offset << 3 | + urb_opcode; + } + else { + ctrl = complete << 15 | + used << 14 | + allocate << 13 | + swizzle_control << 10 | + global_offset << 4 | + urb_opcode; + } + + return tsrc_imm_mdesc(tc, eot, message_length, + response_length, header_present, ctrl); +} + +#endif /* TOY_HELPERS_H */ diff --git a/src/gallium/drivers/ilo/shader/toy_legalize.c b/src/gallium/drivers/ilo/shader/toy_legalize.c new file mode 100644 index 00000000000..04f2a2570f4 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_legalize.c @@ -0,0 +1,632 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "pipe/p_shader_tokens.h" +#include "toy_compiler.h" +#include "toy_tgsi.h" +#include "toy_helpers.h" +#include "toy_legalize.h" + +/** + * Lower an instruction to BRW_OPCODE_SEND(C). + */ +void +toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst, + bool sendc, unsigned sfid) +{ + assert(inst->opcode >= 128); + + inst->opcode = (sendc) ? BRW_OPCODE_SENDC : BRW_OPCODE_SEND; + + /* thread control is reserved */ + assert(inst->thread_ctrl == 0); + + assert(inst->cond_modifier == BRW_CONDITIONAL_NONE); + inst->cond_modifier = sfid; +} + +static int +math_op_to_func(unsigned opcode) +{ + switch (opcode) { + case TOY_OPCODE_INV: return BRW_MATH_FUNCTION_INV; + case TOY_OPCODE_LOG: return BRW_MATH_FUNCTION_LOG; + case TOY_OPCODE_EXP: return BRW_MATH_FUNCTION_EXP; + case TOY_OPCODE_SQRT: return BRW_MATH_FUNCTION_SQRT; + case TOY_OPCODE_RSQ: return BRW_MATH_FUNCTION_RSQ; + case TOY_OPCODE_SIN: return BRW_MATH_FUNCTION_SIN; + case TOY_OPCODE_COS: return BRW_MATH_FUNCTION_COS; + case TOY_OPCODE_FDIV: return BRW_MATH_FUNCTION_FDIV; + case TOY_OPCODE_POW: return BRW_MATH_FUNCTION_POW; + case TOY_OPCODE_INT_DIV_QUOTIENT: return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; + case TOY_OPCODE_INT_DIV_REMAINDER: return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; + default: + assert(!"unknown math opcode"); + return -1; + } +} + +/** + * Lower virtual math opcodes to BRW_OPCODE_MATH. 
+ */ +void +toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst) +{ + struct toy_dst tmp; + int i; + + /* see commit 250770b74d33bb8625c780a74a89477af033d13a */ + for (i = 0; i < Elements(inst->src); i++) { + if (tsrc_is_null(inst->src[i])) + break; + + /* no swizzling in align1 */ + /* XXX how about source modifiers? */ + if (toy_file_is_virtual(inst->src[i].file) && + !tsrc_is_swizzled(inst->src[i]) && + !inst->src[i].absolute && + !inst->src[i].negate) + continue; + + tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type); + tc_MOV(tc, tmp, inst->src[i]); + inst->src[i] = tsrc_from(tmp); + } + + /* FC[0:3] */ + assert(inst->cond_modifier == BRW_CONDITIONAL_NONE); + inst->cond_modifier = math_op_to_func(inst->opcode); + /* FC[4:5] */ + assert(inst->thread_ctrl == 0); + inst->thread_ctrl = 0; + + inst->opcode = BRW_OPCODE_MATH; + tc_move_inst(tc, inst); + + /* no writemask in align1 */ + if (inst->dst.writemask != TOY_WRITEMASK_XYZW) { + struct toy_dst dst = inst->dst; + struct toy_inst *inst2; + + tmp = tc_alloc_tmp(tc); + tmp.type = inst->dst.type; + inst->dst = tmp; + + inst2 = tc_MOV(tc, dst, tsrc_from(tmp)); + inst2->pred_ctrl = inst->pred_ctrl; + } +} + +static uint32_t +absolute_imm(uint32_t imm32, enum toy_type type) +{ + union fi val = { .ui = imm32 }; + + switch (type) { + case TOY_TYPE_F: + val.f = fabs(val.f); + break; + case TOY_TYPE_D: + if (val.i < 0) + val.i = -val.i; + break; + case TOY_TYPE_W: + if ((int16_t) (val.ui & 0xffff) < 0) + val.i = -((int16_t) (val.ui & 0xffff)); + break; + case TOY_TYPE_V: + assert(!"cannot take absoulte of immediates of type V"); + break; + default: + break; + } + + return val.ui; +} + +static uint32_t +negate_imm(uint32_t imm32, enum toy_type type) +{ + union fi val = { .ui = imm32 }; + + switch (type) { + case TOY_TYPE_F: + val.f = -val.f; + break; + case TOY_TYPE_D: + case TOY_TYPE_UD: + val.i = -val.i; + break; + case TOY_TYPE_W: + case TOY_TYPE_UW: + val.i = -((int16_t) (val.ui & 0xffff)); + 
break; + default: + assert(!"negate immediate of unknown type"); + break; + } + + return val.ui; +} + +static void +validate_imm(struct toy_compiler *tc, struct toy_inst *inst) +{ + bool move_inst = false; + int i; + + for (i = 0; i < Elements(inst->src); i++) { + struct toy_dst tmp; + + if (tsrc_is_null(inst->src[i])) + break; + + if (inst->src[i].file != TOY_FILE_IMM) + continue; + + if (inst->src[i].absolute) { + inst->src[i].val32 = + absolute_imm(inst->src[i].val32, inst->src[i].type); + inst->src[i].absolute = false; + } + + if (inst->src[i].negate) { + inst->src[i].val32 = + negate_imm(inst->src[i].val32, inst->src[i].type); + inst->src[i].negate = false; + } + + /* this is the last operand */ + if (i + 1 == Elements(inst->src) || tsrc_is_null(inst->src[i + 1])) + break; + + /* need to use a temp if this imm is not the last operand */ + /* TODO we should simply swap the operands if the op is commutative */ + tmp = tc_alloc_tmp(tc); + tmp = tdst_type(tmp, inst->src[i].type); + tc_MOV(tc, tmp, inst->src[i]); + inst->src[i] = tsrc_from(tmp); + + move_inst = true; + } + + if (move_inst) + tc_move_inst(tc, inst); +} + +static void +lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst) +{ + const enum toy_type inst_type = inst->dst.type; + const struct toy_dst acc0 = + tdst_type(tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0), inst_type); + struct toy_inst *inst2; + + /* only need to take care of integer multiplications */ + if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) + return; + + /* acc0 = (src0 & 0x0000ffff) * src1 */ + tc_MUL(tc, acc0, inst->src[0], inst->src[1]); + + /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */ + inst2 = tc_add2(tc, BRW_OPCODE_MACH, tdst_type(tdst_null(), inst_type), + inst->src[0], inst->src[1]); + inst2->acc_wr_ctrl = true; + + /* dst = acc0 & 0xffffffff */ + tc_MOV(tc, inst->dst, tsrc_from(acc0)); + + tc_discard_inst(tc, inst); +} + +static void +lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst) +{ + 
const enum toy_type inst_type = inst->dst.type; + + if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) { + const struct toy_dst acc0 = tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0); + + tc_MOV(tc, acc0, inst->src[2]); + inst->src[2] = tsrc_null(); + tc_move_inst(tc, inst); + } + else { + struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type); + struct toy_inst *inst2; + + inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]); + lower_opcode_mul(tc, inst2); + + tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]); + + tc_discard_inst(tc, inst); + } +} + +/** + * Legalize the instructions for register allocation. + */ +void +toy_compiler_legalize_for_ra(struct toy_compiler *tc) +{ + struct toy_inst *inst; + + tc_head(tc); + while ((inst = tc_next(tc)) != NULL) { + switch (inst->opcode) { + case BRW_OPCODE_MAC: + lower_opcode_mac(tc, inst); + break; + case BRW_OPCODE_MAD: + /* TODO operands must be floats */ + break; + case BRW_OPCODE_MUL: + lower_opcode_mul(tc, inst); + break; + default: + if (inst->opcode > TOY_OPCODE_LAST_HW) + tc_fail(tc, "internal opcodes not lowered"); + } + } + + /* loop again as the previous pass may add new instructions */ + tc_head(tc); + while ((inst = tc_next(tc)) != NULL) { + validate_imm(tc, inst); + } +} + +static void +patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst) +{ + struct toy_inst *inst2; + int nest_level, dist; + + nest_level = 0; + dist = -1; + + /* search backward */ + LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev, + &tc->instructions, list) { + if (inst2->marker) { + if (inst2->opcode == BRW_OPCODE_DO) { + if (nest_level) { + nest_level--; + } + else { + /* the following instruction */ + dist++; + break; + } + } + + continue; + } + + if (inst2->opcode == BRW_OPCODE_WHILE) + nest_level++; + + dist--; + } + + if (tc->gen >= ILO_GEN(7)) + inst->src[1] = tsrc_imm_w(dist * 2); + else + inst->dst = tdst_imm_w(dist * 2); +} + +static void +patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst) 
+{ + struct toy_inst *inst2; + int nest_level, dist; + int jip, uip; + + nest_level = 0; + dist = 1; + jip = 0; + uip = 0; + + /* search forward */ + LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) { + if (inst2->marker) + continue; + + if (inst2->opcode == BRW_OPCODE_ENDIF) { + if (nest_level) { + nest_level--; + } + else { + uip = dist * 2; + if (!jip) + jip = uip; + break; + } + } + else if (inst2->opcode == BRW_OPCODE_ELSE && + inst->opcode == BRW_OPCODE_IF) { + if (!nest_level) { + /* the following instruction */ + jip = (dist + 1) * 2; + + if (tc->gen == ILO_GEN(6)) { + uip = jip; + break; + } + } + } + else if (inst2->opcode == BRW_OPCODE_IF) { + nest_level++; + } + + dist++; + } + + if (tc->gen >= ILO_GEN(7)) { + /* what should the type be? */ + inst->dst.type = TOY_TYPE_D; + inst->src[0].type = TOY_TYPE_D; + inst->src[1] = tsrc_imm_d(uip << 16 | jip); + } + else { + inst->dst = tdst_imm_w(jip); + } + + inst->thread_ctrl = BRW_THREAD_SWITCH; +} + +static void +patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst) +{ + struct toy_inst *inst2; + bool found = false; + int dist = 1; + + /* search forward for instructions that may enable channels */ + LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) { + if (inst2->marker) + continue; + + switch (inst2->opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + found = true; + break; + default: + break; + } + + if (found) + break; + + dist++; + } + + /* should we set dist to (dist - 1) or 1? 
*/ + if (!found) + dist = 1; + + if (tc->gen >= ILO_GEN(7)) + inst->src[1] = tsrc_imm_w(dist * 2); + else + inst->dst = tdst_imm_w(dist * 2); + + inst->thread_ctrl = BRW_THREAD_SWITCH; +} + +static void +patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst) +{ + struct toy_inst *inst2, *inst3; + int nest_level, dist, jip, uip; + + nest_level = 0; + dist = 1; + jip = 1 * 2; + uip = 1 * 2; + + /* search forward */ + LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) { + if (inst2->marker) { + if (inst2->opcode == BRW_OPCODE_DO) + nest_level++; + continue; + } + + if (inst2->opcode == BRW_OPCODE_ELSE || + inst2->opcode == BRW_OPCODE_ENDIF || + inst2->opcode == BRW_OPCODE_WHILE) { + jip = dist * 2; + break; + } + + dist++; + } + + /* go on to determine uip */ + inst3 = inst2; + LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) { + if (inst2->marker) { + if (inst2->opcode == BRW_OPCODE_DO) + nest_level++; + continue; + } + + if (inst2->opcode == BRW_OPCODE_WHILE) { + if (nest_level) { + nest_level--; + } + else { + /* the following instruction */ + if (tc->gen == ILO_GEN(6) && inst->opcode == BRW_OPCODE_BREAK) + dist++; + + uip = dist * 2; + break; + } + } + + dist++; + } + + /* should the type be D or W? */ + inst->dst.type = TOY_TYPE_D; + inst->src[0].type = TOY_TYPE_D; + inst->src[1] = tsrc_imm_d(uip << 16 | jip); +} + +/** + * Legalize the instructions for assembling. + */ +void +toy_compiler_legalize_for_asm(struct toy_compiler *tc) +{ + struct toy_inst *inst; + int pc = 0; + + tc_head(tc); + while ((inst = tc_next(tc)) != NULL) { + int i; + + pc++; + + /* + * From the Sandy Bridge PRM, volume 4 part 2, page 112: + * + * "Specifically, for instructions with a single source, it only + * uses the first source operand <src0>. In this case, the second + * source operand <src1> must be set to null and also with the same + * type as the first source operand <src0>. 
It is a special case + * when <src0> is an immediate, as an immediate <src0> uses DW3 of + * the instruction word, which is normally used by <src1>. In this + * case, <src1> must be programmed with register file ARF and the + * same data type as <src0>." + * + * Since we already fill unused operands with null, we only need to take + * care of the type. + */ + if (tsrc_is_null(inst->src[1])) + inst->src[1].type = inst->src[0].type; + + switch (inst->opcode) { + case BRW_OPCODE_MATH: + /* math does not support align16 nor exec_size > 8 */ + inst->access_mode = BRW_ALIGN_1; + + if (inst->exec_size == BRW_EXECUTE_16) { + /* + * From the Ivy Bridge PRM, volume 4 part 3, page 192: + * + * "INT DIV function does not support SIMD16." + */ + if (tc->gen < ILO_GEN(7) || + inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || + inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) { + struct toy_inst *inst2; + + inst->exec_size = BRW_EXECUTE_8; + inst->qtr_ctrl = GEN6_COMPRESSION_1Q; + + inst2 = tc_duplicate_inst(tc, inst); + inst2->qtr_ctrl = GEN6_COMPRESSION_2Q; + inst2->dst = tdst_offset(inst2->dst, 1, 0); + inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0); + if (!tsrc_is_null(inst2->src[1])) + inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0); + + pc++; + } + } + break; + case BRW_OPCODE_IF: + if (tc->gen >= ILO_GEN(7) && + inst->cond_modifier != BRW_CONDITIONAL_NONE) { + struct toy_inst *inst2; + + inst2 = tc_duplicate_inst(tc, inst); + + /* replace the original IF by CMP */ + inst->opcode = BRW_OPCODE_CMP; + + /* predicate control instead of condition modifier */ + inst2->dst = tdst_null(); + inst2->src[0] = tsrc_null(); + inst2->src[1] = tsrc_null(); + inst2->cond_modifier = BRW_CONDITIONAL_NONE; + inst2->pred_ctrl = BRW_PREDICATE_NORMAL; + + pc++; + } + break; + default: + break; + } + + /* MRF to GRF */ + if (tc->gen >= ILO_GEN(7)) { + for (i = 0; i < Elements(inst->src); i++) { + if (inst->src[i].file != TOY_FILE_MRF) + continue; + else if 
(tsrc_is_null(inst->src[i])) + break; + + inst->src[i].file = TOY_FILE_GRF; + } + + if (inst->dst.file == TOY_FILE_MRF) + inst->dst.file = TOY_FILE_GRF; + } + } + + tc->num_instructions = pc; + + /* set JIP/UIP */ + tc_head(tc); + while ((inst = tc_next(tc)) != NULL) { + switch (inst->opcode) { + case BRW_OPCODE_IF: + case BRW_OPCODE_ELSE: + patch_if_else_jip(tc, inst); + break; + case BRW_OPCODE_ENDIF: + patch_endif_jip(tc, inst); + break; + case BRW_OPCODE_WHILE: + patch_while_jip(tc, inst); + break; + case BRW_OPCODE_BREAK: + case BRW_OPCODE_CONTINUE: + patch_break_continue_jip(tc, inst); + break; + default: + break; + } + } +} diff --git a/src/gallium/drivers/ilo/shader/toy_legalize.h b/src/gallium/drivers/ilo/shader/toy_legalize.h new file mode 100644 index 00000000000..8e2a120620b --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_legalize.h @@ -0,0 +1,52 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#ifndef TOY_LEGALIZE_H +#define TOY_LEGALIZE_H + +#include "toy_compiler.h" +#include "toy_tgsi.h" + +void +toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst, + bool sendc, unsigned sfid); + +void +toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst); + +void +toy_compiler_allocate_registers(struct toy_compiler *tc, + int start_grf, int end_grf, + int num_grf_per_vrf); + +void +toy_compiler_legalize_for_ra(struct toy_compiler *tc); + +void +toy_compiler_legalize_for_asm(struct toy_compiler *tc); + +#endif /* TOY_LEGALIZE_H */ diff --git a/src/gallium/drivers/ilo/shader/toy_legalize_ra.c b/src/gallium/drivers/ilo/shader/toy_legalize_ra.c new file mode 100644 index 00000000000..e691f127584 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_legalize_ra.c @@ -0,0 +1,628 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include <stdlib.h> /* for qsort() */ +#include "toy_compiler.h" +#include "toy_legalize.h" + +/** + * Live interval of a VRF register. + */ +struct linear_scan_live_interval { + int vrf; + int startpoint; + int endpoint; + + /* + * should this be assigned a consecutive register of the previous + * interval's? + */ + bool consecutive; + + int reg; + + struct list_head list; +}; + +/** + * Linear scan. + */ +struct linear_scan { + struct linear_scan_live_interval *intervals; + int max_vrf, num_vrfs; + + int num_regs; + + struct list_head active_list; + int *free_regs; + int num_free_regs; + + int *vrf_mapping; +}; + +/** + * Return a chunk of registers to the free register pool. + */ +static void +linear_scan_free_regs(struct linear_scan *ls, int reg, int count) +{ + int i; + + for (i = 0; i < count; i++) + ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i; +} + +static int +linear_scan_compare_regs(const void *elem1, const void *elem2) +{ + const int *reg1 = elem1; + const int *reg2 = elem2; + + /* in reverse order */ + return (*reg2 - *reg1); +} + +/** + * Allocate a chunk of registers from the free register pool. 
+ */ +static int +linear_scan_allocate_regs(struct linear_scan *ls, int count) +{ + bool sorted = false; + int reg; + + /* simple cases */ + if (count > ls->num_free_regs) + return -1; + else if (count == 1) + return ls->free_regs[--ls->num_free_regs]; + + /* TODO a free register pool */ + /* TODO reserve some regs for spilling */ + while (true) { + bool found = false; + int start; + + /* + * find a chunk of registers that have consecutive register + * numbers + */ + for (start = ls->num_free_regs - 1; start >= count - 1; start--) { + int i; + + for (i = 1; i < count; i++) { + if (ls->free_regs[start - i] != ls->free_regs[start] + i) + break; + } + + if (i >= count) { + found = true; + break; + } + } + + if (found) { + reg = ls->free_regs[start]; + + if (start != ls->num_free_regs - 1) { + start++; + memmove(&ls->free_regs[start - count], + &ls->free_regs[start], + sizeof(*ls->free_regs) * (ls->num_free_regs - start)); + } + ls->num_free_regs -= count; + break; + } + else if (!sorted) { + /* sort and retry */ + qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs), + linear_scan_compare_regs); + sorted = true; + } + else { + /* failed */ + reg = -1; + break; + } + } + + return reg; +} + +/** + * Add an interval to the active list. + */ +static void +linear_scan_add_active(struct linear_scan *ls, + struct linear_scan_live_interval *interval) +{ + struct linear_scan_live_interval *pos; + + /* keep the active list sorted by endpoints */ + LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) { + if (pos->endpoint >= interval->endpoint) + break; + } + + list_addtail(&interval->list, &pos->list); +} + +/** + * Remove an interval from the active list. + */ +static void +linear_scan_remove_active(struct linear_scan *ls, + struct linear_scan_live_interval *interval) +{ + list_del(&interval->list); +} + +/** + * Remove intervals that are no longer active from the active list. 
+ */ +static void +linear_scan_expire_active(struct linear_scan *ls, int pc) +{ + struct linear_scan_live_interval *interval, *next; + + LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) { + /* + * since we sort intervals on the active list by their endpoints, we + * know that this and the rest of the intervals are still active. + */ + if (interval->endpoint >= pc) + break; + + linear_scan_remove_active(ls, interval); + + /* recycle the reg */ + linear_scan_free_regs(ls, interval->reg, 1); + } +} + +/** + * Spill an interval. + */ +static void +linear_scan_spill(struct linear_scan *ls, + struct linear_scan_live_interval *interval, + bool is_active) +{ + assert(!"no spilling support"); +} + +/** + * Spill a range of intervals. + */ +static void +linear_scan_spill_range(struct linear_scan *ls, int first, int count) +{ + int i; + + for (i = 0; i < count; i++) { + struct linear_scan_live_interval *interval = &ls->intervals[first + i]; + + linear_scan_spill(ls, interval, false); + } +} + +/** + * Perform linear scan to allocate registers for the intervals. 
+ */ +static bool +linear_scan_run(struct linear_scan *ls) +{ + int i; + + i = 0; + while (i < ls->num_vrfs) { + struct linear_scan_live_interval *first = &ls->intervals[i]; + int reg, count; + + /* + * BRW_OPCODE_SEND may write to multiple consecutive registers and we need to + * support that + */ + for (count = 1; i + count < ls->num_vrfs; count++) { + const struct linear_scan_live_interval *interval = + &ls->intervals[i + count]; + + if (interval->startpoint != first->startpoint || + !interval->consecutive) + break; + } + + reg = linear_scan_allocate_regs(ls, count); + + /* expire intervals that are no longer active and try again */ + if (reg < 0) { + linear_scan_expire_active(ls, first->startpoint); + reg = linear_scan_allocate_regs(ls, count); + } + + /* have to spill some intervals */ + if (reg < 0) { + struct linear_scan_live_interval *last_active = + container_of(ls->active_list.prev, + (struct linear_scan_live_interval *) NULL, list); + + /* heuristically spill the interval that ends last */ + if (count > 1 || last_active->endpoint < first->endpoint) { + linear_scan_spill_range(ls, i, count); + i += count; + continue; + } + + /* make some room for the new interval */ + linear_scan_spill(ls, last_active, true); + reg = linear_scan_allocate_regs(ls, count); + if (reg < 0) { + assert(!"failed to spill any register"); + return false; + } + } + + while (count--) { + struct linear_scan_live_interval *interval = &ls->intervals[i++]; + + interval->reg = reg++; + linear_scan_add_active(ls, interval); + + ls->vrf_mapping[interval->vrf] = interval->reg; + + /* + * this should and must be the case because of how we initialized the + * intervals + */ + assert(interval->vrf - first->vrf == interval->reg - first->reg); + } + } + + return true; +} + +/** + * Add a new interval. 
+ */ +static void +linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc) +{ + if (ls->intervals[vrf].vrf) + return; + + ls->intervals[vrf].vrf = vrf; + ls->intervals[vrf].startpoint = pc; + + ls->num_vrfs++; + if (vrf > ls->max_vrf) + ls->max_vrf = vrf; +} + +/** + * Perform (oversimplified?) live variable analysis. + */ +static void +linear_scan_init_live_intervals(struct linear_scan *ls, + struct toy_compiler *tc) +{ + const struct toy_inst *inst; + int pc, do_pc, while_pc; + + pc = 0; + do_pc = -1; + while_pc = -1; + + tc_head(tc); + while ((inst = tc_next_no_skip(tc)) != NULL) { + const int startpoint = (pc <= while_pc) ? do_pc : pc; + const int endpoint = (pc <= while_pc) ? while_pc : pc; + int vrf, i; + + /* + * assume all registers used in this outermost loop are live through out + * the whole loop + */ + if (inst->marker) { + if (pc > while_pc) { + struct toy_inst *inst2; + int loop_level = 1; + + assert(inst->opcode == BRW_OPCODE_DO); + do_pc = pc; + while_pc = pc + 1; + + /* find the matching BRW_OPCODE_WHILE */ + LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next, + &tc->instructions, list) { + if (inst2->marker) { + assert(inst->opcode == BRW_OPCODE_DO); + loop_level++; + continue; + } + + if (inst2->opcode == BRW_OPCODE_WHILE) { + loop_level--; + if (!loop_level) + break; + } + while_pc++; + } + } + + continue; + } + + if (inst->dst.file == TOY_FILE_VRF) { + int num_dst; + + /* TODO this is a hack */ + if (inst->opcode == BRW_OPCODE_SEND || + inst->opcode == BRW_OPCODE_SENDC) { + const uint32_t mdesc = inst->src[1].val32; + int response_length = (mdesc >> 20) & 0x1f; + + num_dst = response_length; + if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16) + num_dst /= 2; + } + else { + num_dst = 1; + } + + vrf = inst->dst.val32 / TOY_REG_WIDTH; + + for (i = 0; i < num_dst; i++) { + /* first use */ + if (!ls->intervals[vrf].vrf) + linear_scan_add_live_interval(ls, vrf, startpoint); + + ls->intervals[vrf].endpoint = endpoint; + 
ls->intervals[vrf].consecutive = (i > 0); + + vrf++; + } + } + + for (i = 0; i < Elements(inst->src); i++) { + if (inst->src[i].file != TOY_FILE_VRF) + continue; + + vrf = inst->src[i].val32 / TOY_REG_WIDTH; + + /* first use */ + if (!ls->intervals[vrf].vrf) + linear_scan_add_live_interval(ls, vrf, startpoint); + + ls->intervals[vrf].endpoint = endpoint; + } + + pc++; + } +} + +/** + * Clean up after performing linear scan. + */ +static void +linear_scan_cleanup(struct linear_scan *ls) +{ + FREE(ls->vrf_mapping); + FREE(ls->intervals); + FREE(ls->free_regs); +} + +static int +linear_scan_compare_live_intervals(const void *elem1, const void *elem2) +{ + const struct linear_scan_live_interval *interval1 = elem1; + const struct linear_scan_live_interval *interval2 = elem2; + + /* make unused elements appear at the end */ + if (!interval1->vrf) + return 1; + else if (!interval2->vrf) + return -1; + + /* sort by startpoints first, and then by vrf */ + if (interval1->startpoint != interval2->startpoint) + return (interval1->startpoint - interval2->startpoint); + else + return (interval1->vrf - interval2->vrf); + +} + +/** + * Prepare for linear scan. + */ +static bool +linear_scan_init(struct linear_scan *ls, int num_regs, + struct toy_compiler *tc) +{ + int num_intervals, i; + + memset(ls, 0, sizeof(*ls)); + + /* this may be much larger than ls->num_vrfs... 
*/ + num_intervals = tc->next_vrf; + ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0])); + if (!ls->intervals) + return false; + + linear_scan_init_live_intervals(ls, tc); + /* sort intervals by startpoints */ + qsort(ls->intervals, num_intervals, sizeof(*ls->intervals), + linear_scan_compare_live_intervals); + + ls->num_regs = num_regs; + ls->num_free_regs = num_regs; + + ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs)); + if (!ls->free_regs) { + FREE(ls->intervals); + return false; + } + + /* add in reverse order as we will allocate from the tail */ + for (i = 0; i < ls->num_regs; i++) + ls->free_regs[i] = num_regs - i - 1; + + list_inithead(&ls->active_list); + + ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping)); + if (!ls->vrf_mapping) { + FREE(ls->intervals); + FREE(ls->free_regs); + return false; + } + + return true; +} + +/** + * Allocate registers with linear scan. + */ +static void +linear_scan_allocation(struct toy_compiler *tc, + int start_grf, int end_grf, + int num_grf_per_vrf) +{ + const int num_grfs = end_grf - start_grf + 1; + struct linear_scan ls; + struct toy_inst *inst; + + if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc)) + return; + + if (!linear_scan_run(&ls)) { + tc_fail(tc, "failed to allocate registers"); + return; + } + + + tc_head(tc); + while ((inst = tc_next(tc)) != NULL) { + int i; + + if (inst->dst.file == TOY_FILE_VRF) { + const uint32_t val32 = inst->dst.val32; + int reg = val32 / TOY_REG_WIDTH; + int subreg = val32 % TOY_REG_WIDTH; + + /* map to GRF */ + reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf; + + inst->dst.file = TOY_FILE_GRF; + inst->dst.val32 = reg * TOY_REG_WIDTH + subreg; + } + + for (i = 0; i < Elements(inst->src); i++) { + const uint32_t val32 = inst->src[i].val32; + int reg, subreg; + + if (inst->src[i].file != TOY_FILE_VRF) + continue; + + reg = val32 / TOY_REG_WIDTH; + subreg = val32 % TOY_REG_WIDTH; + + /* map to GRF */ + reg = ls.vrf_mapping[reg] * 
num_grf_per_vrf + start_grf; + + inst->src[i].file = TOY_FILE_GRF; + inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg; + } + } + + linear_scan_cleanup(&ls); +} + +/** + * Trivially allocate registers. + */ +static void +trivial_allocation(struct toy_compiler *tc, + int start_grf, int end_grf, + int num_grf_per_vrf) +{ + struct toy_inst *inst; + int max_grf = -1; + + tc_head(tc); + while ((inst = tc_next(tc)) != NULL) { + int i; + + if (inst->dst.file == TOY_FILE_VRF) { + const uint32_t val32 = inst->dst.val32; + int reg = val32 / TOY_REG_WIDTH; + int subreg = val32 % TOY_REG_WIDTH; + + reg = reg * num_grf_per_vrf + start_grf - 1; + + inst->dst.file = TOY_FILE_GRF; + inst->dst.val32 = reg * TOY_REG_WIDTH + subreg; + + if (reg > max_grf) + max_grf = reg; + } + + for (i = 0; i < Elements(inst->src); i++) { + const uint32_t val32 = inst->src[i].val32; + int reg, subreg; + + if (inst->src[i].file != TOY_FILE_VRF) + continue; + + reg = val32 / TOY_REG_WIDTH; + subreg = val32 % TOY_REG_WIDTH; + + reg = reg * num_grf_per_vrf + start_grf - 1; + + inst->src[i].file = TOY_FILE_GRF; + inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg; + + if (reg > max_grf) + max_grf = reg; + } + } + + if (max_grf + num_grf_per_vrf - 1 > end_grf) + tc_fail(tc, "failed to allocate registers"); +} + +/** + * Allocate GRF registers to VRF registers. + */ +void +toy_compiler_allocate_registers(struct toy_compiler *tc, + int start_grf, int end_grf, + int num_grf_per_vrf) +{ + if (true) + linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf); + else + trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf); +} diff --git a/src/gallium/drivers/ilo/shader/toy_optimize.c b/src/gallium/drivers/ilo/shader/toy_optimize.c new file mode 100644 index 00000000000..62a663f4901 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_optimize.c @@ -0,0 +1,71 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "toy_compiler.h" +#include "toy_tgsi.h" +#include "toy_optimize.h" + +/** + * This just eliminates instructions with null dst so far. 
+ */ +static void +eliminate_dead_code(struct toy_compiler *tc) +{ + struct toy_inst *inst; + + tc_head(tc); + while ((inst = tc_next(tc)) != NULL) { + switch (inst->opcode) { + case BRW_OPCODE_IF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_WHILE: + case BRW_OPCODE_BREAK: + case BRW_OPCODE_CONTINUE: + case BRW_OPCODE_SEND: + case BRW_OPCODE_SENDC: + case BRW_OPCODE_NOP: + /* never eliminated */ + break; + default: + if (tdst_is_null(inst->dst) || !inst->dst.writemask) { + /* math is always BRW_CONDITIONAL_NONE */ + if ((inst->opcode == BRW_OPCODE_MATH || + inst->cond_modifier == BRW_CONDITIONAL_NONE) && + !inst->acc_wr_ctrl) + tc_discard_inst(tc, inst); + } + break; + } + } +} + +void +toy_compiler_optimize(struct toy_compiler *tc) +{ + eliminate_dead_code(tc); +} diff --git a/src/gallium/drivers/ilo/shader/toy_optimize.h b/src/gallium/drivers/ilo/shader/toy_optimize.h new file mode 100644 index 00000000000..f65198c7538 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_optimize.h @@ -0,0 +1,36 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#ifndef TOY_OPTIMIZE_H +#define TOY_OPTIMIZE_H + +#include "toy_compiler.h" + +void +toy_compiler_optimize(struct toy_compiler *tc); + +#endif /* TOY_OPTIMIZE_H */ diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c new file mode 100644 index 00000000000..c2b1da5ed55 --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c @@ -0,0 +1,2736 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_strings.h" +#include "util/u_hash_table.h" +#include "toy_helpers.h" +#include "toy_tgsi.h" + +/* map TGSI opcode to GEN opcode 1-to-1 */ +static const struct { + int opcode; + int num_dst; + int num_src; +} aos_simple_opcode_map[TGSI_OPCODE_LAST] = { + [TGSI_OPCODE_ARL] = { BRW_OPCODE_RNDD, 1, 1 }, + [TGSI_OPCODE_MOV] = { BRW_OPCODE_MOV, 1, 1 }, + [TGSI_OPCODE_RCP] = { TOY_OPCODE_INV, 1, 1 }, + [TGSI_OPCODE_RSQ] = { TOY_OPCODE_RSQ, 1, 1 }, + [TGSI_OPCODE_MUL] = { BRW_OPCODE_MUL, 1, 2 }, + [TGSI_OPCODE_ADD] = { BRW_OPCODE_ADD, 1, 2 }, + [TGSI_OPCODE_DP3] = { BRW_OPCODE_DP3, 1, 2 }, + [TGSI_OPCODE_DP4] = { BRW_OPCODE_DP4, 1, 2 }, + [TGSI_OPCODE_MIN] = { BRW_OPCODE_SEL, 1, 2 }, + [TGSI_OPCODE_MAX] = { BRW_OPCODE_SEL, 1, 2 }, + /* a later pass will move src[2] to accumulator */ + [TGSI_OPCODE_MAD] = { BRW_OPCODE_MAC, 1, 3 }, + [TGSI_OPCODE_SUB] = { BRW_OPCODE_ADD, 1, 2 }, + [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 }, + [TGSI_OPCODE_FRC] = { BRW_OPCODE_FRC, 1, 1 }, + [TGSI_OPCODE_FLR] = { BRW_OPCODE_RNDD, 1, 1 }, + [TGSI_OPCODE_ROUND] = { BRW_OPCODE_RNDE, 1, 1 }, + [TGSI_OPCODE_EX2] = { TOY_OPCODE_EXP, 1, 1 }, + [TGSI_OPCODE_LG2] = { TOY_OPCODE_LOG, 1, 1 }, + [TGSI_OPCODE_POW] = { TOY_OPCODE_POW, 1, 2 }, + [TGSI_OPCODE_ABS] = { BRW_OPCODE_MOV, 1, 1 }, + [TGSI_OPCODE_DPH] = { BRW_OPCODE_DPH, 1, 2 }, + [TGSI_OPCODE_COS] = { TOY_OPCODE_COS, 1, 1 }, + [TGSI_OPCODE_KILP] = { TOY_OPCODE_KIL, 0, 0 }, + [TGSI_OPCODE_SIN] = { TOY_OPCODE_SIN, 1, 1 }, + [TGSI_OPCODE_ARR] = { BRW_OPCODE_RNDZ, 1, 1 }, + [TGSI_OPCODE_DP2] = { BRW_OPCODE_DP2, 1, 2 }, + [TGSI_OPCODE_IF] = { BRW_OPCODE_IF, 0, 1 }, + [TGSI_OPCODE_UIF] = { BRW_OPCODE_IF, 0, 1 }, + [TGSI_OPCODE_ELSE] = { BRW_OPCODE_ELSE, 0, 0 }, + [TGSI_OPCODE_ENDIF] = { BRW_OPCODE_ENDIF, 0, 0 }, + [TGSI_OPCODE_I2F] = { BRW_OPCODE_MOV, 1, 1 }, + [TGSI_OPCODE_NOT] = { BRW_OPCODE_NOT, 1, 1 }, + 
[TGSI_OPCODE_TRUNC] = { BRW_OPCODE_RNDZ, 1, 1 }, + [TGSI_OPCODE_SHL] = { BRW_OPCODE_SHL, 1, 2 }, + [TGSI_OPCODE_AND] = { BRW_OPCODE_AND, 1, 2 }, + [TGSI_OPCODE_OR] = { BRW_OPCODE_OR, 1, 2 }, + [TGSI_OPCODE_MOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 }, + [TGSI_OPCODE_XOR] = { BRW_OPCODE_XOR, 1, 2 }, + [TGSI_OPCODE_EMIT] = { TOY_OPCODE_EMIT, 0, 0 }, + [TGSI_OPCODE_ENDPRIM] = { TOY_OPCODE_ENDPRIM, 0, 0 }, + [TGSI_OPCODE_NOP] = { BRW_OPCODE_NOP, 0, 0 }, + [TGSI_OPCODE_KIL] = { TOY_OPCODE_KIL, 0, 1 }, + [TGSI_OPCODE_END] = { BRW_OPCODE_NOP, 0, 0 }, + [TGSI_OPCODE_F2I] = { BRW_OPCODE_MOV, 1, 1 }, + [TGSI_OPCODE_IDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 }, + [TGSI_OPCODE_IMAX] = { BRW_OPCODE_SEL, 1, 2 }, + [TGSI_OPCODE_IMIN] = { BRW_OPCODE_SEL, 1, 2 }, + [TGSI_OPCODE_INEG] = { BRW_OPCODE_MOV, 1, 1 }, + [TGSI_OPCODE_ISHR] = { BRW_OPCODE_ASR, 1, 2 }, + [TGSI_OPCODE_F2U] = { BRW_OPCODE_MOV, 1, 1 }, + [TGSI_OPCODE_U2F] = { BRW_OPCODE_MOV, 1, 1 }, + [TGSI_OPCODE_UADD] = { BRW_OPCODE_ADD, 1, 2 }, + [TGSI_OPCODE_UDIV] = { TOY_OPCODE_INT_DIV_QUOTIENT, 1, 2 }, + /* a later pass will move src[2] to accumulator */ + [TGSI_OPCODE_UMAD] = { BRW_OPCODE_MAC, 1, 3 }, + [TGSI_OPCODE_UMAX] = { BRW_OPCODE_SEL, 1, 2 }, + [TGSI_OPCODE_UMIN] = { BRW_OPCODE_SEL, 1, 2 }, + [TGSI_OPCODE_UMOD] = { TOY_OPCODE_INT_DIV_REMAINDER, 1, 2 }, + [TGSI_OPCODE_UMUL] = { BRW_OPCODE_MUL, 1, 2 }, + [TGSI_OPCODE_USHR] = { BRW_OPCODE_SHR, 1, 2 }, + [TGSI_OPCODE_UARL] = { BRW_OPCODE_MOV, 1, 1 }, + [TGSI_OPCODE_IABS] = { BRW_OPCODE_MOV, 1, 1 }, +}; + +static void +aos_simple(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst *inst; + int opcode; + int cond_modifier = BRW_CONDITIONAL_NONE; + int num_dst = tgsi_inst->Instruction.NumDstRegs; + int num_src = tgsi_inst->Instruction.NumSrcRegs; + int i; + + opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode; + assert(num_dst == 
aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst); + assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src); + if (!opcode) { + assert(!"invalid aos_simple() call"); + return; + } + + /* no need to emit nop */ + if (opcode == BRW_OPCODE_NOP) + return; + + inst = tc_add(tc); + if (!inst) + return; + + inst->opcode = opcode; + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_UMIN: + cond_modifier = BRW_CONDITIONAL_L; + break; + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_UMAX: + cond_modifier = BRW_CONDITIONAL_GE; + break; + case TGSI_OPCODE_SUB: + src[1] = tsrc_negate(src[1]); + break; + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_IABS: + src[0] = tsrc_absolute(src[0]); + break; + case TGSI_OPCODE_IF: + cond_modifier = BRW_CONDITIONAL_NEQ; + num_src = 2; + assert(src[0].type == TOY_TYPE_F); + src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); + src[1] = tsrc_imm_f(0.0f); + break; + case TGSI_OPCODE_UIF: + cond_modifier = BRW_CONDITIONAL_NEQ; + num_src = 2; + assert(src[0].type == TOY_TYPE_D); + src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); + src[1] = tsrc_imm_d(0); + break; + case TGSI_OPCODE_INEG: + src[0] = tsrc_negate(src[0]); + break; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); + break; + case TGSI_OPCODE_POW: + src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); + src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X); + break; + } + + inst->cond_modifier = cond_modifier; + + if (num_dst) { + assert(num_dst == 1); + inst->dst = dst[0]; + } + + assert(num_src <= Elements(inst->src)); + for (i = 0; i < num_src; i++) + inst->src[i] = src[i]; +} + +static void +aos_set_on_cond(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst 
*inst; + int cond; + struct toy_src zero, one; + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_USLT: + cond = BRW_CONDITIONAL_L; + break; + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USGE: + cond = BRW_CONDITIONAL_GE; + break; + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_USEQ: + cond = BRW_CONDITIONAL_EQ; + break; + case TGSI_OPCODE_SGT: + cond = BRW_CONDITIONAL_G; + break; + case TGSI_OPCODE_SLE: + cond = BRW_CONDITIONAL_LE; + break; + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_USNE: + cond = BRW_CONDITIONAL_NEQ; + break; + default: + assert(!"invalid aos_set_on_cond() call"); + return; + } + + /* note that for integer versions, all bits are set */ + switch (dst[0].type) { + case TOY_TYPE_F: + default: + zero = tsrc_imm_f(0.0f); + one = tsrc_imm_f(1.0f); + break; + case TOY_TYPE_D: + zero = tsrc_imm_d(0); + one = tsrc_imm_d(-1); + break; + case TOY_TYPE_UD: + zero = tsrc_imm_ud(0); + one = tsrc_imm_ud(~0); + break; + } + + tc_MOV(tc, dst[0], zero); + tc_CMP(tc, tdst_null(), src[0], src[1], cond); + inst = tc_MOV(tc, dst[0], one); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; +} + +static void +aos_compare(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst *inst; + struct toy_src zero; + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_CMP: + zero = tsrc_imm_f(0.0f); + break; + case TGSI_OPCODE_UCMP: + zero = tsrc_imm_ud(0); + break; + default: + assert(!"invalid aos_compare() call"); + return; + } + + tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L); + inst = tc_SEL(tc, dst[0], src[1], src[2], BRW_CONDITIONAL_NONE); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; +} + +static void +aos_set_sign(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst *inst; + struct toy_src zero, one, neg_one; + + 
switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_SSG: + zero = tsrc_imm_f(0.0f); + one = tsrc_imm_f(1.0f); + neg_one = tsrc_imm_f(-1.0f); + break; + case TGSI_OPCODE_ISSG: + zero = tsrc_imm_d(0); + one = tsrc_imm_d(1); + neg_one = tsrc_imm_d(-1); + break; + default: + assert(!"invalid aos_set_sign() call"); + return; + } + + tc_MOV(tc, dst[0], zero); + + tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_G); + inst = tc_MOV(tc, dst[0], one); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; + + tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L); + inst = tc_MOV(tc, dst[0], neg_one); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; +} + +static void +aos_tex(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst *inst; + enum toy_opcode opcode; + int i; + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + opcode = TOY_OPCODE_TGSI_TEX; + break; + case TGSI_OPCODE_TXD: + opcode = TOY_OPCODE_TGSI_TXD; + break; + case TGSI_OPCODE_TXP: + opcode = TOY_OPCODE_TGSI_TXP; + break; + case TGSI_OPCODE_TXB: + opcode = TOY_OPCODE_TGSI_TXB; + break; + case TGSI_OPCODE_TXL: + opcode = TOY_OPCODE_TGSI_TXL; + break; + case TGSI_OPCODE_TXF: + opcode = TOY_OPCODE_TGSI_TXF; + break; + case TGSI_OPCODE_TXQ: + opcode = TOY_OPCODE_TGSI_TXQ; + break; + case TGSI_OPCODE_TXQ_LZ: + opcode = TOY_OPCODE_TGSI_TXQ_LZ; + break; + case TGSI_OPCODE_TEX2: + opcode = TOY_OPCODE_TGSI_TEX2; + case TGSI_OPCODE_TXB2: + opcode = TOY_OPCODE_TGSI_TXB2; + break; + case TGSI_OPCODE_TXL2: + opcode = TOY_OPCODE_TGSI_TXL2; + break; + default: + assert(!"unsupported texturing opcode"); + return; + break; + } + + assert(tgsi_inst->Instruction.Texture); + + inst = tc_add(tc); + inst->opcode = opcode; + inst->tex.target = tgsi_inst->Texture.Texture; + + assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src)); + assert(tgsi_inst->Instruction.NumDstRegs == 1); + + inst->dst = dst[0]; + for (i = 0; i < 
tgsi_inst->Instruction.NumSrcRegs; i++) + inst->src[i] = src[i]; + + for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) + tc_fail(tc, "texelFetchOffset unsupported"); +} + +static void +aos_sample(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst *inst; + enum toy_opcode opcode; + int i; + + assert(!"sampling untested"); + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_SAMPLE: + opcode = TOY_OPCODE_TGSI_SAMPLE; + break; + case TGSI_OPCODE_SAMPLE_I: + opcode = TOY_OPCODE_TGSI_SAMPLE_I; + break; + case TGSI_OPCODE_SAMPLE_I_MS: + opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS; + break; + case TGSI_OPCODE_SAMPLE_B: + opcode = TOY_OPCODE_TGSI_SAMPLE_B; + break; + case TGSI_OPCODE_SAMPLE_C: + opcode = TOY_OPCODE_TGSI_SAMPLE_C; + break; + case TGSI_OPCODE_SAMPLE_C_LZ: + opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ; + break; + case TGSI_OPCODE_SAMPLE_D: + opcode = TOY_OPCODE_TGSI_SAMPLE_D; + break; + case TGSI_OPCODE_SAMPLE_L: + opcode = TOY_OPCODE_TGSI_SAMPLE_L; + break; + case TGSI_OPCODE_GATHER4: + opcode = TOY_OPCODE_TGSI_GATHER4; + break; + case TGSI_OPCODE_SVIEWINFO: + opcode = TOY_OPCODE_TGSI_SVIEWINFO; + break; + case TGSI_OPCODE_SAMPLE_POS: + opcode = TOY_OPCODE_TGSI_SAMPLE_POS; + break; + case TGSI_OPCODE_SAMPLE_INFO: + opcode = TOY_OPCODE_TGSI_SAMPLE_INFO; + break; + default: + assert(!"unsupported sampling opcode"); + return; + break; + } + + inst = tc_add(tc); + inst->opcode = opcode; + + assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src)); + assert(tgsi_inst->Instruction.NumDstRegs == 1); + + inst->dst = dst[0]; + for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) + inst->src[i] = src[i]; +} + +static void +aos_LIT(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst *inst; + + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f)); + + if 
(!(dst[0].writemask & TOY_WRITEMASK_YZ)) + return; + + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f)); + + tc_CMP(tc, tdst_null(), + tsrc_swizzle1(src[0], TOY_SWIZZLE_X), + tsrc_imm_f(0.0f), + BRW_CONDITIONAL_G); + + inst = tc_MOV(tc, + tdst_writemask(dst[0], TOY_WRITEMASK_Y), + tsrc_swizzle1(src[0], TOY_SWIZZLE_X)); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; + + /* clamp W to (-128, 128)? */ + inst = tc_POW(tc, + tdst_writemask(dst[0], TOY_WRITEMASK_Z), + tsrc_swizzle1(src[0], TOY_SWIZZLE_Y), + tsrc_swizzle1(src[0], TOY_SWIZZLE_W)); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; +} + +static void +aos_EXP(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); + + if (dst[0].writemask & TOY_WRITEMASK_X) { + struct toy_dst tmp = + tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X)); + + tc_RNDD(tc, tmp, src0); + + /* construct the floating point number manually */ + tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127)); + tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)), + tsrc_from(tmp), tsrc_imm_d(23)); + } + + tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0); + tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0); + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f)); +} + +static void +aos_LOG(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X); + + if (dst[0].writemask & TOY_WRITEMASK_XY) { + struct toy_dst tmp; + + tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X)); + + /* exponent */ + tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23)); + tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), + tsrc_from(tmp), tsrc_imm_d(-127)); + + /* mantissa */ + tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1)); + tc_OR(tc, 
tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y), + tsrc_from(tmp), tsrc_imm_d(127 << 23)); + } + + tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0); + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f)); +} + +static void +aos_DST(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f)); + tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]); + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]); + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]); +} + +static void +aos_LRP(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_dst tmp = tc_alloc_tmp(tc); + + tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f)); + tc_MUL(tc, tmp, tsrc_from(tmp), src[2]); + tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp)); +} + +static void +aos_CND(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst *inst; + + assert(!"CND untested"); + + tc_CMP(tc, tdst_null(), src[2], tsrc_imm_f(0.5f), BRW_CONDITIONAL_G); + inst = tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_NONE); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; +} + +static void +aos_DP2A(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_dst tmp = tc_alloc_tmp(tc); + + assert(!"DP2A untested"); + + tc_DP2(tc, tmp, src[0], src[1]); + tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]); +} + +static void +aos_CLAMP(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + assert(!"CLAMP untested"); + + tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_GE); + tc_SEL(tc, dst[0], src[2], 
tsrc_from(dst[0]), BRW_CONDITIONAL_L); +} + +static void +aos_XPD(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_dst tmp = tc_alloc_tmp(tc); + + tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ), + tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X, + TOY_SWIZZLE_Y, TOY_SWIZZLE_W), + tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z, + TOY_SWIZZLE_X, TOY_SWIZZLE_W)); + + tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ), + tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z, + TOY_SWIZZLE_X, TOY_SWIZZLE_W), + tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X, + TOY_SWIZZLE_Y, TOY_SWIZZLE_W), + tsrc_negate(tsrc_from(tmp))); + + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), + tsrc_imm_f(1.0f)); +} + +static void +aos_PK2H(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X)); + const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y)); + struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc)); + + assert(!"PK2H untested"); + + tc_SHL(tc, tmp, h2, tsrc_imm_ud(16)); + tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp)); +} + +static void +aos_SFL(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + assert(!"SFL untested"); + + tc_MOV(tc, dst[0], tsrc_imm_f(0.0f)); +} + +static void +aos_STR(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + assert(!"STR untested"); + + tc_MOV(tc, dst[0], tsrc_imm_f(1.0f)); +} + +static void +aos_UP2H(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + assert(!"UP2H untested"); + + tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ), + tsrc_ud(src[0]), tsrc_imm_ud(0xffff)); + 
tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW), + tsrc_ud(src[0]), tsrc_imm_ud(16)); +} + +static void +aos_SCS(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + assert(!"SCS untested"); + + tc_add1(tc, TOY_OPCODE_COS, + tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]); + + tc_add1(tc, TOY_OPCODE_SIN, + tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]); + + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f)); + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f)); +} + +static void +aos_NRM(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_dst tmp = tc_alloc_tmp(tc); + + assert(!"NRM untested"); + + tc_DP3(tc, tmp, src[0], src[0]); + tc_INV(tc, tmp, tsrc_from(tmp)); + tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ), + src[0], tsrc_from(tmp)); + + tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f)); +} + +static void +aos_DIV(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_dst tmp = tc_alloc_tmp(tc); + + assert(!"DIV untested"); + + tc_INV(tc, tmp, src[1]); + tc_MUL(tc, dst[0], src[0], tsrc_from(tmp)); +} + +static void +aos_BRK(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + tc_add0(tc, BRW_OPCODE_BREAK); +} + +static void +aos_CEIL(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_dst tmp = tc_alloc_tmp(tc); + + tc_RNDD(tc, tmp, tsrc_negate(src[0])); + tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp))); +} + +static void +aos_SAD(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_dst tmp = tc_alloc_tmp(tc); + 
+ assert(!"SAD untested"); + + tc_ADD(tc, tmp, src[0], tsrc_negate(src[1])); + tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]); +} + +static void +aos_CONT(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + tc_add0(tc, BRW_OPCODE_CONTINUE); +} + +static void +aos_BGNLOOP(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_inst *inst; + + inst = tc_add0(tc, BRW_OPCODE_DO); + /* this is just a marker */ + inst->marker = true; +} + +static void +aos_ENDLOOP(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + tc_add0(tc, BRW_OPCODE_WHILE); +} + +static void +aos_NRM4(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + struct toy_dst tmp = tc_alloc_tmp(tc); + + assert(!"NRM4 untested"); + + tc_DP4(tc, tmp, src[0], src[0]); + tc_INV(tc, tmp, tsrc_from(tmp)); + tc_MUL(tc, dst[0], tsrc_swizzle1(src[0], TOY_SWIZZLE_X), tsrc_from(tmp)); +} + +static void +aos_unsupported(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src) +{ + const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode); + + ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name); + + tc_fail(tc, "unsupported TGSI instruction"); +} + +static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { + [TGSI_OPCODE_ARL] = aos_simple, + [TGSI_OPCODE_MOV] = aos_simple, + [TGSI_OPCODE_LIT] = aos_LIT, + [TGSI_OPCODE_RCP] = aos_simple, + [TGSI_OPCODE_RSQ] = aos_simple, + [TGSI_OPCODE_EXP] = aos_EXP, + [TGSI_OPCODE_LOG] = aos_LOG, + [TGSI_OPCODE_MUL] = aos_simple, + [TGSI_OPCODE_ADD] = aos_simple, + [TGSI_OPCODE_DP3] = aos_simple, + [TGSI_OPCODE_DP4] = aos_simple, + [TGSI_OPCODE_DST] = aos_DST, + [TGSI_OPCODE_MIN] = 
aos_simple, + [TGSI_OPCODE_MAX] = aos_simple, + [TGSI_OPCODE_SLT] = aos_set_on_cond, + [TGSI_OPCODE_SGE] = aos_set_on_cond, + [TGSI_OPCODE_MAD] = aos_simple, + [TGSI_OPCODE_SUB] = aos_simple, + [TGSI_OPCODE_LRP] = aos_LRP, + [TGSI_OPCODE_CND] = aos_CND, + [TGSI_OPCODE_SQRT] = aos_simple, + [TGSI_OPCODE_DP2A] = aos_DP2A, + [22] = aos_unsupported, + [23] = aos_unsupported, + [TGSI_OPCODE_FRC] = aos_simple, + [TGSI_OPCODE_CLAMP] = aos_CLAMP, + [TGSI_OPCODE_FLR] = aos_simple, + [TGSI_OPCODE_ROUND] = aos_simple, + [TGSI_OPCODE_EX2] = aos_simple, + [TGSI_OPCODE_LG2] = aos_simple, + [TGSI_OPCODE_POW] = aos_simple, + [TGSI_OPCODE_XPD] = aos_XPD, + [32] = aos_unsupported, + [TGSI_OPCODE_ABS] = aos_simple, + [TGSI_OPCODE_RCC] = aos_unsupported, + [TGSI_OPCODE_DPH] = aos_simple, + [TGSI_OPCODE_COS] = aos_simple, + [TGSI_OPCODE_DDX] = aos_unsupported, + [TGSI_OPCODE_DDY] = aos_unsupported, + [TGSI_OPCODE_KILP] = aos_simple, + [TGSI_OPCODE_PK2H] = aos_PK2H, + [TGSI_OPCODE_PK2US] = aos_unsupported, + [TGSI_OPCODE_PK4B] = aos_unsupported, + [TGSI_OPCODE_PK4UB] = aos_unsupported, + [TGSI_OPCODE_RFL] = aos_unsupported, + [TGSI_OPCODE_SEQ] = aos_set_on_cond, + [TGSI_OPCODE_SFL] = aos_SFL, + [TGSI_OPCODE_SGT] = aos_set_on_cond, + [TGSI_OPCODE_SIN] = aos_simple, + [TGSI_OPCODE_SLE] = aos_set_on_cond, + [TGSI_OPCODE_SNE] = aos_set_on_cond, + [TGSI_OPCODE_STR] = aos_STR, + [TGSI_OPCODE_TEX] = aos_tex, + [TGSI_OPCODE_TXD] = aos_tex, + [TGSI_OPCODE_TXP] = aos_tex, + [TGSI_OPCODE_UP2H] = aos_UP2H, + [TGSI_OPCODE_UP2US] = aos_unsupported, + [TGSI_OPCODE_UP4B] = aos_unsupported, + [TGSI_OPCODE_UP4UB] = aos_unsupported, + [TGSI_OPCODE_X2D] = aos_unsupported, + [TGSI_OPCODE_ARA] = aos_unsupported, + [TGSI_OPCODE_ARR] = aos_simple, + [TGSI_OPCODE_BRA] = aos_unsupported, + [TGSI_OPCODE_CAL] = aos_unsupported, + [TGSI_OPCODE_RET] = aos_unsupported, + [TGSI_OPCODE_SSG] = aos_set_sign, + [TGSI_OPCODE_CMP] = aos_compare, + [TGSI_OPCODE_SCS] = aos_SCS, + [TGSI_OPCODE_TXB] = aos_tex, + 
[TGSI_OPCODE_NRM] = aos_NRM, + [TGSI_OPCODE_DIV] = aos_DIV, + [TGSI_OPCODE_DP2] = aos_simple, + [TGSI_OPCODE_TXL] = aos_tex, + [TGSI_OPCODE_BRK] = aos_BRK, + [TGSI_OPCODE_IF] = aos_simple, + [TGSI_OPCODE_UIF] = aos_simple, + [76] = aos_unsupported, + [TGSI_OPCODE_ELSE] = aos_simple, + [TGSI_OPCODE_ENDIF] = aos_simple, + [79] = aos_unsupported, + [80] = aos_unsupported, + [TGSI_OPCODE_PUSHA] = aos_unsupported, + [TGSI_OPCODE_POPA] = aos_unsupported, + [TGSI_OPCODE_CEIL] = aos_CEIL, + [TGSI_OPCODE_I2F] = aos_simple, + [TGSI_OPCODE_NOT] = aos_simple, + [TGSI_OPCODE_TRUNC] = aos_simple, + [TGSI_OPCODE_SHL] = aos_simple, + [88] = aos_unsupported, + [TGSI_OPCODE_AND] = aos_simple, + [TGSI_OPCODE_OR] = aos_simple, + [TGSI_OPCODE_MOD] = aos_simple, + [TGSI_OPCODE_XOR] = aos_simple, + [TGSI_OPCODE_SAD] = aos_SAD, + [TGSI_OPCODE_TXF] = aos_tex, + [TGSI_OPCODE_TXQ] = aos_tex, + [TGSI_OPCODE_CONT] = aos_CONT, + [TGSI_OPCODE_EMIT] = aos_simple, + [TGSI_OPCODE_ENDPRIM] = aos_simple, + [TGSI_OPCODE_BGNLOOP] = aos_BGNLOOP, + [TGSI_OPCODE_BGNSUB] = aos_unsupported, + [TGSI_OPCODE_ENDLOOP] = aos_ENDLOOP, + [TGSI_OPCODE_ENDSUB] = aos_unsupported, + [TGSI_OPCODE_TXQ_LZ] = aos_tex, + [104] = aos_unsupported, + [105] = aos_unsupported, + [106] = aos_unsupported, + [TGSI_OPCODE_NOP] = aos_simple, + [108] = aos_unsupported, + [109] = aos_unsupported, + [110] = aos_unsupported, + [111] = aos_unsupported, + [TGSI_OPCODE_NRM4] = aos_NRM4, + [TGSI_OPCODE_CALLNZ] = aos_unsupported, + [TGSI_OPCODE_BREAKC] = aos_unsupported, + [TGSI_OPCODE_KIL] = aos_simple, + [TGSI_OPCODE_END] = aos_simple, + [118] = aos_unsupported, + [TGSI_OPCODE_F2I] = aos_simple, + [TGSI_OPCODE_IDIV] = aos_simple, + [TGSI_OPCODE_IMAX] = aos_simple, + [TGSI_OPCODE_IMIN] = aos_simple, + [TGSI_OPCODE_INEG] = aos_simple, + [TGSI_OPCODE_ISGE] = aos_set_on_cond, + [TGSI_OPCODE_ISHR] = aos_simple, + [TGSI_OPCODE_ISLT] = aos_set_on_cond, + [TGSI_OPCODE_F2U] = aos_simple, + [TGSI_OPCODE_U2F] = aos_simple, + [TGSI_OPCODE_UADD] = 
aos_simple, + [TGSI_OPCODE_UDIV] = aos_simple, + [TGSI_OPCODE_UMAD] = aos_simple, + [TGSI_OPCODE_UMAX] = aos_simple, + [TGSI_OPCODE_UMIN] = aos_simple, + [TGSI_OPCODE_UMOD] = aos_simple, + [TGSI_OPCODE_UMUL] = aos_simple, + [TGSI_OPCODE_USEQ] = aos_set_on_cond, + [TGSI_OPCODE_USGE] = aos_set_on_cond, + [TGSI_OPCODE_USHR] = aos_simple, + [TGSI_OPCODE_USLT] = aos_set_on_cond, + [TGSI_OPCODE_USNE] = aos_set_on_cond, + [TGSI_OPCODE_SWITCH] = aos_unsupported, + [TGSI_OPCODE_CASE] = aos_unsupported, + [TGSI_OPCODE_DEFAULT] = aos_unsupported, + [TGSI_OPCODE_ENDSWITCH] = aos_unsupported, + [TGSI_OPCODE_SAMPLE] = aos_sample, + [TGSI_OPCODE_SAMPLE_I] = aos_sample, + [TGSI_OPCODE_SAMPLE_I_MS] = aos_sample, + [TGSI_OPCODE_SAMPLE_B] = aos_sample, + [TGSI_OPCODE_SAMPLE_C] = aos_sample, + [TGSI_OPCODE_SAMPLE_C_LZ] = aos_sample, + [TGSI_OPCODE_SAMPLE_D] = aos_sample, + [TGSI_OPCODE_SAMPLE_L] = aos_sample, + [TGSI_OPCODE_GATHER4] = aos_sample, + [TGSI_OPCODE_SVIEWINFO] = aos_sample, + [TGSI_OPCODE_SAMPLE_POS] = aos_sample, + [TGSI_OPCODE_SAMPLE_INFO] = aos_sample, + [TGSI_OPCODE_UARL] = aos_simple, + [TGSI_OPCODE_UCMP] = aos_compare, + [TGSI_OPCODE_IABS] = aos_simple, + [TGSI_OPCODE_ISSG] = aos_set_sign, + [TGSI_OPCODE_LOAD] = aos_unsupported, + [TGSI_OPCODE_STORE] = aos_unsupported, + [TGSI_OPCODE_MFENCE] = aos_unsupported, + [TGSI_OPCODE_LFENCE] = aos_unsupported, + [TGSI_OPCODE_SFENCE] = aos_unsupported, + [TGSI_OPCODE_BARRIER] = aos_unsupported, + [TGSI_OPCODE_ATOMUADD] = aos_unsupported, + [TGSI_OPCODE_ATOMXCHG] = aos_unsupported, + [TGSI_OPCODE_ATOMCAS] = aos_unsupported, + [TGSI_OPCODE_ATOMAND] = aos_unsupported, + [TGSI_OPCODE_ATOMOR] = aos_unsupported, + [TGSI_OPCODE_ATOMXOR] = aos_unsupported, + [TGSI_OPCODE_ATOMUMIN] = aos_unsupported, + [TGSI_OPCODE_ATOMUMAX] = aos_unsupported, + [TGSI_OPCODE_ATOMIMIN] = aos_unsupported, + [TGSI_OPCODE_ATOMIMAX] = aos_unsupported, + [TGSI_OPCODE_TEX2] = aos_tex, + [TGSI_OPCODE_TXB2] = aos_tex, + [TGSI_OPCODE_TXL2] = aos_tex, +}; + 
+static void +soa_passthrough(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + const toy_tgsi_translate translate = + aos_translate_table[tgsi_inst->Instruction.Opcode]; + + translate(tc, tgsi_inst, dst_, src_); +} + +static void +soa_per_channel(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4]; + struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4]; + int i, ch; + + for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) + tdst_transpose(dst_[i], dst[i]); + for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) + tsrc_transpose(src_[i], src[i]); + + /* emit the same instruction four times for the four channels */ + for (ch = 0; ch < 4; ch++) { + struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS]; + struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS]; + + for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) + aos_dst[i] = dst[i][ch]; + for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) + aos_src[i] = src[i][ch]; + + aos_translate_table[tgsi_inst->Instruction.Opcode](tc, + tgsi_inst, aos_dst, aos_src); + } +} + +static void +soa_scalar_replicate(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst0[4], tmp; + struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS]; + int opcode, i; + + assert(tgsi_inst->Instruction.NumDstRegs == 1); + + tdst_transpose(dst_[0], dst0); + for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) { + struct toy_src tmp[4]; + + tsrc_transpose(src_[i], tmp); + /* only the X channels */ + srcx[i] = tmp[0]; + } + + tmp = tc_alloc_tmp(tc); + + opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode; + assert(opcode); + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SQRT: + 
case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + tc_add1(tc, opcode, tmp, srcx[0]); + break; + case TGSI_OPCODE_POW: + tc_add2(tc, opcode, tmp, srcx[0], srcx[1]); + break; + default: + assert(!"invalid soa_scalar_replicate() call"); + return; + } + + /* replicate the result */ + for (i = 0; i < 4; i++) + tc_MOV(tc, dst0[i], tsrc_from(tmp)); +} + +static void +soa_dot_product(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst0[4], tmp; + struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4]; + int i; + + tdst_transpose(dst_[0], dst0); + for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) + tsrc_transpose(src_[i], src[i]); + + tmp = tc_alloc_tmp(tc); + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_DP2: + tc_MUL(tc, tmp, src[0][1], src[1][1]); + tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); + break; + case TGSI_OPCODE_DP2A: + tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]); + tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); + break; + case TGSI_OPCODE_DP3: + tc_MUL(tc, tmp, src[0][2], src[1][2]); + tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp)); + tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); + break; + case TGSI_OPCODE_DPH: + tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]); + tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp)); + tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); + break; + case TGSI_OPCODE_DP4: + tc_MUL(tc, tmp, src[0][3], src[1][3]); + tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp)); + tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp)); + tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp)); + break; + default: + assert(!"invalid soa_dot_product() call"); + return; + } + + for (i = 0; i < 4; i++) + tc_MOV(tc, dst0[i], tsrc_from(tmp)); +} + +static void +soa_partial_derivative(struct toy_compiler *tc, + const struct tgsi_full_instruction 
*tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX) + tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]); + else + tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]); +} + +static void +soa_if(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_src src0[4]; + + assert(tsrc_is_swizzle1(src_[0])); + tsrc_transpose(src_[0], src0); + + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF) + tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_NEQ); + else + tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), BRW_CONDITIONAL_NEQ); +} + +static void +soa_LIT(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_inst *inst; + struct toy_dst dst0[4]; + struct toy_src src0[4]; + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src0); + + tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f)); + tc_MOV(tc, dst0[1], src0[0]); + tc_POW(tc, dst0[2], src0[1], src0[3]); + tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); + + /* + * POW is calculated first because math with pred_ctrl is broken here. + * But, why? 
+ */ + tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_L); + inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f)); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; + inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f)); + inst->pred_ctrl = BRW_PREDICATE_NORMAL; +} + +static void +soa_EXP(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst0[4]; + struct toy_src src0[4]; + + assert(!"SoA EXP untested"); + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src0); + + if (!tdst_is_null(dst0[0])) { + struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc)); + + tc_RNDD(tc, tmp, src0[0]); + + /* construct the floating point number manually */ + tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127)); + tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23)); + } + + tc_FRC(tc, dst0[1], src0[0]); + tc_EXP(tc, dst0[2], src0[0]); + tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); +} + +static void +soa_LOG(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst0[4]; + struct toy_src src0[4]; + + assert(!"SoA LOG untested"); + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src0); + + if (dst_[0].writemask & TOY_WRITEMASK_XY) { + struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc)); + + /* exponent */ + tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23)); + tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127)); + + /* mantissa */ + tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1)); + tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23)); + } + + tc_LOG(tc, dst0[2], src0[0]); + tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); +} + +static void +soa_DST(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst0[4]; + struct toy_src src[2][4]; + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], 
src[0]); + tsrc_transpose(src_[1], src[1]); + + tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f)); + tc_MUL(tc, dst0[1], src[0][1], src[1][1]); + tc_MOV(tc, dst0[2], src[0][2]); + tc_MOV(tc, dst0[3], src[1][3]); +} + +static void +soa_XPD(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst0[4]; + struct toy_src src[2][4]; + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src[0]); + tsrc_transpose(src_[1], src[1]); + + /* dst.x = src0.y * src1.z - src1.y * src0.z */ + tc_MUL(tc, dst0[0], src[0][2], src[1][1]); + tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0]))); + + /* dst.y = src0.z * src1.x - src1.z * src0.x */ + tc_MUL(tc, dst0[1], src[0][0], src[1][2]); + tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1]))); + + /* dst.z = src0.x * src1.y - src1.x * src0.y */ + tc_MUL(tc, dst0[2], src[0][1], src[1][0]); + tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2]))); + + tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); +} + +static void +soa_PK2H(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc)); + struct toy_dst dst0[4]; + struct toy_src src0[4]; + int i; + + assert(!"SoA PK2H untested"); + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src0); + + tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16)); + tc_OR(tc, tmp, src0[0], tsrc_from(tmp)); + + for (i = 0; i < 4; i++) + tc_MOV(tc, dst0[i], tsrc_from(tmp)); +} + +static void +soa_UP2H(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst0[4]; + struct toy_src src0[4]; + + assert(!"SoA UP2H untested"); + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src0); + + tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff)); + tc_SHR(tc, 
tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16)); + tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff)); + tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16)); + +} + +static void +soa_SCS(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + struct toy_dst dst0[4]; + struct toy_src src0[4]; + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src0); + + tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]); + tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]); + tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f)); + tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); +} + +static void +soa_NRM(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + const struct toy_dst tmp = tc_alloc_tmp(tc); + struct toy_dst dst0[4]; + struct toy_src src0[4]; + + assert(!"SoA NRM untested"); + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src0); + + tc_MUL(tc, tmp, src0[2], src0[2]); + tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp)); + tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp)); + tc_INV(tc, tmp, tsrc_from(tmp)); + + tc_MUL(tc, dst0[0], src0[0], tsrc_from(tmp)); + tc_MUL(tc, dst0[1], src0[1], tsrc_from(tmp)); + tc_MUL(tc, dst0[2], src0[2], tsrc_from(tmp)); + tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); +} + +static void +soa_NRM4(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + const struct toy_dst tmp = tc_alloc_tmp(tc); + struct toy_dst dst0[4]; + struct toy_src src0[4]; + int i; + + assert(!"SoA NRM4 untested"); + + tdst_transpose(dst_[0], dst0); + tsrc_transpose(src_[0], src0); + + tc_MUL(tc, tmp, src0[3], src0[3]); + tc_MAC(tc, tmp, src0[2], src0[2], tsrc_from(tmp)); + tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp)); + tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp)); + tc_INV(tc, tmp, tsrc_from(tmp)); + + for (i = 0; i < 4; i++) + 
tc_MUL(tc, dst0[i], src0[0], tsrc_from(tmp)); +} + +static void +soa_unsupported(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst_, + struct toy_src *src_) +{ + const struct tgsi_opcode_info *info = + tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode); + + ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n", + info->mnemonic); + + tc_fail(tc, "unsupported TGSI instruction in SoA form"); +} + +static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = { + [TGSI_OPCODE_ARL] = soa_per_channel, + [TGSI_OPCODE_MOV] = soa_per_channel, + [TGSI_OPCODE_LIT] = soa_LIT, + [TGSI_OPCODE_RCP] = soa_scalar_replicate, + [TGSI_OPCODE_RSQ] = soa_scalar_replicate, + [TGSI_OPCODE_EXP] = soa_EXP, + [TGSI_OPCODE_LOG] = soa_LOG, + [TGSI_OPCODE_MUL] = soa_per_channel, + [TGSI_OPCODE_ADD] = soa_per_channel, + [TGSI_OPCODE_DP3] = soa_dot_product, + [TGSI_OPCODE_DP4] = soa_dot_product, + [TGSI_OPCODE_DST] = soa_DST, + [TGSI_OPCODE_MIN] = soa_per_channel, + [TGSI_OPCODE_MAX] = soa_per_channel, + [TGSI_OPCODE_SLT] = soa_per_channel, + [TGSI_OPCODE_SGE] = soa_per_channel, + [TGSI_OPCODE_MAD] = soa_per_channel, + [TGSI_OPCODE_SUB] = soa_per_channel, + [TGSI_OPCODE_LRP] = soa_per_channel, + [TGSI_OPCODE_CND] = soa_per_channel, + [TGSI_OPCODE_SQRT] = soa_scalar_replicate, + [TGSI_OPCODE_DP2A] = soa_dot_product, + [22] = soa_unsupported, + [23] = soa_unsupported, + [TGSI_OPCODE_FRC] = soa_per_channel, + [TGSI_OPCODE_CLAMP] = soa_per_channel, + [TGSI_OPCODE_FLR] = soa_per_channel, + [TGSI_OPCODE_ROUND] = soa_per_channel, + [TGSI_OPCODE_EX2] = soa_scalar_replicate, + [TGSI_OPCODE_LG2] = soa_scalar_replicate, + [TGSI_OPCODE_POW] = soa_scalar_replicate, + [TGSI_OPCODE_XPD] = soa_XPD, + [32] = soa_unsupported, + [TGSI_OPCODE_ABS] = soa_per_channel, + [TGSI_OPCODE_RCC] = soa_unsupported, + [TGSI_OPCODE_DPH] = soa_dot_product, + [TGSI_OPCODE_COS] = soa_scalar_replicate, + [TGSI_OPCODE_DDX] = soa_partial_derivative, + 
[TGSI_OPCODE_DDY] = soa_partial_derivative, + [TGSI_OPCODE_KILP] = soa_passthrough, + [TGSI_OPCODE_PK2H] = soa_PK2H, + [TGSI_OPCODE_PK2US] = soa_unsupported, + [TGSI_OPCODE_PK4B] = soa_unsupported, + [TGSI_OPCODE_PK4UB] = soa_unsupported, + [TGSI_OPCODE_RFL] = soa_unsupported, + [TGSI_OPCODE_SEQ] = soa_per_channel, + [TGSI_OPCODE_SFL] = soa_per_channel, + [TGSI_OPCODE_SGT] = soa_per_channel, + [TGSI_OPCODE_SIN] = soa_scalar_replicate, + [TGSI_OPCODE_SLE] = soa_per_channel, + [TGSI_OPCODE_SNE] = soa_per_channel, + [TGSI_OPCODE_STR] = soa_per_channel, + [TGSI_OPCODE_TEX] = soa_passthrough, + [TGSI_OPCODE_TXD] = soa_passthrough, + [TGSI_OPCODE_TXP] = soa_passthrough, + [TGSI_OPCODE_UP2H] = soa_UP2H, + [TGSI_OPCODE_UP2US] = soa_unsupported, + [TGSI_OPCODE_UP4B] = soa_unsupported, + [TGSI_OPCODE_UP4UB] = soa_unsupported, + [TGSI_OPCODE_X2D] = soa_unsupported, + [TGSI_OPCODE_ARA] = soa_unsupported, + [TGSI_OPCODE_ARR] = soa_per_channel, + [TGSI_OPCODE_BRA] = soa_unsupported, + [TGSI_OPCODE_CAL] = soa_unsupported, + [TGSI_OPCODE_RET] = soa_unsupported, + [TGSI_OPCODE_SSG] = soa_per_channel, + [TGSI_OPCODE_CMP] = soa_per_channel, + [TGSI_OPCODE_SCS] = soa_SCS, + [TGSI_OPCODE_TXB] = soa_passthrough, + [TGSI_OPCODE_NRM] = soa_NRM, + [TGSI_OPCODE_DIV] = soa_per_channel, + [TGSI_OPCODE_DP2] = soa_dot_product, + [TGSI_OPCODE_TXL] = soa_passthrough, + [TGSI_OPCODE_BRK] = soa_passthrough, + [TGSI_OPCODE_IF] = soa_if, + [TGSI_OPCODE_UIF] = soa_if, + [76] = soa_unsupported, + [TGSI_OPCODE_ELSE] = soa_passthrough, + [TGSI_OPCODE_ENDIF] = soa_passthrough, + [79] = soa_unsupported, + [80] = soa_unsupported, + [TGSI_OPCODE_PUSHA] = soa_unsupported, + [TGSI_OPCODE_POPA] = soa_unsupported, + [TGSI_OPCODE_CEIL] = soa_per_channel, + [TGSI_OPCODE_I2F] = soa_per_channel, + [TGSI_OPCODE_NOT] = soa_per_channel, + [TGSI_OPCODE_TRUNC] = soa_per_channel, + [TGSI_OPCODE_SHL] = soa_per_channel, + [88] = soa_unsupported, + [TGSI_OPCODE_AND] = soa_per_channel, + [TGSI_OPCODE_OR] = soa_per_channel, + 
[TGSI_OPCODE_MOD] = soa_per_channel, + [TGSI_OPCODE_XOR] = soa_per_channel, + [TGSI_OPCODE_SAD] = soa_per_channel, + [TGSI_OPCODE_TXF] = soa_passthrough, + [TGSI_OPCODE_TXQ] = soa_passthrough, + [TGSI_OPCODE_CONT] = soa_passthrough, + [TGSI_OPCODE_EMIT] = soa_unsupported, + [TGSI_OPCODE_ENDPRIM] = soa_unsupported, + [TGSI_OPCODE_BGNLOOP] = soa_passthrough, + [TGSI_OPCODE_BGNSUB] = soa_unsupported, + [TGSI_OPCODE_ENDLOOP] = soa_passthrough, + [TGSI_OPCODE_ENDSUB] = soa_unsupported, + [TGSI_OPCODE_TXQ_LZ] = soa_passthrough, + [104] = soa_unsupported, + [105] = soa_unsupported, + [106] = soa_unsupported, + [TGSI_OPCODE_NOP] = soa_passthrough, + [108] = soa_unsupported, + [109] = soa_unsupported, + [110] = soa_unsupported, + [111] = soa_unsupported, + [TGSI_OPCODE_NRM4] = soa_NRM4, + [TGSI_OPCODE_CALLNZ] = soa_unsupported, + [TGSI_OPCODE_BREAKC] = soa_unsupported, + [TGSI_OPCODE_KIL] = soa_passthrough, + [TGSI_OPCODE_END] = soa_passthrough, + [118] = soa_unsupported, + [TGSI_OPCODE_F2I] = soa_per_channel, + [TGSI_OPCODE_IDIV] = soa_per_channel, + [TGSI_OPCODE_IMAX] = soa_per_channel, + [TGSI_OPCODE_IMIN] = soa_per_channel, + [TGSI_OPCODE_INEG] = soa_per_channel, + [TGSI_OPCODE_ISGE] = soa_per_channel, + [TGSI_OPCODE_ISHR] = soa_per_channel, + [TGSI_OPCODE_ISLT] = soa_per_channel, + [TGSI_OPCODE_F2U] = soa_per_channel, + [TGSI_OPCODE_U2F] = soa_per_channel, + [TGSI_OPCODE_UADD] = soa_per_channel, + [TGSI_OPCODE_UDIV] = soa_per_channel, + [TGSI_OPCODE_UMAD] = soa_per_channel, + [TGSI_OPCODE_UMAX] = soa_per_channel, + [TGSI_OPCODE_UMIN] = soa_per_channel, + [TGSI_OPCODE_UMOD] = soa_per_channel, + [TGSI_OPCODE_UMUL] = soa_per_channel, + [TGSI_OPCODE_USEQ] = soa_per_channel, + [TGSI_OPCODE_USGE] = soa_per_channel, + [TGSI_OPCODE_USHR] = soa_per_channel, + [TGSI_OPCODE_USLT] = soa_per_channel, + [TGSI_OPCODE_USNE] = soa_per_channel, + [TGSI_OPCODE_SWITCH] = soa_unsupported, + [TGSI_OPCODE_CASE] = soa_unsupported, + [TGSI_OPCODE_DEFAULT] = soa_unsupported, + 
[TGSI_OPCODE_ENDSWITCH] = soa_unsupported, + [TGSI_OPCODE_SAMPLE] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_I] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_I_MS] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_B] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_C] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_C_LZ] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_D] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_L] = soa_passthrough, + [TGSI_OPCODE_GATHER4] = soa_passthrough, + [TGSI_OPCODE_SVIEWINFO] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_POS] = soa_passthrough, + [TGSI_OPCODE_SAMPLE_INFO] = soa_passthrough, + [TGSI_OPCODE_UARL] = soa_per_channel, + [TGSI_OPCODE_UCMP] = soa_per_channel, + [TGSI_OPCODE_IABS] = soa_per_channel, + [TGSI_OPCODE_ISSG] = soa_per_channel, + [TGSI_OPCODE_LOAD] = soa_unsupported, + [TGSI_OPCODE_STORE] = soa_unsupported, + [TGSI_OPCODE_MFENCE] = soa_unsupported, + [TGSI_OPCODE_LFENCE] = soa_unsupported, + [TGSI_OPCODE_SFENCE] = soa_unsupported, + [TGSI_OPCODE_BARRIER] = soa_unsupported, + [TGSI_OPCODE_ATOMUADD] = soa_unsupported, + [TGSI_OPCODE_ATOMXCHG] = soa_unsupported, + [TGSI_OPCODE_ATOMCAS] = soa_unsupported, + [TGSI_OPCODE_ATOMAND] = soa_unsupported, + [TGSI_OPCODE_ATOMOR] = soa_unsupported, + [TGSI_OPCODE_ATOMXOR] = soa_unsupported, + [TGSI_OPCODE_ATOMUMIN] = soa_unsupported, + [TGSI_OPCODE_ATOMUMAX] = soa_unsupported, + [TGSI_OPCODE_ATOMIMIN] = soa_unsupported, + [TGSI_OPCODE_ATOMIMAX] = soa_unsupported, + [TGSI_OPCODE_TEX2] = soa_unsupported, + [TGSI_OPCODE_TXB2] = soa_unsupported, + [TGSI_OPCODE_TXL2] = soa_unsupported, +}; + +static bool +ra_dst_is_indirect(const struct tgsi_full_dst_register *d) +{ + return (d->Register.Indirect || + (d->Register.Dimension && d->Dimension.Indirect)); +} + +static int +ra_dst_index(const struct tgsi_full_dst_register *d) +{ + assert(!d->Register.Indirect); + return d->Register.Index; +} + +static int +ra_dst_dimension(const struct tgsi_full_dst_register *d) +{ + if (d->Register.Dimension) { + assert(!d->Dimension.Indirect); + 
return d->Dimension.Index; + } + else { + return 0; + } +} + +static bool +ra_is_src_indirect(const struct tgsi_full_src_register *s) +{ + return (s->Register.Indirect || + (s->Register.Dimension && s->Dimension.Indirect)); +} + +static int +ra_src_index(const struct tgsi_full_src_register *s) +{ + assert(!s->Register.Indirect); + return s->Register.Index; +} + +static int +ra_src_dimension(const struct tgsi_full_src_register *s) +{ + if (s->Register.Dimension) { + assert(!s->Dimension.Indirect); + return s->Dimension.Index; + } + else { + return 0; + } +} + +/** + * Infer the type of either the sources or the destination. + */ +static enum toy_type +ra_infer_opcode_type(int tgsi_opcode, bool is_dst) +{ + enum toy_type type; + + if (is_dst) { + bool type_valid = false; + + switch (tgsi_opcode) { + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: + case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXQ_LZ: + case TGSI_OPCODE_SAMPLE_I: + case TGSI_OPCODE_SAMPLE_I_MS: + case TGSI_OPCODE_SAMPLE_POS: + type = TOY_TYPE_F; + type_valid = true; + break; + case TGSI_OPCODE_ARL: + case TGSI_OPCODE_ARR: + case TGSI_OPCODE_F2I: + type = TOY_TYPE_D; + type_valid = true; + break; + case TGSI_OPCODE_F2U: + type = TOY_TYPE_UD; + type_valid = true; + break; + default: + break; + } + + if (type_valid) + return type; + } + + switch (tgsi_opcode) { + case TGSI_OPCODE_UIF: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_MOD: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_SAD: /* why? */ + case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXQ_LZ: + case TGSI_OPCODE_IDIV: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_INEG: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_UARL: /* why? 
*/ + case TGSI_OPCODE_IABS: + case TGSI_OPCODE_ISSG: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + type = TOY_TYPE_D; + break; + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_U2F: + case TGSI_OPCODE_UADD: + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USHR: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + case TGSI_OPCODE_SAMPLE_I: + case TGSI_OPCODE_SAMPLE_I_MS: + case TGSI_OPCODE_SVIEWINFO: + case TGSI_OPCODE_SAMPLE_POS: + case TGSI_OPCODE_SAMPLE_INFO: + case TGSI_OPCODE_UCMP: + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_STORE: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + type = TOY_TYPE_UD; + break; + default: + type = TOY_TYPE_F; + break; + } + + return type; +} + +/** + * Return the type of an operand of the specified instruction. 
+ */ +static enum toy_type +ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst, + int operand, bool is_dst) +{ + enum toy_type type; + enum tgsi_file_type file; + + /* we need to look at both src and dst for MOV */ + /* XXX it should not be this complex */ + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) { + const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File; + const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File; + + if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) { + type = TOY_TYPE_D; + } + else if (src_file == TGSI_FILE_IMMEDIATE && + !tgsi_inst->Src[0].Register.Indirect) { + const int src_idx = tgsi_inst->Src[0].Register.Index; + type = tgsi->imm_data.types[src_idx]; + } + else { + /* this is the best we can do */ + type = TOY_TYPE_F; + } + + return type; + } + + type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst); + + /* fix the type */ + file = (is_dst) ? + tgsi_inst->Dst[operand].Register.File : + tgsi_inst->Src[operand].Register.File; + switch (file) { + case TGSI_FILE_SAMPLER: + case TGSI_FILE_RESOURCE: + case TGSI_FILE_SAMPLER_VIEW: + type = TOY_TYPE_D; + break; + case TGSI_FILE_ADDRESS: + assert(type == TOY_TYPE_D); + break; + default: + break; + } + + return type; +} + +/** + * Allocate a VRF register. + */ +static int +ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file) +{ + const int count = (tgsi->aos) ? 1 : 4; + return tc_alloc_vrf(tgsi->tc, count); +} + +/** + * Construct the key for VRF mapping look-up. + */ +static void * +ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index) +{ + intptr_t key; + + /* this is ugly... */ + assert(file < 1 << 4); + assert(dim < 1 << 12); + assert(index < 1 << 16); + key = (file << 28) | (dim << 16) | index; + + return intptr_to_pointer(key); +} + +/** + * Map a TGSI register to a VRF register. 
+ */ +static int +ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file, + int dim, int index, bool *is_new) +{ + void *key, *val; + intptr_t vrf; + + key = ra_get_map_key(file, dim, index); + + /* + * because we allocate vrf from 1 and on, val is never NULL as long as the + * key exists + */ + val = util_hash_table_get(tgsi->reg_mapping, key); + if (val) { + vrf = pointer_to_intptr(val); + + if (is_new) + *is_new = false; + } + else { + vrf = (intptr_t) ra_alloc_reg(tgsi, file); + + /* add to the mapping */ + val = intptr_to_pointer(vrf); + util_hash_table_set(tgsi->reg_mapping, key, val); + + if (is_new) + *is_new = true; + } + + return (int) vrf; +} + +/** + * Return true if the destination aliases any of the sources. + */ +static bool +ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index) +{ + const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index]; + int i; + + /* we need a scratch register for indirect dst anyway */ + if (ra_dst_is_indirect(d)) + return true; + + for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *s = &tgsi_inst->Src[i]; + + if (s->Register.File != d->Register.File) + continue; + + /* + * we can go on to check dimension and index respectively, but + * keep it simple for now + */ + if (ra_is_src_indirect(s)) + return true; + if (ra_src_dimension(s) == ra_dst_dimension(d) && + ra_src_index(s) == ra_dst_index(d)) + return true; + } + + return false; +} + +/** + * Return the toy register for a TGSI destination operand. 
+ */ +static struct toy_dst +ra_get_dst(struct toy_tgsi *tgsi, + const struct tgsi_full_instruction *tgsi_inst, int dst_index, + bool *is_scratch) +{ + const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index]; + bool need_vrf = false; + struct toy_dst dst; + + switch (d->Register.File) { + case TGSI_FILE_NULL: + dst = tdst_null(); + break; + case TGSI_FILE_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_PREDICATE: + need_vrf = true; + break; + default: + assert(!"unhandled dst file"); + dst = tdst_null(); + break; + } + + if (need_vrf) { + /* XXX we do not always need a scratch given the conditions... */ + const bool need_scratch = + (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) || + tgsi_inst->Instruction.Saturate); + const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true); + int vrf; + + if (need_scratch) { + vrf = ra_alloc_reg(tgsi, d->Register.File); + } + else { + vrf = ra_map_reg(tgsi, d->Register.File, + ra_dst_dimension(d), ra_dst_index(d), NULL); + } + + if (is_scratch) + *is_scratch = need_scratch; + + dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, + false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH); + } + + return dst; +} + +static struct toy_src +ra_get_src_for_vrf(const struct tgsi_full_src_register *s, + enum toy_type type, int vrf) +{ + return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, + false, 0, + s->Register.SwizzleX, s->Register.SwizzleY, + s->Register.SwizzleZ, s->Register.SwizzleW, + s->Register.Absolute, s->Register.Negate, + vrf * TOY_REG_WIDTH); +} + +static int +init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst, + enum tgsi_file_type file, int index, + const struct tgsi_ind_register *indirect, + const struct tgsi_dimension *dimension, + const struct tgsi_ind_register *dim_indirect) +{ + struct toy_src src; + int num_src = 0; + + /* src[0]: TGSI file */ + inst->src[num_src++] = tsrc_imm_d(file); + + /* src[1]: TGSI dimension */ + 
inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0); + + /* src[2]: TGSI dimension indirection */ + if (dim_indirect) { + const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0, + dim_indirect->Index, NULL); + + src = tsrc(TOY_FILE_VRF, vrf, 0); + src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle); + } + else { + src = tsrc_imm_d(0); + } + + inst->src[num_src++] = src; + + /* src[3]: TGSI index */ + inst->src[num_src++] = tsrc_imm_d(index); + + /* src[4]: TGSI index indirection */ + if (indirect) { + const int vrf = ra_map_reg(tgsi, indirect->File, 0, + indirect->Index, NULL); + + src = tsrc(TOY_FILE_VRF, vrf, 0); + src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle); + } + else { + src = tsrc_imm_d(0); + } + + inst->src[num_src++] = src; + + return num_src; +} + +static struct toy_src +ra_get_src_indirect(struct toy_tgsi *tgsi, + const struct tgsi_full_instruction *tgsi_inst, + int src_index) +{ + const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index]; + bool need_vrf = false, is_resource = false; + struct toy_src src; + + switch (s->Register.File) { + case TGSI_FILE_NULL: + src = tsrc_null(); + break; + case TGSI_FILE_SAMPLER: + case TGSI_FILE_RESOURCE: + case TGSI_FILE_SAMPLER_VIEW: + is_resource = true; + /* fall through */ + case TGSI_FILE_CONSTANT: + case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_IMMEDIATE: + case TGSI_FILE_PREDICATE: + need_vrf = true; + break; + default: + assert(!"unhandled src file"); + src = tsrc_null(); + break; + } + + if (need_vrf) { + const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false); + int vrf; + + if (is_resource) { + assert(!s->Register.Dimension); + assert(s->Register.Indirect); + + vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL); + } + else { + vrf = ra_alloc_reg(tgsi, s->Register.File); + } + + src = ra_get_src_for_vrf(s, type, vrf); + + /* emit indirect fetch */ + if 
(!is_resource) { + struct toy_inst *inst; + + inst = tc_add(tgsi->tc); + inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH; + inst->dst = tdst_from(src); + inst->dst.writemask = TOY_WRITEMASK_XYZW; + + init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index, + (s->Register.Indirect) ? &s->Indirect : NULL, + (s->Register.Dimension) ? &s->Dimension : NULL, + (s->Dimension.Indirect) ? &s->DimIndirect : NULL); + } + } + + return src; +} + +/** + * Return the toy register for a TGSI source operand. + */ +static struct toy_src +ra_get_src(struct toy_tgsi *tgsi, + const struct tgsi_full_instruction *tgsi_inst, + int src_index) +{ + const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index]; + bool need_vrf = false; + struct toy_src src; + + if (ra_is_src_indirect(s)) + return ra_get_src_indirect(tgsi, tgsi_inst, src_index); + + switch (s->Register.File) { + case TGSI_FILE_NULL: + src = tsrc_null(); + break; + case TGSI_FILE_CONSTANT: + case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: + need_vrf = true; + break; + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_PREDICATE: + need_vrf = true; + break; + case TGSI_FILE_SAMPLER: + case TGSI_FILE_RESOURCE: + case TGSI_FILE_SAMPLER_VIEW: + assert(!s->Register.Dimension); + src = tsrc_imm_d(s->Register.Index); + break; + case TGSI_FILE_IMMEDIATE: + { + const uint32_t *imm; + enum toy_type imm_type; + bool is_scalar; + + imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type); + + is_scalar = + (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] && + imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] && + imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]); + + if (is_scalar) { + const enum toy_type type = + ra_get_type(tgsi, tgsi_inst, src_index, false); + + /* ignore imm_type */ + src = tsrc_imm_ud(imm[s->Register.SwizzleX]); + src.type = type; + src.absolute = s->Register.Absolute; + src.negate = s->Register.Negate; + } + else { + need_vrf = true; + } + } + break; + default: + 
assert(!"unhandled src file"); + src = tsrc_null(); + break; + } + + if (need_vrf) { + const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false); + bool is_new; + int vrf; + + vrf = ra_map_reg(tgsi, s->Register.File, + ra_src_dimension(s), ra_src_index(s), &is_new); + + src = ra_get_src_for_vrf(s, type, vrf); + + if (is_new) { + switch (s->Register.File) { + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_PREDICATE: + { + struct toy_dst dst = tdst_from(src); + dst.writemask = TOY_WRITEMASK_XYZW; + + /* + * Always initialize registers. Otherwise, if the random value + * ends up in a VUE, FS may fail to interpolate correctly. + */ + tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type)); + } + break; + default: + break; + } + } + + } + + return src; +} + +static void +parse_instruction(struct toy_tgsi *tgsi, + const struct tgsi_full_instruction *tgsi_inst) +{ + struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS]; + struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS]; + bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS]; + toy_tgsi_translate translate; + int i; + + /* convert TGSI registers to toy registers */ + for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) + src[i] = ra_get_src(tgsi, tgsi_inst, i); + for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) + dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]); + + /* translate the instruction */ + translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode]; + translate(tgsi->tc, tgsi_inst, dst, src); + + /* write the result to the real destinations if needed */ + for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i]; + + if (!dst_is_scratch[i]) + continue; + + if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE) + tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled"); + + tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate; + + /* emit indirect store */ + if (ra_dst_is_indirect(d)) { + 
struct toy_inst *inst; + + inst = tc_add(tgsi->tc); + inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE; + inst->dst = dst[i]; + + init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index, + (d->Register.Indirect) ? &d->Indirect : NULL, + (d->Register.Dimension) ? &d->Dimension : NULL, + (d->Dimension.Indirect) ? &d->DimIndirect : NULL); + } + else { + const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true); + struct toy_dst real_dst; + int vrf; + + vrf = ra_map_reg(tgsi, d->Register.File, + ra_dst_dimension(d), ra_dst_index(d), NULL); + real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, + false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH); + + if (tgsi->aos) { + tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i])); + } + else { + struct toy_dst tdst[4]; + struct toy_src tsrc[4]; + int j; + + tdst_transpose(real_dst, tdst); + tsrc_transpose(tsrc_from(dst[i]), tsrc); + + for (j = 0; j < 4; j++) + tc_MOV(tgsi->tc, tdst[j], tsrc[j]); + } + } + + tgsi->tc->templ.saturate = false; + } + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_KIL: + case TGSI_OPCODE_KILP: + tgsi->uses_kill = true; + break; + } + + /* remember channels written */ + for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i]; + + if (d->Register.File != TGSI_FILE_OUTPUT) + continue; + for (i = 0; i < tgsi->num_outputs; i++) { + if (tgsi->outputs[i].index == d->Register.Index) { + tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask; + break; + } + } + } +} + +static void +decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) +{ + static const struct tgsi_declaration_interp default_interp = { + TGSI_INTERPOLATE_PERSPECTIVE, false, 0, + }; + const struct tgsi_declaration_interp *interp = + (decl->Declaration.Interpolate) ? 
&decl->Interp: &default_interp; + int index; + + if (decl->Range.Last >= Elements(tgsi->inputs)) { + assert(!"invalid IN"); + return; + } + + for (index = decl->Range.First; index <= decl->Range.Last; index++) { + const int slot = tgsi->num_inputs++; + + tgsi->inputs[slot].index = index; + tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask; + if (decl->Declaration.Semantic) { + tgsi->inputs[slot].semantic_name = decl->Semantic.Name; + tgsi->inputs[slot].semantic_index = decl->Semantic.Index; + } + else { + tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC; + tgsi->inputs[slot].semantic_index = index; + } + tgsi->inputs[slot].interp = interp->Interpolate; + tgsi->inputs[slot].centroid = interp->Centroid; + } +} + +static void +decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) +{ + int index; + + if (decl->Range.Last >= Elements(tgsi->outputs)) { + assert(!"invalid OUT"); + return; + } + + assert(decl->Declaration.Semantic); + + for (index = decl->Range.First; index <= decl->Range.Last; index++) { + const int slot = tgsi->num_outputs++; + + tgsi->outputs[slot].index = index; + tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW; + tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask; + tgsi->outputs[slot].semantic_name = decl->Semantic.Name; + tgsi->outputs[slot].semantic_index = decl->Semantic.Index; + } +} + +static void +decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) +{ + int index; + + if (decl->Range.Last >= Elements(tgsi->system_values)) { + assert(!"invalid SV"); + return; + } + + for (index = decl->Range.First; index <= decl->Range.Last; index++) { + const int slot = tgsi->num_system_values++; + + tgsi->system_values[slot].index = index; + if (decl->Declaration.Semantic) { + tgsi->system_values[slot].semantic_name = decl->Semantic.Name; + tgsi->system_values[slot].semantic_index = decl->Semantic.Index; + } + else { + tgsi->system_values[slot].semantic_name = 
TGSI_SEMANTIC_GENERIC; + tgsi->system_values[slot].semantic_index = index; + } + } +} + +/** + * Emit an instruction to fetch the value of a TGSI register. + */ +static void +fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx) +{ + struct toy_dst dst; + int vrf; + enum toy_opcode opcode; + enum toy_type type = TOY_TYPE_F; + + switch (file) { + case TGSI_FILE_INPUT: + opcode = TOY_OPCODE_TGSI_IN; + break; + case TGSI_FILE_CONSTANT: + opcode = TOY_OPCODE_TGSI_CONST; + break; + case TGSI_FILE_SYSTEM_VALUE: + opcode = TOY_OPCODE_TGSI_SV; + break; + case TGSI_FILE_IMMEDIATE: + opcode = TOY_OPCODE_TGSI_IMM; + toy_tgsi_get_imm(tgsi, idx, &type); + break; + default: + /* no need to fetch */ + return; + break; + } + + vrf = ra_map_reg(tgsi, file, dim, idx, NULL); + dst = tdst(TOY_FILE_VRF, vrf, 0); + dst = tdst_type(dst, type); + + tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx)); +} + +static void +parse_declaration(struct toy_tgsi *tgsi, + const struct tgsi_full_declaration *decl) +{ + int i; + + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + decl_add_in(tgsi, decl); + break; + case TGSI_FILE_OUTPUT: + decl_add_out(tgsi, decl); + break; + case TGSI_FILE_SYSTEM_VALUE: + decl_add_sv(tgsi, decl); + break; + case TGSI_FILE_IMMEDIATE: + /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */ + assert(!"unexpected immediate declaration"); + break; + case TGSI_FILE_NULL: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_SAMPLER: + case TGSI_FILE_PREDICATE: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_RESOURCE: + case TGSI_FILE_SAMPLER_VIEW: + /* nothing to do */ + break; + default: + assert(!"unhandled TGSI file"); + break; + } + + /* fetch the registers now */ + for (i = decl->Range.First; i <= decl->Range.Last; i++) { + const int dim = (decl->Declaration.Dimension) ? 
decl->Dim.Index2D : 0; + fetch_source(tgsi, decl->Declaration.File, dim, i); + } +} + +static int +add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf) +{ + /* reallocate the buffer if necessary */ + if (tgsi->imm_data.cur >= tgsi->imm_data.size) { + const int cur_size = tgsi->imm_data.size; + int new_size; + enum toy_type *new_types; + uint32_t (*new_buf)[4]; + + new_size = (cur_size) ? cur_size << 1 : 16; + while (new_size <= tgsi->imm_data.cur) + new_size <<= 1; + + new_buf = REALLOC(tgsi->imm_data.buf, + cur_size * sizeof(new_buf[0]), + new_size * sizeof(new_buf[0])); + new_types = REALLOC(tgsi->imm_data.types, + cur_size * sizeof(new_types[0]), + new_size * sizeof(new_types[0])); + if (!new_buf || !new_types) { + if (new_buf) + FREE(new_buf); + if (new_types) + FREE(new_types); + return -1; + } + + tgsi->imm_data.buf = new_buf; + tgsi->imm_data.types = new_types; + tgsi->imm_data.size = new_size; + } + + tgsi->imm_data.types[tgsi->imm_data.cur] = type; + memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur], + buf, sizeof(tgsi->imm_data.buf[0])); + + return tgsi->imm_data.cur++; +} + +static void +parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm) +{ + enum toy_type type; + uint32_t imm_buf[4]; + int idx; + + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + type = TOY_TYPE_F; + imm_buf[0] = fui(imm->u[0].Float); + imm_buf[1] = fui(imm->u[1].Float); + imm_buf[2] = fui(imm->u[2].Float); + imm_buf[3] = fui(imm->u[3].Float); + break; + case TGSI_IMM_INT32: + type = TOY_TYPE_D; + imm_buf[0] = (uint32_t) imm->u[0].Int; + imm_buf[1] = (uint32_t) imm->u[1].Int; + imm_buf[2] = (uint32_t) imm->u[2].Int; + imm_buf[3] = (uint32_t) imm->u[3].Int; + break; + case TGSI_IMM_UINT32: + type = TOY_TYPE_UD; + imm_buf[0] = imm->u[0].Uint; + imm_buf[1] = imm->u[1].Uint; + imm_buf[2] = imm->u[2].Uint; + imm_buf[3] = imm->u[3].Uint; + break; + default: + assert(!"unhandled TGSI imm type"); + type = TOY_TYPE_F; + memset(imm_buf, 0, 
sizeof(imm_buf)); + break; + } + + idx = add_imm(tgsi, type, imm_buf); + if (idx >= 0) + fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx); + else + tc_fail(tgsi->tc, "failed to add TGSI imm"); +} + +static void +parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop) +{ + switch (prop->Property.PropertyName) { + case TGSI_PROPERTY_VS_PROHIBIT_UCPS: + tgsi->props.vs_prohibit_ucps = prop->u[0].Data; + break; + case TGSI_PROPERTY_FS_COORD_ORIGIN: + tgsi->props.fs_coord_origin = prop->u[0].Data; + break; + case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: + tgsi->props.fs_coord_pixel_center = prop->u[0].Data; + break; + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: + tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data; + break; + case TGSI_PROPERTY_FS_DEPTH_LAYOUT: + tgsi->props.fs_depth_layout = prop->u[0].Data; + break; + case TGSI_PROPERTY_GS_INPUT_PRIM: + tgsi->props.gs_input_prim = prop->u[0].Data; + break; + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + tgsi->props.gs_output_prim = prop->u[0].Data; + break; + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + tgsi->props.gs_max_output_vertices = prop->u[0].Data; + break; + default: + assert(!"unhandled TGSI property"); + break; + } +} + +static void +parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token) +{ + switch (token->Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + parse_declaration(tgsi, &token->FullDeclaration); + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + parse_immediate(tgsi, &token->FullImmediate); + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + parse_instruction(tgsi, &token->FullInstruction); + break; + case TGSI_TOKEN_TYPE_PROPERTY: + parse_property(tgsi, &token->FullProperty); + break; + default: + assert(!"unhandled TGSI token type"); + break; + } +} + +static enum pipe_error +dump_reg_mapping(void *key, void *val, void *data) +{ + int tgsi_file, tgsi_dim, tgsi_index; + uint32_t sig, vrf; + + sig = (uint32_t) pointer_to_intptr(key); + vrf = (uint32_t) pointer_to_intptr(val); 
+ + /* see ra_get_map_key() */ + tgsi_file = (sig >> 28) & 0xf; + tgsi_dim = (sig >> 16) & 0xfff; + tgsi_index = (sig >> 0) & 0xffff; + + if (tgsi_dim) { + ilo_printf(" v%d:\t%s[%d][%d]\n", vrf, + tgsi_file_names[tgsi_file], tgsi_dim, tgsi_index); + } + else { + ilo_printf(" v%d:\t%s[%d]\n", vrf, + tgsi_file_names[tgsi_file], tgsi_index); + } + + return PIPE_OK; +} + +/** + * Dump the TGSI translator, currently only the register mapping. + */ +void +toy_tgsi_dump(const struct toy_tgsi *tgsi) +{ + util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL); +} + +/** + * Clean up the TGSI translator. + */ +void +toy_tgsi_cleanup(struct toy_tgsi *tgsi) +{ + FREE(tgsi->imm_data.buf); + FREE(tgsi->imm_data.types); + + util_hash_table_destroy(tgsi->reg_mapping); +} + +static unsigned +reg_mapping_hash(void *key) +{ + return (unsigned) pointer_to_intptr(key); +} + +static int +reg_mapping_compare(void *key1, void *key2) +{ + return (key1 != key2); +} + +/** + * Initialize the TGSI translator. + */ +static bool +init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos) +{ + memset(tgsi, 0, sizeof(*tgsi)); + + tgsi->tc = tc; + tgsi->aos = aos; + tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table; + + /* create a mapping of TGSI registers to VRF reigsters */ + tgsi->reg_mapping = + util_hash_table_create(reg_mapping_hash, reg_mapping_compare); + + return (tgsi->reg_mapping != NULL); +} + +/** + * Translate TGSI tokens into toy instructions. 
+ */ +void +toy_compiler_translate_tgsi(struct toy_compiler *tc, + const struct tgsi_token *tokens, bool aos, + struct toy_tgsi *tgsi) +{ + struct tgsi_parse_context parse; + + if (!init_tgsi(tgsi, tc, aos)) { + tc_fail(tc, "failed to initialize TGSI translator"); + return; + } + + tgsi_parse_init(&parse, tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + parse_token(tgsi, &parse.FullToken); + } + tgsi_parse_free(&parse); +} + +/** + * Map the TGSI register to VRF register. + */ +int +toy_tgsi_get_vrf(const struct toy_tgsi *tgsi, + enum tgsi_file_type file, int dimension, int index) +{ + void *key, *val; + + key = ra_get_map_key(file, dimension, index); + + val = util_hash_table_get(tgsi->reg_mapping, key); + + return (val) ? pointer_to_intptr(val) : -1; +} diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.h b/src/gallium/drivers/ilo/shader/toy_tgsi.h new file mode 100644 index 00000000000..1bfb57f6c7f --- /dev/null +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.h @@ -0,0 +1,253 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#ifndef TOY_TGSI_H +#define TOY_TGSI_H + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "toy_compiler.h" + +struct tgsi_token; +struct tgsi_full_instruction; +struct util_hash_table; + +typedef void (*toy_tgsi_translate)(struct toy_compiler *tc, + const struct tgsi_full_instruction *tgsi_inst, + struct toy_dst *dst, + struct toy_src *src); + +struct toy_tgsi { + struct toy_compiler *tc; + bool aos; + const toy_tgsi_translate *translate_table; + + struct util_hash_table *reg_mapping; + + struct { + bool vs_prohibit_ucps; + int fs_coord_origin; + int fs_coord_pixel_center; + bool fs_color0_writes_all_cbufs; + int fs_depth_layout; + int gs_input_prim; + int gs_output_prim; + int gs_max_output_vertices; + } props; + + struct { + enum toy_type *types; + uint32_t (*buf)[4]; + int cur, size; + } imm_data; + + struct { + int index:16; + unsigned usage_mask:4; /* TGSI_WRITEMASK_x */ + unsigned semantic_name:8; /* TGSI_SEMANTIC_x */ + unsigned semantic_index:8; + unsigned interp:4; /* TGSI_INTERPOLATE_x */ + unsigned centroid:1; + } inputs[PIPE_MAX_SHADER_INPUTS]; + int num_inputs; + + struct { + int index:16; + unsigned undefined_mask:4; + unsigned usage_mask:4; /* TGSI_WRITEMASK_x */ + unsigned semantic_name:8; /* TGSI_SEMANTIC_x */ + unsigned semantic_index:8; + } outputs[PIPE_MAX_SHADER_OUTPUTS]; + int num_outputs; + + struct { + int index:16; + unsigned semantic_name:8; /* TGSI_SEMANTIC_x */ + unsigned semantic_index:8; + } system_values[8]; + int num_system_values; + + bool uses_kill; +}; + +/** + * Find the slot of the TGSI input. 
+ */ +static inline int +toy_tgsi_find_input(const struct toy_tgsi *tgsi, int index) +{ + int slot; + + for (slot = 0; slot < tgsi->num_inputs; slot++) { + if (tgsi->inputs[slot].index == index) + return slot; + } + + return -1; +} + +/** + * Find the slot of the TGSI system value. + */ +static inline int +toy_tgsi_find_system_value(const struct toy_tgsi *tgsi, int index) +{ + int slot; + + for (slot = 0; slot < tgsi->num_system_values; slot++) { + if (tgsi->system_values[slot].index == index) + return slot; + } + + return -1; +} + +/** + * Return the immediate data of the TGSI immediate. + */ +static inline const uint32_t * +toy_tgsi_get_imm(const struct toy_tgsi *tgsi, unsigned index, + enum toy_type *type) +{ + const uint32_t *imm; + + if (index >= tgsi->imm_data.cur) + return NULL; + + imm = tgsi->imm_data.buf[index]; + if (type) + *type = tgsi->imm_data.types[index]; + + return imm; +} + +/** + * Return the dimension of the texture coordinates, as well as the location of + * the shadow reference value or the sample index. 
+ */ +static inline int +toy_tgsi_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample) +{ + int dim; + + /* + * Depending on the texture target, (src0, src1.x) is interpreted + * differently: + * + * (s, *, *, *, *), for 1D + * (s, t, *, *, *), for 2D, RECT + * (s, t, r, *, *), for 3D, CUBE + * + * (s, layer, *, *, *), for 1D_ARRAY + * (s, t, layer, *, *), for 2D_ARRAY + * (s, t, r, layer, *), for CUBE_ARRAY + * + * (s, *, shadow, *, *), for SHADOW1D + * (s, t, shadow, *, *), for SHADOW2D, SHADOWRECT + * (s, t, r, shadow, *), for SHADOWCUBE + * + * (s, layer, shadow, *, *), for SHADOW1D_ARRAY + * (s, t, layer, shadow, *), for SHADOW2D_ARRAY + * (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY + * + * (s, t, sample, *, *), for 2D_MSAA + * (s, t, layer, sample, *), for 2D_ARRAY_MSAA + */ + switch (tgsi_tex) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + dim = 1; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + case TGSI_TEXTURE_2D_MSAA: + dim = 2; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_2D_ARRAY_MSAA: + dim = 3; + break; + case TGSI_TEXTURE_CUBE_ARRAY: + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + dim = 4; + break; + default: + assert(!"unknown texture target"); + dim = 0; + break; + } + + if (shadow_or_sample) { + switch (tgsi_tex) { + case TGSI_TEXTURE_SHADOW1D: + /* there is a gap */ + *shadow_or_sample = 2; + break; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + case TGSI_TEXTURE_2D_MSAA: + case TGSI_TEXTURE_2D_ARRAY_MSAA: + *shadow_or_sample = dim; + break; + default: + /* no shadow nor sample */ + *shadow_or_sample = -1; + 
break; + } + } + + return dim; +} + +void +toy_compiler_translate_tgsi(struct toy_compiler *tc, + const struct tgsi_token *tokens, bool aos, + struct toy_tgsi *tgsi); + +void +toy_tgsi_cleanup(struct toy_tgsi *tgsi); + +int +toy_tgsi_get_vrf(const struct toy_tgsi *tgsi, + enum tgsi_file_type file, int dimension, int index); + +void +toy_tgsi_dump(const struct toy_tgsi *tgsi); + +#endif /* TOY_TGSI_H */ |