diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/.gitignore | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.am | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.c | 38 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 13 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_compact.c | 558 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_debug.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_structs.h | 26 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/test_eu_compact.c | 296 |
10 files changed, 946 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/.gitignore b/src/mesa/drivers/dri/i965/.gitignore index fe4578e39d4..c6ea403f5db 100644 --- a/src/mesa/drivers/dri/i965/.gitignore +++ b/src/mesa/drivers/dri/i965/.gitignore @@ -1,3 +1,4 @@ Makefile i965_symbols_test libi965_dri.la +test_eu_compact diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 0ac3de75ca6..5bb62c49efd 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -51,16 +51,30 @@ libi965_dri_la_SOURCES = \ $(i965_C_FILES) \ $(i965_CXX_FILES) +# list of libs to be linked against by i965_dri.so and i965 test programs. COMMON_LIBS = \ libi965_dri.la \ $(DRI_LIB_DEPS) \ $(INTEL_LIBS) \ ../common/libdricommon.la +TEST_LIBS = \ + $(COMMON_LIBS) \ + -lrt \ + ../common/libdri_test_stubs.la + i965_dri_la_SOURCES = i965_dri_la_LIBADD = $(COMMON_LIBS) i965_dri_la_LDFLAGS = -module -avoid-version -shared +TESTS = test_eu_compact +check_PROGRAMS = test_eu_compact + +test_eu_compact_SOURCES = \ + test_eu_compact.c +nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp +test_eu_compact_LDADD = $(TEST_LIBS) + # Provide compatibility with scripts for the old Mesa build system for # a while by putting a link to the driver into /lib of the build tree. 
all-local: i965_dri.la diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index d6d189a1841..3715b0f300f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -44,6 +44,7 @@ i965_C_FILES = \ brw_draw.c \ brw_draw_upload.c \ brw_eu.c \ + brw_eu_compact.c \ brw_eu_debug.c \ brw_eu_emit.c \ brw_eu_util.c \ diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 20a8ec4229e..02a07ec17b4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -214,6 +214,11 @@ const GLuint *brw_get_program( struct brw_compile *p, { GLuint i; + brw_compact_instructions(p); + + /* We emit a cacheline (8 instructions) of NOPs at the end of the program to + * make sure that instruction prefetch doesn't wander off into some other BO. + */ for (i = 0; i < 8; i++) brw_NOP(p); @@ -224,19 +229,36 @@ const GLuint *brw_get_program( struct brw_compile *p, void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end) { + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; void *store = p->store; + bool dump_hex = false; - for (int offset = start; offset < end; offset += 16) { + for (int offset = start; offset < end;) { struct brw_instruction *insn = store + offset; - + struct brw_instruction uncompacted; printf("0x%08x: ", offset); - if (0) { - printf("0x%08x 0x%08x 0x%08x 0x%08x ", - ((uint32_t *)insn)[3], - ((uint32_t *)insn)[2], - ((uint32_t *)insn)[1], - ((uint32_t *)insn)[0]); + if (insn->header.cmpt_control) { + struct brw_compact_instruction *compacted = (void *)insn; + if (dump_hex) { + printf("0x%08x 0x%08x ", + ((uint32_t *)insn)[1], + ((uint32_t *)insn)[0]); + } + + brw_uncompact_instruction(intel, &uncompacted, compacted); + insn = &uncompacted; + offset += 8; + } else { + if (dump_hex) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)insn)[3], + ((uint32_t 
*)insn)[2], + ((uint32_t *)insn)[1], + ((uint32_t *)insn)[0]); + } + offset += 16; } brw_disasm(stdout, insn, p->brw->intel.gen); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 2fa84dff490..21967bd0cd7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -1107,6 +1107,19 @@ void brw_set_uip_jip(struct brw_compile *p); uint32_t brw_swap_cmod(uint32_t cmod); +/* brw_eu_compact.c */ +void brw_compact_instructions(struct brw_compile *p); +void brw_uncompact_instruction(struct intel_context *intel, + struct brw_instruction *dst, + struct brw_compact_instruction *src); +bool brw_try_compact_instruction(struct brw_compile *p, + struct brw_compact_instruction *dst, + struct brw_instruction *src); + +void brw_debug_compact_uncompact(struct intel_context *intel, + struct brw_instruction *orig, + struct brw_instruction *uncompacted); + /* brw_optimize.c */ void brw_optimize(struct brw_compile *p); void brw_remove_duplicate_mrf_moves(struct brw_compile *p); diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c new file mode 100644 index 00000000000..210657adce0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -0,0 +1,558 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file brw_eu_compact.c + * + * Instruction compaction is a feature of gm45 and newer hardware that allows + * for a smaller instruction encoding. + * + * The instruction cache is on the order of 32KB, and many programs generate + * far more instructions than that. The instruction cache is built to barely + * keep up with instruction dispatch ability in cache hit cases -- L1 + * instruction cache misses that still hit in the next level could limit + * throughput by around 50%. + * + * The idea of instruction compaction is that most instructions use a tiny + * subset of the GPU functionality, so we can encode what would be a 16 byte + * instruction in 8 bytes using some lookup tables for various fields. 
+ */ + +#include "brw_context.h" +#include "brw_eu.h" + +static const uint32_t gen6_control_index_table[32] = { + 0b00000000000000000, + 0b01000000000000000, + 0b00110000000000000, + 0b00000000100000000, + 0b00010000000000000, + 0b00001000100000000, + 0b00000000100000010, + 0b00000000000000010, + 0b01000000100000000, + 0b01010000000000000, + 0b10110000000000000, + 0b00100000000000000, + 0b11010000000000000, + 0b11000000000000000, + 0b01001000100000000, + 0b01000000000001000, + 0b01000000000000100, + 0b00000000000001000, + 0b00000000000000100, + 0b00111000100000000, + 0b00001000100000010, + 0b00110000100000000, + 0b00110000000000001, + 0b00100000000000001, + 0b00110000000000010, + 0b00110000000000101, + 0b00110000000001001, + 0b00110000000010000, + 0b00110000000000011, + 0b00110000000000100, + 0b00110000100001000, + 0b00100000000001001 +}; + +static const uint32_t gen6_datatype_table[32] = { + 0b001001110000000000, + 0b001000110000100000, + 0b001001110000000001, + 0b001000000001100000, + 0b001010110100101001, + 0b001000000110101101, + 0b001100011000101100, + 0b001011110110101101, + 0b001000000111101100, + 0b001000000001100001, + 0b001000110010100101, + 0b001000000001000001, + 0b001000001000110001, + 0b001000001000101001, + 0b001000000000100000, + 0b001000001000110010, + 0b001010010100101001, + 0b001011010010100101, + 0b001000000110100101, + 0b001100011000101001, + 0b001011011000101100, + 0b001011010110100101, + 0b001011110110100101, + 0b001111011110111101, + 0b001111011110111100, + 0b001111011110111101, + 0b001111011110011101, + 0b001111011110111110, + 0b001000000000100001, + 0b001000000000100010, + 0b001001111111011101, + 0b001000001110111110, +}; + +static const uint32_t gen6_subreg_table[32] = { + 0b000000000000000, + 0b000000000000100, + 0b000000110000000, + 0b111000000000000, + 0b011110000001000, + 0b000010000000000, + 0b000000000010000, + 0b000110000001100, + 0b001000000000000, + 0b000001000000000, + 0b000001010010100, + 0b000000001010110, + 0b010000000000000, 
+ 0b110000000000000, + 0b000100000000000, + 0b000000010000000, + 0b000000000001000, + 0b100000000000000, + 0b000001010000000, + 0b001010000000000, + 0b001100000000000, + 0b000000001010100, + 0b101101010010100, + 0b010100000000000, + 0b000000010001111, + 0b011000000000000, + 0b111110000000000, + 0b101000000000000, + 0b000000000001111, + 0b000100010001111, + 0b001000010001111, + 0b000110000000000, +}; + +static const uint32_t gen6_src_index_table[32] = { + 0b000000000000, + 0b010110001000, + 0b010001101000, + 0b001000101000, + 0b011010010000, + 0b000100100000, + 0b010001101100, + 0b010101110000, + 0b011001111000, + 0b001100101000, + 0b010110001100, + 0b001000100000, + 0b010110001010, + 0b000000000010, + 0b010101010000, + 0b010101101000, + 0b111101001100, + 0b111100101100, + 0b011001110000, + 0b010110001001, + 0b010101011000, + 0b001101001000, + 0b010000101100, + 0b010000000000, + 0b001101110000, + 0b001100010000, + 0b001100000000, + 0b010001101010, + 0b001101111000, + 0b000001110000, + 0b001100100000, + 0b001101010000, +}; + +static bool +set_control_index(struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t *src_u32 = (uint32_t *)src; + uint32_t uncompacted = 0; + + uncompacted |= ((src_u32[0] >> 8) & 0xffff) << 0; + uncompacted |= ((src_u32[0] >> 31) & 0x1) << 16; + + for (int i = 0; i < ARRAY_SIZE(gen6_control_index_table); i++) { + if (gen6_control_index_table[i] == uncompacted) { + dst->dw0.control_index = i; + return true; + } + } + + return false; +} + +static bool +set_datatype_index(struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t uncompacted = 0; + + uncompacted |= src->bits1.ud & 0x7fff; + uncompacted |= (src->bits1.ud >> 29) << 15; + + for (int i = 0; i < ARRAY_SIZE(gen6_datatype_table); i++) { + if (gen6_datatype_table[i] == uncompacted) { + dst->dw0.data_type_index = i; + return true; + } + } + + return false; +} + +static bool +set_subreg_index(struct brw_compact_instruction *dst, + struct 
brw_instruction *src) +{ + uint32_t uncompacted = 0; + + uncompacted |= src->bits1.da1.dest_subreg_nr << 0; + uncompacted |= src->bits2.da1.src0_subreg_nr << 5; + uncompacted |= src->bits3.da1.src1_subreg_nr << 10; + + for (int i = 0; i < ARRAY_SIZE(gen6_subreg_table); i++) { + if (gen6_subreg_table[i] == uncompacted) { + dst->dw0.sub_reg_index = i; + return true; + } + } + + return false; +} + +static bool +get_src_index(uint32_t uncompacted, + uint32_t *compacted) +{ + for (int i = 0; i < ARRAY_SIZE(gen6_src_index_table); i++) { + if (gen6_src_index_table[i] == uncompacted) { + *compacted = i; + return true; + } + } + + return false; +} + +static bool +set_src0_index(struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t compacted, uncompacted = 0; + + uncompacted |= (src->bits2.ud >> 13) & 0xfff; + + if (!get_src_index(uncompacted, &compacted)) + return false; + + dst->dw0.src0_index = compacted & 0x3; + dst->dw1.src0_index = compacted >> 2; + + return true; +} + +static bool +set_src1_index(struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t compacted, uncompacted = 0; + + uncompacted |= (src->bits3.ud >> 13) & 0xfff; + + if (!get_src_index(uncompacted, &compacted)) + return false; + + dst->dw1.src1_index = compacted; + + return true; +} + +/** + * Tries to compact instruction src into dst. + * + * It doesn't modify dst unless src is compactable, which is relied on by + * brw_compact_instructions(). 
+ */ +bool +brw_try_compact_instruction(struct brw_compile *p, + struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + struct brw_compact_instruction temp; + + /* FINISHME: immediates */ + if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE || + src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return false; + + memset(&temp, 0, sizeof(temp)); + + temp.dw0.opcode = src->header.opcode; + temp.dw0.debug_control = src->header.debug_control; + if (!set_control_index(&temp, src)) + return false; + if (!set_datatype_index(&temp, src)) + return false; + if (!set_subreg_index(&temp, src)) + return false; + temp.dw0.acc_wr_control = src->header.acc_wr_control; + temp.dw0.conditionalmod = src->header.destreg__conditionalmod; + temp.dw0.flag_reg_nr = src->bits2.da1.flag_reg_nr; + temp.dw0.cmpt_ctrl = 1; + if (!set_src0_index(&temp, src)) + return false; + if (!set_src1_index(&temp, src)) + return false; + temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr; + temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr; + temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr; + + *dst = temp; + + return true; +} + +static void +set_uncompacted_control(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t *dst_u32 = (uint32_t *)dst; + uint32_t uncompacted = gen6_control_index_table[src->dw0.control_index]; + + dst_u32[0] |= ((uncompacted >> 0) & 0xffff) << 8; + dst_u32[0] |= ((uncompacted >> 16) & 0x1) << 31; +} + +static void +set_uncompacted_datatype(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t uncompacted = gen6_datatype_table[src->dw0.data_type_index]; + + dst->bits1.ud &= ~(0x7 << 29); + dst->bits1.ud |= ((uncompacted >> 15) & 0x7) << 29; + dst->bits1.ud &= ~0x7fff; + dst->bits1.ud |= uncompacted & 0x7fff; +} + +static void +set_uncompacted_subreg(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t uncompacted = gen6_subreg_table[src->dw0.sub_reg_index]; + + 
dst->bits1.da1.dest_subreg_nr = (uncompacted >> 0) & 0x1f; + dst->bits2.da1.src0_subreg_nr = (uncompacted >> 5) & 0x1f; + dst->bits3.da1.src1_subreg_nr = (uncompacted >> 10) & 0x1f; +} + +static void +set_uncompacted_src0(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t compacted = src->dw0.src0_index | src->dw1.src0_index << 2; + uint32_t uncompacted = gen6_src_index_table[compacted]; + + dst->bits2.ud |= uncompacted << 13; +} + +static void +set_uncompacted_src1(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t uncompacted = gen6_src_index_table[src->dw1.src1_index]; + + dst->bits3.ud |= uncompacted << 13; +} + +void +brw_uncompact_instruction(struct intel_context *intel, + struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + memset(dst, 0, sizeof(*dst)); + + dst->header.opcode = src->dw0.opcode; + dst->header.debug_control = src->dw0.debug_control; + + set_uncompacted_control(dst, src); + set_uncompacted_datatype(dst, src); + set_uncompacted_subreg(dst, src); + dst->header.acc_wr_control = src->dw0.acc_wr_control; + dst->header.destreg__conditionalmod = src->dw0.conditionalmod; + dst->bits2.da1.flag_reg_nr = src->dw0.flag_reg_nr; + set_uncompacted_src0(dst, src); + set_uncompacted_src1(dst, src); + dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr; + dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr; + dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr; +} + +void brw_debug_compact_uncompact(struct intel_context *intel, + struct brw_instruction *orig, + struct brw_instruction *uncompacted) +{ + fprintf(stderr, "Instruction compact/uncompact changed:\n"); + + fprintf(stderr, " before: "); + brw_disasm(stderr, orig, intel->gen); + + fprintf(stderr, " after: "); + brw_disasm(stderr, uncompacted, intel->gen); + + uint32_t *before_bits = (uint32_t *)orig; + uint32_t *after_bits = (uint32_t *)uncompacted; + printf(" changed bits:\n"); + for (int i = 0; i < 128; i++) { + uint32_t before 
= before_bits[i / 32] & (1 << (i & 31)); + uint32_t after = after_bits[i / 32] & (1 << (i & 31)); + + if (before != after) { + printf(" bit %d, %s to %s\n", i, + before ? "set" : "unset", + after ? "set" : "unset"); + } + } +} + +void +brw_compact_instructions(struct brw_compile *p) +{ + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; + void *store = p->store; + + assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0); + assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); + assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); + assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0); + + if (intel->gen != 6) + return; + + /* FINISHME: If we are going to compress instructions between flow control, + * we have to do fixups to flow control offsets to represent the new + * distances, since flow control uses (virtual address distance)/2, not a + * logical instruction count. We can at least compress up until an IF + * instruction, but there's no instruction indicating the start of a + * do/while loop. 
+ */ + bool continue_compressing = true; + for (int i = 0; i < p->nr_insn; i++) { + if (p->store[i].header.opcode == BRW_OPCODE_WHILE) + return; + } + + int src_offset; + int offset = 0; + for (src_offset = 0; src_offset < p->nr_insn * 16;) { + struct brw_instruction *src = store + src_offset; + void *dst = store + offset; + + switch (src->header.opcode) { + case BRW_OPCODE_IF: + case BRW_OPCODE_HALT: + case BRW_OPCODE_JMPI: + continue_compressing = false; + break; + } + + struct brw_instruction saved = *src; + + if (continue_compressing && + !src->header.cmpt_control && + brw_try_compact_instruction(p, dst, src)) { + + /* debug */ + if (INTEL_DEBUG) { + struct brw_instruction uncompacted; + brw_uncompact_instruction(intel, &uncompacted, dst); + if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) { + brw_debug_compact_uncompact(intel, &saved, &uncompacted); + } + } + + offset += 8; + src_offset += 16; + } else { + int size = src->header.cmpt_control ? 8 : 16; + + /* It appears that the end of thread SEND instruction needs to be + * aligned, or the GPU hangs. + */ + if ((src->header.opcode == BRW_OPCODE_SEND || + src->header.opcode == BRW_OPCODE_SENDC) && + src->bits3.generic.end_of_thread && + (offset & 8) != 0) { + struct brw_compact_instruction *align = store + offset; + memset(align, 0, sizeof(*align)); + align->dw0.opcode = BRW_OPCODE_NOP; + align->dw0.cmpt_ctrl = 1; + offset += 8; + dst = store + offset; + } + + /* If we didn't compact this instruction, we need to move it down into + * place. + */ + if (offset != src_offset) { + memmove(dst, src, size); + } + offset += size; + src_offset += size; + } + } + + /* p->nr_insn is counting the number of uncompacted instructions still, so + * divide. We do want to be sure there's a valid instruction in any + * alignment padding, so that the next compression pass (for the FS 8/16 + * compile passes) parses correctly. 
+ */ + if (offset & 8) { + struct brw_compact_instruction *align = store + offset; + memset(align, 0, sizeof(*align)); + align->dw0.opcode = BRW_OPCODE_NOP; + align->dw0.cmpt_ctrl = 1; + offset += 8; + } + p->next_insn_offset = offset; + p->nr_insn = offset / 16; + + if (0) { + fprintf(stdout, "dumping compacted program\n"); + brw_dump_compile(p, stdout, 0, p->next_insn_offset); + + int cmp = 0; + for (offset = 0; offset < p->next_insn_offset;) { + struct brw_instruction *insn = store + offset; + + if (insn->header.cmpt_control) { + offset += 8; + cmp++; + } else { + offset += 16; + } + } + fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8, + cmp * 8 * 100 / (offset + cmp * 8)); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_eu_debug.c b/src/mesa/drivers/dri/i965/brw_eu_debug.c index 99453afdcaf..a8e10a9edf6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_debug.c +++ b/src/mesa/drivers/dri/i965/brw_eu_debug.c @@ -32,6 +32,7 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "brw_context.h" #include "brw_eu.h" void brw_print_reg( struct brw_reg hwreg ) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index cd6819176bc..9ac2a49d948 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1947,6 +1947,12 @@ fs_visitor::run() brw_wm_payload_setup(brw, c); if (c->dispatch_width == 16) { + /* We have to do a compaction pass now, or the one at the end of + * execution will squash down where our prog_offset start needs + * to be. + */ + brw_compact_instructions(p); + /* align to 64 byte boundary. 
*/ while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) { brw_NOP(p); diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 465d2a28a8e..26def6e9054 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1048,6 +1048,8 @@ struct brw_instruction GLuint dest_subreg_nr:3; GLuint dest_reg_nr:8; } da3src; + + uint32_t ud; } bits1; @@ -1137,6 +1139,8 @@ struct brw_instruction GLuint src1_swizzle:8; GLuint src1_subreg_nr_low:2; } da3src; + + uint32_t ud; } bits2; union @@ -1534,5 +1538,27 @@ struct brw_instruction } bits3; }; +struct brw_compact_instruction { + struct { + unsigned opcode:7; /* 0- 6 */ + unsigned debug_control:1; /* 7- 7 */ + unsigned control_index:5; /* 8-12 */ + unsigned data_type_index:5; /* 13-17 */ + unsigned sub_reg_index:5; /* 18-22 */ + unsigned acc_wr_control:1; /* 23-23 */ + unsigned conditionalmod:4; /* 24-27 */ + unsigned flag_reg_nr:1; /* 28-28 */ + unsigned cmpt_ctrl:1; /* 29-29 */ + unsigned src0_index:2; /* 30-31 */ + } dw0; + + struct { + unsigned src0_index:3; /* 32-34 */ + unsigned src1_index:5; /* 35-39 */ + unsigned dst_reg_nr:8; /* 40-47 */ + unsigned src0_reg_nr:8; /* 48-55 */ + unsigned src1_reg_nr:8; /* 56-63 */ + } dw1; +}; #endif diff --git a/src/mesa/drivers/dri/i965/test_eu_compact.c b/src/mesa/drivers/dri/i965/test_eu_compact.c new file mode 100644 index 00000000000..e9d43015ea3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/test_eu_compact.c @@ -0,0 +1,296 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, 
subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include "glsl/ralloc.h" +#include "brw_context.h" +#include "brw_eu.h" + +static bool +test_compact_instruction(struct brw_compile *p, struct brw_instruction src) +{ + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; + + struct brw_compact_instruction dst; + memset(&dst, 0xd0, sizeof(dst)); + + if (brw_try_compact_instruction(p, &dst, &src)) { + struct brw_instruction uncompacted; + + brw_uncompact_instruction(intel, &uncompacted, &dst); + if (memcmp(&uncompacted, &src, sizeof(src))) { + brw_debug_compact_uncompact(intel, &src, &uncompacted); + return false; + } + } else { + struct brw_compact_instruction unchanged; + memset(&unchanged, 0xd0, sizeof(unchanged)); + /* It's not supposed to change dst unless it compacted. */ + if (memcmp(&unchanged, &dst, sizeof(dst))) { + fprintf(stderr, "Failed to compact, but dst changed\n"); + fprintf(stderr, " Instruction: "); + brw_disasm(stderr, &src, intel->gen); + return false; + } + } + + return true; +} + +/** + * When doing fuzz testing, pad bits won't round-trip. + * + * This is sort of a superset of skip_bit, which is testing for changing bits that + * aren't worth testing for fuzzing. 
We also just want to clear bits that + * become meaningless once fuzzing twiddles a related bit. + */ +static void +clear_pad_bits(struct brw_instruction *inst) +{ + if (inst->header.opcode != BRW_OPCODE_SEND && + inst->header.opcode != BRW_OPCODE_SENDC && + inst->header.opcode != BRW_OPCODE_BREAK && + inst->header.opcode != BRW_OPCODE_CONTINUE && + inst->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE && + inst->bits1.da1.src1_reg_file != BRW_IMMEDIATE_VALUE) { + if (inst->bits3.da1.src1_address_mode) + inst->bits3.ia1.pad1 = 0; + else + inst->bits3.da1.pad0 = 0; + } +} + +static bool +skip_bit(struct brw_instruction *src, int bit) +{ + /* pad bit */ + if (bit == 7) + return true; + + /* The compact bit -- uncompacted can't have it set. */ + if (bit == 29) + return true; + + /* pad bit */ + if (bit == 47) + return true; + + /* pad bits */ + if (bit >= 90 && bit <= 95) + return true; + + /* sometimes these are pad bits. */ + if (src->header.opcode != BRW_OPCODE_SEND && + src->header.opcode != BRW_OPCODE_SENDC && + src->header.opcode != BRW_OPCODE_BREAK && + src->header.opcode != BRW_OPCODE_CONTINUE && + src->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE && + src->bits1.da1.src1_reg_file != BRW_IMMEDIATE_VALUE && + bit >= 121) { + return true; + } + + return false; +} + +static bool +test_fuzz_compact_instruction(struct brw_compile *p, + struct brw_instruction src) +{ + for (int bit0 = 0; bit0 < 128; bit0++) { + if (skip_bit(&src, bit0)) + continue; + + for (int bit1 = 0; bit1 < 128; bit1++) { + struct brw_instruction instr = src; + uint32_t *bits = (uint32_t *)&instr; + + if (skip_bit(&src, bit1)) + continue; + + bits[bit0 / 32] ^= (1 << (bit0 & 31)); + bits[bit1 / 32] ^= (1 << (bit1 & 31)); + + clear_pad_bits(&instr); + + if (!test_compact_instruction(p, instr)) { + printf(" twiddled bits for fuzzing %d, %d\n", bit0, bit1); + return false; + } + } + } + + return true; +} + +static void +gen_ADD_GRF_GRF_GRF(struct brw_compile *p) +{ + struct brw_reg g0 = 
brw_vec8_grf(0, 0); + struct brw_reg g2 = brw_vec8_grf(2, 0); + struct brw_reg g4 = brw_vec8_grf(4, 0); + + brw_ADD(p, g0, g2, g4); +} + +static void +gen_ADD_GRF_GRF_IMM(struct brw_compile *p) +{ + struct brw_reg g0 = brw_vec8_grf(0, 0); + struct brw_reg g2 = brw_vec8_grf(2, 0); + + brw_ADD(p, g0, g2, brw_imm_f(1.0)); +} + +static void +gen_ADD_GRF_GRF_IMM_d(struct brw_compile *p) +{ + struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D); + struct brw_reg g2 = retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_D); + + brw_ADD(p, g0, g2, brw_imm_d(1)); +} + +static void +gen_MOV_GRF_GRF(struct brw_compile *p) +{ + struct brw_reg g0 = brw_vec8_grf(0, 0); + struct brw_reg g2 = brw_vec8_grf(2, 0); + + brw_MOV(p, g0, g2); +} + +static void +gen_ADD_MRF_GRF_GRF(struct brw_compile *p) +{ + struct brw_reg m6 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0); + struct brw_reg g2 = brw_vec8_grf(2, 0); + struct brw_reg g4 = brw_vec8_grf(4, 0); + + brw_ADD(p, m6, g2, g4); +} + +static void +gen_ADD_vec1_GRF_GRF_GRF(struct brw_compile *p) +{ + struct brw_reg g0 = brw_vec1_grf(0, 0); + struct brw_reg g2 = brw_vec1_grf(2, 0); + struct brw_reg g4 = brw_vec1_grf(4, 0); + + brw_ADD(p, g0, g2, g4); +} + +static void +gen_PLN_MRF_GRF_GRF(struct brw_compile *p) +{ + struct brw_reg m6 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0); + struct brw_reg interp = brw_vec1_grf(2, 0); + struct brw_reg g4 = brw_vec8_grf(4, 0); + + brw_PLN(p, m6, interp, g4); +} + +static void +gen_f0_MOV_GRF_GRF(struct brw_compile *p) +{ + struct brw_reg g0 = brw_vec8_grf(0, 0); + struct brw_reg g2 = brw_vec8_grf(2, 0); + + brw_push_insn_state(p); + brw_set_predicate_control(p, true); + brw_MOV(p, g0, g2); + brw_pop_insn_state(p); +} + +/* The handling of f1 vs f0 changes between gen6 and gen7. Explicitly test + * it, so that the fuzzing can run over all the other bits that might + * interact with it. 
+ */ +static void +gen_f1_MOV_GRF_GRF(struct brw_compile *p) +{ + struct brw_reg g0 = brw_vec8_grf(0, 0); + struct brw_reg g2 = brw_vec8_grf(2, 0); + + brw_push_insn_state(p); + brw_set_predicate_control(p, true); + current_insn(p)->bits2.da1.flag_reg_nr = 1; + brw_MOV(p, g0, g2); + brw_pop_insn_state(p); +} + +struct { + void (*func)(struct brw_compile *p); +} tests[] = { + { gen_MOV_GRF_GRF }, + { gen_ADD_GRF_GRF_GRF }, + { gen_ADD_GRF_GRF_IMM }, + { gen_ADD_GRF_GRF_IMM_d }, + { gen_ADD_MRF_GRF_GRF }, + { gen_ADD_vec1_GRF_GRF_GRF }, + { gen_PLN_MRF_GRF_GRF }, + { gen_f0_MOV_GRF_GRF }, + { gen_f1_MOV_GRF_GRF }, +}; + +int +main(int argc, char **argv) +{ + struct brw_context *brw = calloc(1, sizeof(*brw)); + struct intel_context *intel = &brw->intel; + intel->gen = 6; + int ret = 0; + + for (int i = 0; i < ARRAY_SIZE(tests); i++) { + for (int align_16 = 0; align_16 <= 1; align_16++) { + struct brw_compile *p = rzalloc(NULL, struct brw_compile); + brw_init_compile(brw, p, p); + + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + if (align_16) + brw_set_access_mode(p, BRW_ALIGN_16); + else + brw_set_access_mode(p, BRW_ALIGN_1); + + tests[i].func(p); + assert(p->nr_insn == 1); + + if (!test_compact_instruction(p, p->store[0])) { + ret = 1; + continue; + } + + if (!test_fuzz_compact_instruction(p, p->store[0])) { + ret = 1; + continue; + } + + ralloc_free(p); + } + } + + return ret; +} |