aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/.gitignore1
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.am14
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c38
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h13
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_compact.c558
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_debug.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp6
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h26
-rw-r--r--src/mesa/drivers/dri/i965/test_eu_compact.c296
10 files changed, 946 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/.gitignore b/src/mesa/drivers/dri/i965/.gitignore
index fe4578e39d4..c6ea403f5db 100644
--- a/src/mesa/drivers/dri/i965/.gitignore
+++ b/src/mesa/drivers/dri/i965/.gitignore
@@ -1,3 +1,4 @@
Makefile
i965_symbols_test
libi965_dri.la
+test_eu_compact
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 0ac3de75ca6..5bb62c49efd 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -51,16 +51,30 @@ libi965_dri_la_SOURCES = \
$(i965_C_FILES) \
$(i965_CXX_FILES)
+# list of libs to be linked against by i965_dri.so and i965 test programs.
COMMON_LIBS = \
libi965_dri.la \
$(DRI_LIB_DEPS) \
$(INTEL_LIBS) \
../common/libdricommon.la
+TEST_LIBS = \
+ $(COMMON_LIBS) \
+ -lrt \
+ ../common/libdri_test_stubs.la
+
i965_dri_la_SOURCES =
i965_dri_la_LIBADD = $(COMMON_LIBS)
i965_dri_la_LDFLAGS = -module -avoid-version -shared
+TESTS = test_eu_compact
+check_PROGRAMS = test_eu_compact
+
+test_eu_compact_SOURCES = \
+ test_eu_compact.c
+nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp
+test_eu_compact_LDADD = $(TEST_LIBS)
+
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: i965_dri.la
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index d6d189a1841..3715b0f300f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -44,6 +44,7 @@ i965_C_FILES = \
brw_draw.c \
brw_draw_upload.c \
brw_eu.c \
+ brw_eu_compact.c \
brw_eu_debug.c \
brw_eu_emit.c \
brw_eu_util.c \
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 20a8ec4229e..02a07ec17b4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -214,6 +214,11 @@ const GLuint *brw_get_program( struct brw_compile *p,
{
GLuint i;
+ brw_compact_instructions(p);
+
+ /* We emit a cacheline (8 instructions) of NOPs at the end of the program to
+ * make sure that instruction prefetch doesn't wander off into some other BO.
+ */
for (i = 0; i < 8; i++)
brw_NOP(p);
@@ -224,19 +229,36 @@ const GLuint *brw_get_program( struct brw_compile *p,
void
brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end)
{
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
void *store = p->store;
+ bool dump_hex = false;
- for (int offset = start; offset < end; offset += 16) {
+ for (int offset = start; offset < end;) {
struct brw_instruction *insn = store + offset;
-
+ struct brw_instruction uncompacted;
printf("0x%08x: ", offset);
- if (0) {
- printf("0x%08x 0x%08x 0x%08x 0x%08x ",
- ((uint32_t *)insn)[3],
- ((uint32_t *)insn)[2],
- ((uint32_t *)insn)[1],
- ((uint32_t *)insn)[0]);
+ if (insn->header.cmpt_control) {
+ struct brw_compact_instruction *compacted = (void *)insn;
+ if (dump_hex) {
+ printf("0x%08x 0x%08x ",
+ ((uint32_t *)insn)[1],
+ ((uint32_t *)insn)[0]);
+ }
+
+ brw_uncompact_instruction(intel, &uncompacted, compacted);
+ insn = &uncompacted;
+ offset += 8;
+ } else {
+ if (dump_hex) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)insn)[3],
+ ((uint32_t *)insn)[2],
+ ((uint32_t *)insn)[1],
+ ((uint32_t *)insn)[0]);
+ }
+ offset += 16;
}
brw_disasm(stdout, insn, p->brw->intel.gen);
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 2fa84dff490..21967bd0cd7 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -1107,6 +1107,19 @@ void brw_set_uip_jip(struct brw_compile *p);
uint32_t brw_swap_cmod(uint32_t cmod);
+/* brw_eu_compact.c */
+void brw_compact_instructions(struct brw_compile *p);
+void brw_uncompact_instruction(struct intel_context *intel,
+ struct brw_instruction *dst,
+ struct brw_compact_instruction *src);
+bool brw_try_compact_instruction(struct brw_compile *p,
+ struct brw_compact_instruction *dst,
+ struct brw_instruction *src);
+
+void brw_debug_compact_uncompact(struct intel_context *intel,
+ struct brw_instruction *orig,
+ struct brw_instruction *uncompacted);
+
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
new file mode 100644
index 00000000000..210657adce0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -0,0 +1,558 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_eu_compact.c
+ *
+ * Instruction compaction is a feature of gm45 and newer hardware that allows
+ * for a smaller instruction encoding.
+ *
+ * The instruction cache is on the order of 32KB, and many programs generate
+ * far more instructions than that. The instruction cache is built to barely
+ * keep up with instruction dispatch ability in cache hit cases -- L1
+ * instruction cache misses that still hit in the next level could limit
+ * throughput by around 50%.
+ *
+ * The idea of instruction compaction is that most instructions use a tiny
+ * subset of the GPU functionality, so we can encode what would be a 16 byte
+ * instruction in 8 bytes using some lookup tables for various fields.
+ */
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+/**
+ * Gen6 control-index lookup table; an entry's position is the 5-bit
+ * control_index stored in a compact instruction.
+ *
+ * Each entry is a 17-bit pattern in the form built by set_control_index():
+ * bits 0-15 hold bits 8-23 of the uncompacted instruction's first dword, and
+ * bit 16 holds bit 31 of that dword.
+ */
+static const uint32_t gen6_control_index_table[32] = {
+ 0b00000000000000000,
+ 0b01000000000000000,
+ 0b00110000000000000,
+ 0b00000000100000000,
+ 0b00010000000000000,
+ 0b00001000100000000,
+ 0b00000000100000010,
+ 0b00000000000000010,
+ 0b01000000100000000,
+ 0b01010000000000000,
+ 0b10110000000000000,
+ 0b00100000000000000,
+ 0b11010000000000000,
+ 0b11000000000000000,
+ 0b01001000100000000,
+ 0b01000000000001000,
+ 0b01000000000000100,
+ 0b00000000000001000,
+ 0b00000000000000100,
+ 0b00111000100000000,
+ 0b00001000100000010,
+ 0b00110000100000000,
+ 0b00110000000000001,
+ 0b00100000000000001,
+ 0b00110000000000010,
+ 0b00110000000000101,
+ 0b00110000000001001,
+ 0b00110000000010000,
+ 0b00110000000000011,
+ 0b00110000000000100,
+ 0b00110000100001000,
+ 0b00100000000001001
+};
+
+/**
+ * Gen6 datatype lookup table; an entry's position is the 5-bit
+ * data_type_index stored in a compact instruction.
+ *
+ * Each entry is an 18-bit pattern in the form built by set_datatype_index():
+ * bits 0-14 hold the low 15 bits of bits1.ud, and bits 15-17 hold bits 29-31
+ * of bits1.ud.
+ */
+static const uint32_t gen6_datatype_table[32] = {
+ 0b001001110000000000,
+ 0b001000110000100000,
+ 0b001001110000000001,
+ 0b001000000001100000,
+ 0b001010110100101001,
+ 0b001000000110101101,
+ 0b001100011000101100,
+ 0b001011110110101101,
+ 0b001000000111101100,
+ 0b001000000001100001,
+ 0b001000110010100101,
+ 0b001000000001000001,
+ 0b001000001000110001,
+ 0b001000001000101001,
+ 0b001000000000100000,
+ 0b001000001000110010,
+ 0b001010010100101001,
+ 0b001011010010100101,
+ 0b001000000110100101,
+ 0b001100011000101001,
+ 0b001011011000101100,
+ 0b001011010110100101,
+ 0b001011110110100101,
+ 0b001111011110111101,
+ 0b001111011110111100,
+ 0b001111011110111101,
+ 0b001111011110011101,
+ 0b001111011110111110,
+ 0b001000000000100001,
+ 0b001000000000100010,
+ 0b001001111111011101,
+ 0b001000001110111110,
+};
+
+/**
+ * Gen6 sub-register lookup table; an entry's position is the 5-bit
+ * sub_reg_index stored in a compact instruction.
+ *
+ * Each entry is a 15-bit pattern in the form built by set_subreg_index():
+ * dest_subreg_nr in bits 0-4, src0_subreg_nr in bits 5-9 and src1_subreg_nr
+ * in bits 10-14.
+ */
+static const uint32_t gen6_subreg_table[32] = {
+ 0b000000000000000,
+ 0b000000000000100,
+ 0b000000110000000,
+ 0b111000000000000,
+ 0b011110000001000,
+ 0b000010000000000,
+ 0b000000000010000,
+ 0b000110000001100,
+ 0b001000000000000,
+ 0b000001000000000,
+ 0b000001010010100,
+ 0b000000001010110,
+ 0b010000000000000,
+ 0b110000000000000,
+ 0b000100000000000,
+ 0b000000010000000,
+ 0b000000000001000,
+ 0b100000000000000,
+ 0b000001010000000,
+ 0b001010000000000,
+ 0b001100000000000,
+ 0b000000001010100,
+ 0b101101010010100,
+ 0b010100000000000,
+ 0b000000010001111,
+ 0b011000000000000,
+ 0b111110000000000,
+ 0b101000000000000,
+ 0b000000000001111,
+ 0b000100010001111,
+ 0b001000010001111,
+ 0b000110000000000,
+};
+
+/**
+ * Gen6 source-operand lookup table, shared by the src0 and src1 indices of a
+ * compact instruction; an entry's position is the 5-bit index.
+ *
+ * Each entry is a 12-bit pattern holding bits 13-24 of bits2.ud (for src0) or
+ * of bits3.ud (for src1), as extracted by set_src0_index() and
+ * set_src1_index().
+ */
+static const uint32_t gen6_src_index_table[32] = {
+ 0b000000000000,
+ 0b010110001000,
+ 0b010001101000,
+ 0b001000101000,
+ 0b011010010000,
+ 0b000100100000,
+ 0b010001101100,
+ 0b010101110000,
+ 0b011001111000,
+ 0b001100101000,
+ 0b010110001100,
+ 0b001000100000,
+ 0b010110001010,
+ 0b000000000010,
+ 0b010101010000,
+ 0b010101101000,
+ 0b111101001100,
+ 0b111100101100,
+ 0b011001110000,
+ 0b010110001001,
+ 0b010101011000,
+ 0b001101001000,
+ 0b010000101100,
+ 0b010000000000,
+ 0b001101110000,
+ 0b001100010000,
+ 0b001100000000,
+ 0b010001101010,
+ 0b001101111000,
+ 0b000001110000,
+ 0b001100100000,
+ 0b001101010000,
+};
+
+/**
+ * Looks up the control bits of src in gen6_control_index_table and, on a
+ * match, stores the matching position in dst->dw0.control_index.
+ *
+ * Returns false, without touching dst, when the table has no such entry.
+ */
+static bool
+set_control_index(struct brw_compact_instruction *dst,
+                  struct brw_instruction *src)
+{
+   const uint32_t dw0 = ((uint32_t *)src)[0];
+   /* Bits 8-23 of the first dword go to bits 0-15, bit 31 goes to bit 16. */
+   const uint32_t key = ((dw0 >> 8) & 0xffff) | (((dw0 >> 31) & 0x1) << 16);
+   int idx = 0;
+
+   while (idx < ARRAY_SIZE(gen6_control_index_table)) {
+      if (key == gen6_control_index_table[idx]) {
+         dst->dw0.control_index = idx;
+         return true;
+      }
+      idx++;
+   }
+
+   return false;
+}
+
+/**
+ * Looks up the datatype bits of src in gen6_datatype_table and, on a match,
+ * stores the matching position in dst->dw0.data_type_index.
+ *
+ * Returns false, without touching dst, when the table has no such entry.
+ */
+static bool
+set_datatype_index(struct brw_compact_instruction *dst,
+                   struct brw_instruction *src)
+{
+   const uint32_t dw1 = src->bits1.ud;
+   /* Low 15 bits stay in place; the top three bits move down to bit 15. */
+   const uint32_t key = (dw1 & 0x7fff) | ((dw1 >> 29) << 15);
+   int idx = 0;
+
+   while (idx < ARRAY_SIZE(gen6_datatype_table)) {
+      if (key == gen6_datatype_table[idx]) {
+         dst->dw0.data_type_index = idx;
+         return true;
+      }
+      idx++;
+   }
+
+   return false;
+}
+
+/**
+ * Looks up the three sub-register numbers of src in gen6_subreg_table and,
+ * on a match, stores the matching position in dst->dw0.sub_reg_index.
+ *
+ * Returns false, without touching dst, when the table has no such entry.
+ */
+static bool
+set_subreg_index(struct brw_compact_instruction *dst,
+                 struct brw_instruction *src)
+{
+   /* Pack dest, src0 and src1 sub-register numbers at bits 0, 5 and 10. */
+   const uint32_t key = (src->bits1.da1.dest_subreg_nr << 0) |
+                        (src->bits2.da1.src0_subreg_nr << 5) |
+                        (src->bits3.da1.src1_subreg_nr << 10);
+   int idx = 0;
+
+   while (idx < ARRAY_SIZE(gen6_subreg_table)) {
+      if (key == gen6_subreg_table[idx]) {
+         dst->dw0.sub_reg_index = idx;
+         return true;
+      }
+      idx++;
+   }
+
+   return false;
+}
+
+/**
+ * Searches gen6_src_index_table for the 12-bit pattern 'uncompacted'.
+ *
+ * On success writes the position of the first matching entry to *compacted
+ * and returns true; otherwise returns false and leaves *compacted alone.
+ */
+static bool
+get_src_index(uint32_t uncompacted,
+              uint32_t *compacted)
+{
+   int idx = 0;
+
+   while (idx < ARRAY_SIZE(gen6_src_index_table)) {
+      if (uncompacted == gen6_src_index_table[idx]) {
+         *compacted = idx;
+         return true;
+      }
+      idx++;
+   }
+
+   return false;
+}
+
+/**
+ * Encodes the src0 operand bits (bits 13-24 of bits2.ud) of src as a table
+ * index split across dst: the low two bits go to dw0.src0_index and the
+ * remaining bits to dw1.src0_index.
+ *
+ * Returns false, without touching dst, when the pattern is not in the table.
+ */
+static bool
+set_src0_index(struct brw_compact_instruction *dst,
+               struct brw_instruction *src)
+{
+   const uint32_t pattern = (src->bits2.ud >> 13) & 0xfff;
+   uint32_t index;
+
+   if (get_src_index(pattern, &index)) {
+      dst->dw0.src0_index = index & 0x3;
+      dst->dw1.src0_index = index >> 2;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Encodes the src1 operand bits (bits 13-24 of bits3.ud) of src as a table
+ * index stored in dst->dw1.src1_index.
+ *
+ * Returns false, without touching dst, when the pattern is not in the table.
+ */
+static bool
+set_src1_index(struct brw_compact_instruction *dst,
+               struct brw_instruction *src)
+{
+   const uint32_t pattern = (src->bits3.ud >> 13) & 0xfff;
+   uint32_t index;
+
+   if (get_src_index(pattern, &index)) {
+      dst->dw1.src1_index = index;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Attempts to encode the full-size instruction src in compact form.
+ *
+ * The encoding is built in a local temporary and copied to dst only once
+ * every table lookup has succeeded, so dst is left unmodified whenever this
+ * returns false.  brw_compact_instructions() relies on that guarantee.
+ *
+ * Returns true if src was compacted into dst, false otherwise.
+ */
+bool
+brw_try_compact_instruction(struct brw_compile *p,
+                            struct brw_compact_instruction *dst,
+                            struct brw_instruction *src)
+{
+   struct brw_compact_instruction packed;
+
+   /* FINISHME: immediates */
+   if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+      return false;
+   if (src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+      return false;
+
+   memset(&packed, 0, sizeof(packed));
+
+   /* Every table-driven field must have a matching table entry. */
+   if (!set_control_index(&packed, src) ||
+       !set_datatype_index(&packed, src) ||
+       !set_subreg_index(&packed, src) ||
+       !set_src0_index(&packed, src) ||
+       !set_src1_index(&packed, src))
+      return false;
+
+   /* The remaining fields are copied across verbatim. */
+   packed.dw0.opcode = src->header.opcode;
+   packed.dw0.debug_control = src->header.debug_control;
+   packed.dw0.acc_wr_control = src->header.acc_wr_control;
+   packed.dw0.conditionalmod = src->header.destreg__conditionalmod;
+   packed.dw0.flag_reg_nr = src->bits2.da1.flag_reg_nr;
+   packed.dw0.cmpt_ctrl = 1;
+   packed.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
+   packed.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
+   packed.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
+
+   *dst = packed;
+
+   return true;
+}
+
+/**
+ * ORs the control bits selected by src->dw0.control_index back into the first
+ * dword of dst: table bits 0-15 go to bits 8-23 and table bit 16 goes to
+ * bit 31.
+ */
+static void
+set_uncompacted_control(struct brw_instruction *dst,
+                        struct brw_compact_instruction *src)
+{
+   const uint32_t pattern = gen6_control_index_table[src->dw0.control_index];
+   uint32_t *dw0 = (uint32_t *)dst;
+   const uint32_t low_bits = (pattern & 0xffff) << 8;
+   const uint32_t top_bit = ((pattern >> 16) & 0x1) << 31;
+
+   *dw0 |= low_bits;
+   *dw0 |= top_bit;
+}
+
+/**
+ * Writes the datatype bits selected by src->dw0.data_type_index into
+ * dst->bits1.ud: table bits 0-14 replace bits 0-14 and table bits 15-17
+ * replace bits 29-31.  All other bits of dst->bits1.ud are preserved.
+ */
+static void
+set_uncompacted_datatype(struct brw_instruction *dst,
+                         struct brw_compact_instruction *src)
+{
+   const uint32_t uncompacted = gen6_datatype_table[src->dw0.data_type_index];
+   /* Unsigned constants: shifting a signed 0x7 left by 29 overflows int. */
+   const uint32_t high_mask = UINT32_C(0x7) << 29;
+   const uint32_t low_mask = UINT32_C(0x7fff);
+   uint32_t bits = dst->bits1.ud;
+
+   bits &= ~(high_mask | low_mask);
+   bits |= ((uncompacted >> 15) & 0x7) << 29;
+   bits |= uncompacted & low_mask;
+
+   dst->bits1.ud = bits;
+}
+
+/**
+ * Unpacks the table entry selected by src->dw0.sub_reg_index into the dest,
+ * src0 and src1 sub-register numbers of dst (five bits each, at table bits
+ * 0, 5 and 10).
+ */
+static void
+set_uncompacted_subreg(struct brw_instruction *dst,
+                       struct brw_compact_instruction *src)
+{
+   const uint32_t pattern = gen6_subreg_table[src->dw0.sub_reg_index];
+   const uint32_t dest_subreg = pattern & 0x1f;
+   const uint32_t src0_subreg = (pattern >> 5) & 0x1f;
+   const uint32_t src1_subreg = (pattern >> 10) & 0x1f;
+
+   dst->bits1.da1.dest_subreg_nr = dest_subreg;
+   dst->bits2.da1.src0_subreg_nr = src0_subreg;
+   dst->bits3.da1.src1_subreg_nr = src1_subreg;
+}
+
+/**
+ * Rebuilds the 5-bit src0 table index from its two halves in src and ORs the
+ * selected table entry into dst->bits2.ud starting at bit 13.
+ */
+static void
+set_uncompacted_src0(struct brw_instruction *dst,
+                     struct brw_compact_instruction *src)
+{
+   /* Low two bits live in dw0, the upper bits in dw1. */
+   const uint32_t index = src->dw0.src0_index | src->dw1.src0_index << 2;
+   const uint32_t pattern = gen6_src_index_table[index];
+
+   dst->bits2.ud |= pattern << 13;
+}
+
+/**
+ * ORs the table entry selected by src->dw1.src1_index into dst->bits3.ud
+ * starting at bit 13.
+ */
+static void
+set_uncompacted_src1(struct brw_instruction *dst,
+                     struct brw_compact_instruction *src)
+{
+   const uint32_t index = src->dw1.src1_index;
+   const uint32_t pattern = gen6_src_index_table[index];
+
+   dst->bits3.ud |= pattern << 13;
+}
+
+/**
+ * Expands the compact instruction src into the full-size instruction dst.
+ *
+ * dst is zeroed first, then filled from the fields src stores directly and
+ * from the gen6 lookup tables selected by src's index fields.  The intel
+ * argument is not used by this function.
+ */
+void
+brw_uncompact_instruction(struct intel_context *intel,
+ struct brw_instruction *dst,
+ struct brw_compact_instruction *src)
+{
+ /* Start from all-zero; several helpers below only OR bits into dst. */
+ memset(dst, 0, sizeof(*dst));
+
+ dst->header.opcode = src->dw0.opcode;
+ dst->header.debug_control = src->dw0.debug_control;
+
+ set_uncompacted_control(dst, src);
+ set_uncompacted_datatype(dst, src);
+ set_uncompacted_subreg(dst, src);
+ dst->header.acc_wr_control = src->dw0.acc_wr_control;
+ dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
+ dst->bits2.da1.flag_reg_nr = src->dw0.flag_reg_nr;
+ set_uncompacted_src0(dst, src);
+ set_uncompacted_src1(dst, src);
+ dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
+ dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
+ dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
+}
+
+/**
+ * Reports a compact/uncompact round-trip mismatch.
+ *
+ * Disassembles both the original instruction and the result of
+ * uncompacting it, then lists every one of the 128 instruction bits whose
+ * value differs.  All output goes to stderr so the report stays together.
+ */
+void brw_debug_compact_uncompact(struct intel_context *intel,
+                                 struct brw_instruction *orig,
+                                 struct brw_instruction *uncompacted)
+{
+   fprintf(stderr, "Instruction compact/uncompact changed:\n");
+
+   fprintf(stderr, " before: ");
+   brw_disasm(stderr, orig, intel->gen);
+
+   fprintf(stderr, " after: ");
+   brw_disasm(stderr, uncompacted, intel->gen);
+
+   const uint32_t *before_bits = (const uint32_t *)orig;
+   const uint32_t *after_bits = (const uint32_t *)uncompacted;
+   fprintf(stderr, " changed bits:\n");
+   for (int i = 0; i < 128; i++) {
+      /* Unsigned one: a signed 1 << 31 would overflow int. */
+      const uint32_t mask = UINT32_C(1) << (i & 31);
+      const uint32_t before = before_bits[i / 32] & mask;
+      const uint32_t after = after_bits[i / 32] & mask;
+
+      if (before != after) {
+         fprintf(stderr, " bit %d, %s to %s\n", i,
+                 before ? "set" : "unset",
+                 after ? "set" : "unset");
+      }
+   }
+}
+
+/**
+ * Compacts the instructions held in p->store in place.
+ *
+ * Does nothing unless intel->gen is 6, and gives up entirely if the program
+ * contains a WHILE.  Otherwise each instruction is either replaced by its
+ * 8-byte compact form or moved down to close the gap left by earlier
+ * compactions.  Compaction stops at the first IF, HALT or JMPI; that
+ * instruction and everything after it is only moved.  On return
+ * p->next_insn_offset and p->nr_insn describe the shortened program, which
+ * is padded with a compact NOP to a multiple of 16 bytes.
+ */
+void
+brw_compact_instructions(struct brw_compile *p)
+{
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
+ void *store = p->store;
+
+ /* NOTE(review): these presumably guard against a table that has fewer
+ * initializers than its declared size, which would leave trailing zero
+ * entries -- confirm.
+ */
+ assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
+ assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
+ assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
+ assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
+
+ if (intel->gen != 6)
+ return;
+
+ /* FINISHME: If we are going to compress instructions between flow control,
+ * we have to do fixups to flow control offsets to represent the new
+ * distances, since flow control uses (virtual address distance)/2, not a
+ * logical instruction count. We can at least compress up until an IF
+ * instruction, but there's no instruction indicating the start of a
+ * do/while loop.
+ */
+ bool continue_compressing = true;
+ for (int i = 0; i < p->nr_insn; i++) {
+ if (p->store[i].header.opcode == BRW_OPCODE_WHILE)
+ return;
+ }
+
+ /* src_offset is the read position and offset the write position, both in
+ * bytes from the start of the store.
+ */
+ int src_offset;
+ int offset = 0;
+ for (src_offset = 0; src_offset < p->nr_insn * 16;) {
+ struct brw_instruction *src = store + src_offset;
+ void *dst = store + offset;
+
+ switch (src->header.opcode) {
+ case BRW_OPCODE_IF:
+ case BRW_OPCODE_HALT:
+ case BRW_OPCODE_JMPI:
+ continue_compressing = false;
+ break;
+ }
+
+ /* Keep a copy for the debug comparison: dst may overlap src, so a
+ * successful compaction can overwrite the original.
+ */
+ struct brw_instruction saved = *src;
+
+ if (continue_compressing &&
+ !src->header.cmpt_control &&
+ brw_try_compact_instruction(p, dst, src)) {
+
+ /* debug */
+ if (INTEL_DEBUG) {
+ struct brw_instruction uncompacted;
+ brw_uncompact_instruction(intel, &uncompacted, dst);
+ if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
+ brw_debug_compact_uncompact(intel, &saved, &uncompacted);
+ }
+ }
+
+ offset += 8;
+ src_offset += 16;
+ } else {
+ /* Instructions compacted by an earlier pass are already 8 bytes. */
+ int size = src->header.cmpt_control ? 8 : 16;
+
+ /* It appears that the end of thread SEND instruction needs to be
+ * aligned, or the GPU hangs.
+ */
+ if ((src->header.opcode == BRW_OPCODE_SEND ||
+ src->header.opcode == BRW_OPCODE_SENDC) &&
+ src->bits3.generic.end_of_thread &&
+ (offset & 8) != 0) {
+ struct brw_compact_instruction *align = store + offset;
+ memset(align, 0, sizeof(*align));
+ align->dw0.opcode = BRW_OPCODE_NOP;
+ align->dw0.cmpt_ctrl = 1;
+ offset += 8;
+ dst = store + offset;
+ }
+
+ /* If we didn't compact this instruction, we need to move it down into
+ * place.
+ */
+ if (offset != src_offset) {
+ memmove(dst, src, size);
+ }
+ offset += size;
+ src_offset += size;
+ }
+ }
+
+ /* p->nr_insn is counting the number of uncompacted instructions still, so
+ * divide. We do want to be sure there's a valid instruction in any
+ * alignment padding, so that the next compression pass (for the FS 8/16
+ * compile passes) parses correctly.
+ */
+ if (offset & 8) {
+ struct brw_compact_instruction *align = store + offset;
+ memset(align, 0, sizeof(*align));
+ align->dw0.opcode = BRW_OPCODE_NOP;
+ align->dw0.cmpt_ctrl = 1;
+ offset += 8;
+ }
+ p->next_insn_offset = offset;
+ p->nr_insn = offset / 16;
+
+ /* Disabled developer aid: dumps the compacted program and prints how many
+ * bytes compaction saved.
+ */
+ if (0) {
+ fprintf(stdout, "dumping compacted program\n");
+ brw_dump_compile(p, stdout, 0, p->next_insn_offset);
+
+ int cmp = 0;
+ for (offset = 0; offset < p->next_insn_offset;) {
+ struct brw_instruction *insn = store + offset;
+
+ if (insn->header.cmpt_control) {
+ offset += 8;
+ cmp++;
+ } else {
+ offset += 16;
+ }
+ }
+ fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
+ cmp * 8 * 100 / (offset + cmp * 8));
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_eu_debug.c b/src/mesa/drivers/dri/i965/brw_eu_debug.c
index 99453afdcaf..a8e10a9edf6 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_debug.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_debug.c
@@ -32,6 +32,7 @@
#include "main/mtypes.h"
#include "main/imports.h"
+#include "brw_context.h"
#include "brw_eu.h"
void brw_print_reg( struct brw_reg hwreg )
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index cd6819176bc..9ac2a49d948 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1947,6 +1947,12 @@ fs_visitor::run()
brw_wm_payload_setup(brw, c);
if (c->dispatch_width == 16) {
+ /* We have to do a compaction pass now, or the one at the end of
+ * execution will squash down where our prog_offset start needs
+ * to be.
+ */
+ brw_compact_instructions(p);
+
/* align to 64 byte boundary. */
while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) {
brw_NOP(p);
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 465d2a28a8e..26def6e9054 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1048,6 +1048,8 @@ struct brw_instruction
GLuint dest_subreg_nr:3;
GLuint dest_reg_nr:8;
} da3src;
+
+ uint32_t ud;
} bits1;
@@ -1137,6 +1139,8 @@ struct brw_instruction
GLuint src1_swizzle:8;
GLuint src1_subreg_nr_low:2;
} da3src;
+
+ uint32_t ud;
} bits2;
union
@@ -1534,5 +1538,27 @@ struct brw_instruction
} bits3;
};
+/**
+ * 64-bit compact instruction encoding, laid out as two dwords.
+ *
+ * The trailing comments give each field's bit range within the 64-bit
+ * instruction.  The 5-bit src0 table index is split across the dwords: its
+ * low two bits are dw0.src0_index and its upper three bits are
+ * dw1.src0_index.
+ */
+struct brw_compact_instruction {
+ struct {
+ unsigned opcode:7; /* 0- 6 */
+ unsigned debug_control:1; /* 7- 7 */
+ unsigned control_index:5; /* 8-12 */
+ unsigned data_type_index:5; /* 13-17 */
+ unsigned sub_reg_index:5; /* 18-22 */
+ unsigned acc_wr_control:1; /* 23-23 */
+ unsigned conditionalmod:4; /* 24-27 */
+ unsigned flag_reg_nr:1; /* 28-28 */
+ unsigned cmpt_ctrl:1; /* 29-29 */
+ unsigned src0_index:2; /* 30-31 */
+ } dw0;
+
+ struct {
+ unsigned src0_index:3; /* 32-34 */
+ unsigned src1_index:5; /* 35-39 */
+ unsigned dst_reg_nr:8; /* 40-47 */
+ unsigned src0_reg_nr:8; /* 48-55 */
+ unsigned src1_reg_nr:8; /* 56-63 */
+ } dw1;
+};
#endif
diff --git a/src/mesa/drivers/dri/i965/test_eu_compact.c b/src/mesa/drivers/dri/i965/test_eu_compact.c
new file mode 100644
index 00000000000..e9d43015ea3
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/test_eu_compact.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include "glsl/ralloc.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+/**
+ * Checks one instruction against the compaction code.
+ *
+ * If src compacts, uncompacting the result must reproduce src exactly.  If
+ * it does not compact, the destination buffer (pre-filled with 0xd0 bytes)
+ * must be left untouched.  Returns true when the applicable check passes.
+ */
+static bool
+test_compact_instruction(struct brw_compile *p, struct brw_instruction src)
+{
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_compact_instruction dst;
+
+   memset(&dst, 0xd0, sizeof(dst));
+
+   if (!brw_try_compact_instruction(p, &dst, &src)) {
+      struct brw_compact_instruction poison;
+
+      memset(&poison, 0xd0, sizeof(poison));
+      /* It's not supposed to change dst unless it compacted. */
+      if (memcmp(&poison, &dst, sizeof(dst)) == 0)
+         return true;
+
+      fprintf(stderr, "Failed to compact, but dst changed\n");
+      fprintf(stderr, " Instruction: ");
+      brw_disasm(stderr, &src, intel->gen);
+      return false;
+   }
+
+   struct brw_instruction roundtrip;
+
+   brw_uncompact_instruction(intel, &roundtrip, &dst);
+   if (memcmp(&roundtrip, &src, sizeof(src)) == 0)
+      return true;
+
+   brw_debug_compact_uncompact(intel, &src, &roundtrip);
+   return false;
+}
+
+/**
+ * Clears pad bits, which won't round-trip when fuzz testing.
+ *
+ * This is roughly a superset of skip_bit(), which rejects bits that aren't
+ * worth flipping in the first place.  Here we additionally clear bits that
+ * become meaningless once fuzzing has twiddled a related bit.
+ */
+static void
+clear_pad_bits(struct brw_instruction *inst)
+{
+   switch (inst->header.opcode) {
+   case BRW_OPCODE_SEND:
+   case BRW_OPCODE_SENDC:
+   case BRW_OPCODE_BREAK:
+   case BRW_OPCODE_CONTINUE:
+      return;
+   default:
+      break;
+   }
+
+   if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
+       inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+      return;
+
+   /* Which pad field exists depends on the src1 addressing mode. */
+   if (inst->bits3.da1.src1_address_mode)
+      inst->bits3.ia1.pad1 = 0;
+   else
+      inst->bits3.da1.pad0 = 0;
+}
+
+/**
+ * Tells the fuzzer whether flipping instruction bit 'bit' of src should be
+ * skipped.
+ *
+ * Returns true for the fixed pad bits (7, 47, 90-95), for the compact
+ * control bit (29), and for bits 121 and up when the instruction is not a
+ * SEND, SENDC, BREAK or CONTINUE and has no immediate source.
+ */
+static bool
+skip_bit(struct brw_instruction *src, int bit)
+{
+   switch (bit) {
+   case 7:  /* pad bit */
+   case 29: /* The compact bit -- uncompacted can't have it set. */
+   case 47: /* pad bit */
+      return true;
+   default:
+      break;
+   }
+
+   /* pad bits */
+   if (bit >= 90 && bit <= 95)
+      return true;
+
+   /* sometimes these are pad bits. */
+   if (bit < 121)
+      return false;
+
+   if (src->header.opcode == BRW_OPCODE_SEND ||
+       src->header.opcode == BRW_OPCODE_SENDC ||
+       src->header.opcode == BRW_OPCODE_BREAK ||
+       src->header.opcode == BRW_OPCODE_CONTINUE)
+      return false;
+
+   return src->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE &&
+          src->bits1.da1.src1_reg_file != BRW_IMMEDIATE_VALUE;
+}
+
+/**
+ * Fuzzes src by flipping every pair of non-skipped instruction bits and
+ * running test_compact_instruction() on each variant.
+ *
+ * When bit0 == bit1 the two flips cancel, so the unmodified instruction
+ * (after clear_pad_bits()) is tested as well.  Returns false at the first
+ * failing variant, after reporting which bits were flipped on stderr.
+ */
+static bool
+test_fuzz_compact_instruction(struct brw_compile *p,
+                              struct brw_instruction src)
+{
+   for (int bit0 = 0; bit0 < 128; bit0++) {
+      if (skip_bit(&src, bit0))
+         continue;
+
+      for (int bit1 = 0; bit1 < 128; bit1++) {
+         struct brw_instruction instr = src;
+         uint32_t *bits = (uint32_t *)&instr;
+
+         if (skip_bit(&src, bit1))
+            continue;
+
+         /* Unsigned one: a signed 1 << 31 would overflow int. */
+         bits[bit0 / 32] ^= UINT32_C(1) << (bit0 & 31);
+         bits[bit1 / 32] ^= UINT32_C(1) << (bit1 & 31);
+
+         clear_pad_bits(&instr);
+
+         if (!test_compact_instruction(p, instr)) {
+            /* Same stream as the failure report this line belongs to. */
+            fprintf(stderr, " twiddled bits for fuzzing %d, %d\n",
+                    bit0, bit1);
+            return false;
+         }
+      }
+   }
+
+   return true;
+}
+
+/* Emits "ADD g0, g2, g4" on 8-wide GRF registers. */
+static void
+gen_ADD_GRF_GRF_GRF(struct brw_compile *p)
+{
+   const struct brw_reg dst = brw_vec8_grf(0, 0);
+   const struct brw_reg src0 = brw_vec8_grf(2, 0);
+   const struct brw_reg src1 = brw_vec8_grf(4, 0);
+
+   brw_ADD(p, dst, src0, src1);
+}
+
+/* Emits "ADD g0, g2, 1.0" with a float immediate as the second source. */
+static void
+gen_ADD_GRF_GRF_IMM(struct brw_compile *p)
+{
+   const struct brw_reg dst = brw_vec8_grf(0, 0);
+   const struct brw_reg src0 = brw_vec8_grf(2, 0);
+
+   brw_ADD(p, dst, src0, brw_imm_f(1.0));
+}
+
+/* Emits "ADD g0, g2, 1" with D-typed registers and a D-typed immediate. */
+static void
+gen_ADD_GRF_GRF_IMM_d(struct brw_compile *p)
+{
+   const struct brw_reg dst = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D);
+   const struct brw_reg src0 = retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_D);
+
+   brw_ADD(p, dst, src0, brw_imm_d(1));
+}
+
+/* Emits "MOV g0, g2" on 8-wide GRF registers. */
+static void
+gen_MOV_GRF_GRF(struct brw_compile *p)
+{
+   const struct brw_reg dst = brw_vec8_grf(0, 0);
+   const struct brw_reg src0 = brw_vec8_grf(2, 0);
+
+   brw_MOV(p, dst, src0);
+}
+
+/* Emits "ADD m6, g2, g4": a message-register destination with GRF sources. */
+static void
+gen_ADD_MRF_GRF_GRF(struct brw_compile *p)
+{
+   const struct brw_reg dst = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0);
+   const struct brw_reg src0 = brw_vec8_grf(2, 0);
+   const struct brw_reg src1 = brw_vec8_grf(4, 0);
+
+   brw_ADD(p, dst, src0, src1);
+}
+
+/* Emits "ADD g0, g2, g4" using vec1 GRF registers. */
+static void
+gen_ADD_vec1_GRF_GRF_GRF(struct brw_compile *p)
+{
+   const struct brw_reg dst = brw_vec1_grf(0, 0);
+   const struct brw_reg src0 = brw_vec1_grf(2, 0);
+   const struct brw_reg src1 = brw_vec1_grf(4, 0);
+
+   brw_ADD(p, dst, src0, src1);
+}
+
+/* Emits "PLN m6, g2, g4" with a vec1 first source and a vec8 second source. */
+static void
+gen_PLN_MRF_GRF_GRF(struct brw_compile *p)
+{
+   const struct brw_reg dst = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0);
+   const struct brw_reg plane = brw_vec1_grf(2, 0);
+   const struct brw_reg src1 = brw_vec8_grf(4, 0);
+
+   brw_PLN(p, dst, plane, src1);
+}
+
+/* Emits "MOV g0, g2" with predicate control enabled; the instruction state
+ * is pushed before and popped after.
+ */
+static void
+gen_f0_MOV_GRF_GRF(struct brw_compile *p)
+{
+   const struct brw_reg dst = brw_vec8_grf(0, 0);
+   const struct brw_reg src0 = brw_vec8_grf(2, 0);
+
+   brw_push_insn_state(p);
+   brw_set_predicate_control(p, true);
+   brw_MOV(p, dst, src0);
+   brw_pop_insn_state(p);
+}
+
+/* The handling of f1 vs f0 changes between gen6 and gen7.  Explicitly test
+ * it, so that the fuzzing can run over all the other bits that might
+ * interact with it.
+ *
+ * Emits "MOV g0, g2" with predicate control enabled and flag_reg_nr set to 1.
+ */
+static void
+gen_f1_MOV_GRF_GRF(struct brw_compile *p)
+{
+   const struct brw_reg dst = brw_vec8_grf(0, 0);
+   const struct brw_reg src0 = brw_vec8_grf(2, 0);
+
+   brw_push_insn_state(p);
+   brw_set_predicate_control(p, true);
+   current_insn(p)->bits2.da1.flag_reg_nr = 1;
+   brw_MOV(p, dst, src0);
+   brw_pop_insn_state(p);
+}
+
+/* Instruction generators exercised by main().  Each one is expected to emit
+ * exactly one instruction into the brw_compile it is given; main() asserts
+ * this after calling it.
+ */
+struct {
+ void (*func)(struct brw_compile *p);
+} tests[] = {
+ { gen_MOV_GRF_GRF },
+ { gen_ADD_GRF_GRF_GRF },
+ { gen_ADD_GRF_GRF_IMM },
+ { gen_ADD_GRF_GRF_IMM_d },
+ { gen_ADD_MRF_GRF_GRF },
+ { gen_ADD_vec1_GRF_GRF_GRF },
+ { gen_PLN_MRF_GRF_GRF },
+ { gen_f0_MOV_GRF_GRF },
+ { gen_f1_MOV_GRF_GRF },
+};
+
+/**
+ * Test driver: runs every generator in tests[] in both align1 and align16
+ * access modes, checking the generated instruction and all of its two-bit
+ * fuzzed variants against the compaction code.
+ *
+ * Returns 0 if every case passed and 1 if any failed or if the context
+ * could not be allocated.
+ */
+int
+main(int argc, char **argv)
+{
+   struct brw_context *brw = calloc(1, sizeof(*brw));
+   if (!brw) {
+      fprintf(stderr, "Failed to allocate brw_context\n");
+      return 1;
+   }
+
+   struct intel_context *intel = &brw->intel;
+   intel->gen = 6;
+   int ret = 0;
+
+   for (int i = 0; i < ARRAY_SIZE(tests); i++) {
+      for (int align_16 = 0; align_16 <= 1; align_16++) {
+         struct brw_compile *p = rzalloc(NULL, struct brw_compile);
+         brw_init_compile(brw, p, p);
+
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         if (align_16)
+            brw_set_access_mode(p, BRW_ALIGN_16);
+         else
+            brw_set_access_mode(p, BRW_ALIGN_1);
+
+         tests[i].func(p);
+         assert(p->nr_insn == 1);
+
+         /* Fuzzing is skipped once the plain instruction has failed. */
+         if (!test_compact_instruction(p, p->store[0]) ||
+             !test_fuzz_compact_instruction(p, p->store[0]))
+            ret = 1;
+
+         /* Release the compile context on failure as well as success. */
+         ralloc_free(p);
+      }
+   }
+
+   free(brw);
+
+   return ret;
+}