summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Turner <[email protected]>2014-04-30 16:28:59 -0700
committerMatt Turner <[email protected]>2014-05-24 23:03:24 -0700
commit1acb3a290e34779521854a7e4e035e1cf78ab746 (patch)
tree7133ca3f065f2496c395aa9a89639c5c7e83e7d7
parent8942f44c8d2ff5454d523b85ba126c6fae0d81ff (diff)
i965: Support compacted instructions with immediate sources.
Note the weirdness with src1 subregs. The compacted immediate fields are uncompacted to bits [127:96], and the high five bits of the subreg mapping map to bits [100:96]. Number of compacted instructions: 790085 -> 817752 (3.50%). Reviewed-by: Eric Anholt <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_compact.c83
1 files changed, 63 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index f6f055fd5fa..f40ba047697 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -373,13 +373,16 @@ set_datatype_index(struct brw_compact_instruction *dst,
static bool
set_subreg_index(struct brw_compact_instruction *dst,
- struct brw_instruction *src)
+ struct brw_instruction *src,
+ bool is_immediate)
{
uint16_t uncompacted = 0;
uncompacted |= src->bits1.da1.dest_subreg_nr << 0;
uncompacted |= src->bits2.da1.src0_subreg_nr << 5;
- uncompacted |= src->bits3.da1.src1_subreg_nr << 10;
+
+ if (!is_immediate)
+ uncompacted |= src->bits3.da1.src1_subreg_nr << 10;
for (int i = 0; i < 32; i++) {
if (subreg_table[i] == uncompacted) {
@@ -424,20 +427,40 @@ set_src0_index(struct brw_compact_instruction *dst,
static bool
set_src1_index(struct brw_compact_instruction *dst,
- struct brw_instruction *src)
+ struct brw_instruction *src, bool is_immediate)
{
- uint16_t compacted, uncompacted = 0;
+ if (is_immediate) {
+ dst->dw1.src1_index = (src->bits3.ud >> 8) & 0x1f;
+ } else {
+ uint16_t compacted, uncompacted;
- uncompacted |= (src->bits3.ud >> 13) & 0xfff;
+ uncompacted = (src->bits3.ud >> 13) & 0xfff;
- if (!get_src_index(uncompacted, &compacted))
- return false;
+ if (!get_src_index(uncompacted, &compacted))
+ return false;
- dst->dw1.src1_index = compacted;
+ dst->dw1.src1_index = compacted;
+ }
return true;
}
+/* Compacted instructions have 12-bits for immediate sources, and a 13th bit
+ * that's replicated through the high 20 bits.
+ *
+ * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
+ * of packed vectors as compactable immediates.
+ */
+static bool
+is_compactable_immediate(unsigned imm)
+{
+ /* We get the low 12 bits as-is. */
+ imm &= ~0xfff;
+
+ /* We get one bit replicated through the top 20 bits. */
+ return imm == 0 || imm == 0xfffff000;
+}
+
/**
* Tries to compact instruction src into dst.
*
@@ -464,10 +487,11 @@ brw_try_compact_instruction(struct brw_compile *p,
return false;
}
- /* FINISHME: immediates */
- if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
- src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+ bool is_immediate = src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
+ src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE;
+ if (is_immediate && !is_compactable_immediate(src->bits3.ud)) {
return false;
+ }
memset(&temp, 0, sizeof(temp));
@@ -477,7 +501,7 @@ brw_try_compact_instruction(struct brw_compile *p,
return false;
if (!set_datatype_index(&temp, src))
return false;
- if (!set_subreg_index(&temp, src))
+ if (!set_subreg_index(&temp, src, is_immediate))
return false;
temp.dw0.acc_wr_control = src->header.acc_wr_control;
temp.dw0.conditionalmod = src->header.destreg__conditionalmod;
@@ -486,11 +510,15 @@ brw_try_compact_instruction(struct brw_compile *p,
temp.dw0.cmpt_ctrl = 1;
if (!set_src0_index(&temp, src))
return false;
- if (!set_src1_index(&temp, src))
+ if (!set_src1_index(&temp, src, is_immediate))
return false;
temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
- temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
+ if (is_immediate) {
+ temp.dw1.src1_reg_nr = src->bits3.ud & 0xff;
+ } else {
+ temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
+ }
*dst = temp;
@@ -547,11 +575,17 @@ set_uncompacted_src0(struct brw_instruction *dst,
static void
set_uncompacted_src1(struct brw_instruction *dst,
- struct brw_compact_instruction *src)
+ struct brw_compact_instruction *src, bool is_immediate)
{
- uint16_t uncompacted = src_index_table[src->dw1.src1_index];
-
- dst->bits3.ud |= uncompacted << 13;
+ if (is_immediate) {
+ signed high5 = src->dw1.src1_index;
+ /* Replicate top bit of src1_index into high 20 bits of the immediate. */
+ dst->bits3.ud = (high5 << 27) >> 19;
+ } else {
+ uint16_t uncompacted = src_index_table[src->dw1.src1_index];
+
+ dst->bits3.ud |= uncompacted << 13;
+ }
}
void
@@ -566,16 +600,25 @@ brw_uncompact_instruction(struct brw_context *brw,
set_uncompacted_control(brw, dst, src);
set_uncompacted_datatype(dst, src);
+
+ /* src0/1 register file fields are in the datatype table. */
+ bool is_immediate = dst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
+ dst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE;
+
set_uncompacted_subreg(dst, src);
dst->header.acc_wr_control = src->dw0.acc_wr_control;
dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
if (brw->gen <= 6)
dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr;
set_uncompacted_src0(dst, src);
- set_uncompacted_src1(dst, src);
+ set_uncompacted_src1(dst, src, is_immediate);
dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
- dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
+ if (is_immediate) {
+ dst->bits3.ud |= src->dw1.src1_reg_nr;
+ } else {
+ dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
+ }
}
void brw_debug_compact_uncompact(struct brw_context *brw,