summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2015-07-23 15:31:13 -0400
committer Rob Clark <[email protected]> 2015-07-27 13:51:05 -0400
commit bc5e2bec303acd7fd962996bf369be5ce0e15cd2 (patch)
tree baeeec57ee7f500db3750b841ed3105747a5bce7 /src/gallium/drivers
parent 4b15cb6daa29d4bdd268eac6c2e40fb1503e98fa (diff)
freedreno/ir3: updated cat6 encoding
Sync updated cat6 encoding from freedreno.git, needed to properly encode store instructions.

Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/disasm-a3xx.c 223
-rw-r--r-- src/gallium/drivers/freedreno/ir3/instr-a3xx.h 87
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.c 27
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 4
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 2
5 files changed, 230 insertions, 113 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
index 48ae7c71b9f..83ed5ffdca0 100644
--- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
+++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
@@ -103,7 +103,7 @@ static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
} else if ((reg.num == REG_P0) && !c) {
printf("p0.%c", component[reg.comp]);
} else {
- printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
+ printf("%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]);
}
}
@@ -122,6 +122,32 @@ static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
print_reg(reg, full, r, c, im, neg, abs, addr_rel);
}
+/* Bundles a register with the flags that print_reg_src() takes; the
+ * fields are declared in the same order as that function's parameters,
+ * and print_src() forwards them one-to-one.
+ *
+ * TODO switch to using reginfo struct everywhere, since more readable
+ * than passing a bunch of bools to print_reg_src
+ */
+
+struct reginfo {
+ reg_t reg; /* the register (or immediate payload) to print */
+ bool full; /* when false, print_reg() emits the "h" register prefix */
+ bool r;
+ bool c;
+ bool im; /* operand is an immediate; cat6 prints src1 as a raw number when set */
+ bool neg;
+ bool abs;
+ bool addr_rel;
+};
+
+/* Print the operand described by info: unpacks every reginfo field and
+ * passes it to print_reg_src() in that function's parameter order.
+ */
+static void print_src(struct reginfo *info)
+{
+ print_reg_src(info->reg, info->full, info->r, info->c, info->im,
+ info->neg, info->abs, info->addr_rel);
+}
+
+//static void print_dst(struct reginfo *info)
+//{
+// print_reg_dst(info->reg, info->full, info->addr_rel);
+//}
+
static void print_instr_cat0(instr_t *instr)
{
instr_cat0_t *cat0 = &instr->cat0;
@@ -454,10 +480,70 @@ static void print_instr_cat6(instr_t *instr)
{
instr_cat6_t *cat6 = &instr->cat6;
char sd = 0, ss = 0; /* dst/src address space */
- bool full = type_size(cat6->type) == 32;
bool nodst = false;
+ struct reginfo dst, src1, src2;
+ int src1off = 0, dstoff = 0;
- printf(".%s ", type[cat6->type]);
+ memset(&dst, 0, sizeof(dst));
+ memset(&src1, 0, sizeof(src1));
+ memset(&src2, 0, sizeof(src2));
+
+ switch (cat6->opc) {
+ case OPC_RESINFO:
+ case OPC_RESFMT:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = type_size(cat6->type) == 32;
+ src2.full = type_size(cat6->type) == 32;
+ break;
+ case OPC_L2G:
+ case OPC_G2L:
+ dst.full = true;
+ src1.full = true;
+ src2.full = true;
+ break;
+ case OPC_STG:
+ case OPC_STL:
+ case OPC_STP:
+ case OPC_STI:
+ case OPC_STLW:
+ case OPC_STGB_4D_4:
+ case OPC_STIB:
+ dst.full = true;
+ src1.full = type_size(cat6->type) == 32;
+ src2.full = type_size(cat6->type) == 32;
+ break;
+ default:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = true;
+ src2.full = true;
+ break;
+ }
+
+ switch (cat6->opc) {
+ case OPC_PREFETCH:
+ case OPC_RESINFO:
+ break;
+ case OPC_ATOMIC_ADD:
+ case OPC_ATOMIC_SUB:
+ case OPC_ATOMIC_XCHG:
+ case OPC_ATOMIC_INC:
+ case OPC_ATOMIC_DEC:
+ case OPC_ATOMIC_CMPXCHG:
+ case OPC_ATOMIC_MIN:
+ case OPC_ATOMIC_MAX:
+ case OPC_ATOMIC_AND:
+ case OPC_ATOMIC_OR:
+ case OPC_ATOMIC_XOR:
+ ss = cat6->g ? 'g' : 'l';
+ printf(".%c", ss);
+ printf(".%s", type[cat6->type]);
+ break;
+ default:
+ dst.im = cat6->g && !cat6->dst_off;
+ printf(".%s", type[cat6->type]);
+ break;
+ }
+ printf(" ");
switch (cat6->opc) {
case OPC_STG:
@@ -499,68 +585,65 @@ static void print_instr_cat6(instr_t *instr)
break;
case OPC_STI:
- full = false; // XXX or inverts??
+ dst.full = false; // XXX or inverts??
break;
}
- if (cat6->has_off) {
- if (!nodst) {
- if (sd)
- printf("%c[", sd);
- print_reg_dst((reg_t)(cat6->a.dst), full, false);
- if (sd)
- printf("]");
- printf(", ");
- }
- if (ss)
- printf("%c[", ss);
- print_reg_src((reg_t)(cat6->a.src1), true,
- false, false, cat6->a.src1_im, false, false, false);
- if (cat6->a.off)
- printf("%+d", cat6->a.off);
- if (ss)
- printf("]");
- printf(", ");
- print_reg_src((reg_t)(cat6->a.src2), full,
- false, false, cat6->a.src2_im, false, false, false);
+ if (cat6->dst_off) {
+ dst.reg = (reg_t)(cat6->c.dst);
+ dstoff = cat6->c.off;
} else {
- if (!nodst) {
- if (sd)
- printf("%c[", sd);
- print_reg_dst((reg_t)(cat6->b.dst), full, false);
- if (sd)
- printf("]");
- printf(", ");
- }
- if (ss)
- printf("%c[", ss);
- print_reg_src((reg_t)(cat6->b.src1), true,
- false, false, cat6->b.src1_im, false, false, false);
- if (ss)
+ dst.reg = (reg_t)(cat6->d.dst);
+ }
+
+ if (cat6->src_off) {
+ src1.reg = (reg_t)(cat6->a.src1);
+ src1.im = cat6->a.src1_im;
+ src2.reg = (reg_t)(cat6->a.src2);
+ src2.im = cat6->a.src2_im;
+ src1off = cat6->a.off;
+ } else {
+ src1.reg = (reg_t)(cat6->b.src1);
+ src1.im = cat6->b.src1_im;
+ src2.reg = (reg_t)(cat6->b.src2);
+ src2.im = cat6->b.src2_im;
+ }
+
+ if (!nodst) {
+ if (sd)
+ printf("%c[", sd);
+ /* note: dst might actually be a src (ie. address to store to) */
+ print_src(&dst);
+ if (dstoff)
+ printf("%+d", dstoff);
+ if (sd)
printf("]");
printf(", ");
- print_reg_src((reg_t)(cat6->b.src2), full,
- false, false, cat6->b.src2_im, false, false, false);
}
- if (debug & PRINT_VERBOSE) {
- switch (cat6->opc) {
- case OPC_LDG:
- case OPC_LDP:
- /* load instructions: */
- if (cat6->a.dummy2|cat6->a.dummy3)
- printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3);
- break;
- case OPC_STG:
- case OPC_STP:
- case OPC_STI:
- /* store instructions: */
- if (cat6->b.dummy2|cat6->b.dummy2)
- printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3);
- if (cat6->b.ignore0)
- printf("\t{?? %x}", cat6->b.ignore0);
- break;
- }
+ if (ss)
+ printf("%c[", ss);
+
+ /* can have a larger than normal immed, so hack: */
+ if (src1.im) {
+ printf("%u", src1.reg.dummy13);
+ } else {
+ print_src(&src1);
+ }
+
+ if (src1off)
+ printf("%+d", src1off);
+ if (ss)
+ printf("]");
+
+ switch (cat6->opc) {
+ case OPC_RESINFO:
+ case OPC_RESFMT:
+ break;
+ default:
+ printf(", ");
+ print_src(&src2);
+ break;
}
}
@@ -711,19 +794,19 @@ struct opc_info {
OPC(6, OPC_LDLW, ldlw),
OPC(6, OPC_STLW, stlw),
OPC(6, OPC_RESFMT, resfmt),
- OPC(6, OPC_RESINFO, resinf),
- OPC(6, OPC_ATOMIC_ADD_L, atomic.add.l),
- OPC(6, OPC_ATOMIC_SUB_L, atomic.sub.l),
- OPC(6, OPC_ATOMIC_XCHG_L, atomic.xchg.l),
- OPC(6, OPC_ATOMIC_INC_L, atomic.inc.l),
- OPC(6, OPC_ATOMIC_DEC_L, atomic.dec.l),
- OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
- OPC(6, OPC_ATOMIC_MIN_L, atomic.min.l),
- OPC(6, OPC_ATOMIC_MAX_L, atomic.max.l),
- OPC(6, OPC_ATOMIC_AND_L, atomic.and.l),
- OPC(6, OPC_ATOMIC_OR_L, atomic.or.l),
- OPC(6, OPC_ATOMIC_XOR_L, atomic.xor.l),
- OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.4d),
+ OPC(6, OPC_RESINFO, resinfo),
+ OPC(6, OPC_ATOMIC_ADD, atomic.add),
+ OPC(6, OPC_ATOMIC_SUB, atomic.sub),
+ OPC(6, OPC_ATOMIC_XCHG, atomic.xchg),
+ OPC(6, OPC_ATOMIC_INC, atomic.inc),
+ OPC(6, OPC_ATOMIC_DEC, atomic.dec),
+ OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
+ OPC(6, OPC_ATOMIC_MIN, atomic.min),
+ OPC(6, OPC_ATOMIC_MAX, atomic.max),
+ OPC(6, OPC_ATOMIC_AND, atomic.and),
+ OPC(6, OPC_ATOMIC_OR, atomic.or),
+ OPC(6, OPC_ATOMIC_XOR, atomic.xor),
+ OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.3d),
OPC(6, OPC_STGB_4D_4, stgb.4d.4),
OPC(6, OPC_STIB, stib),
OPC(6, OPC_LDC_4, ldc.4),
diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
index efb07ea479e..c3fb68d511c 100644
--- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
@@ -173,17 +173,17 @@ typedef enum {
OPC_STLW = 11,
OPC_RESFMT = 14,
OPC_RESINFO = 15,
- OPC_ATOMIC_ADD_L = 16,
- OPC_ATOMIC_SUB_L = 17,
- OPC_ATOMIC_XCHG_L = 18,
- OPC_ATOMIC_INC_L = 19,
- OPC_ATOMIC_DEC_L = 20,
- OPC_ATOMIC_CMPXCHG_L = 21,
- OPC_ATOMIC_MIN_L = 22,
- OPC_ATOMIC_MAX_L = 23,
- OPC_ATOMIC_AND_L = 24,
- OPC_ATOMIC_OR_L = 25,
- OPC_ATOMIC_XOR_L = 26,
+ OPC_ATOMIC_ADD = 16,
+ OPC_ATOMIC_SUB = 17,
+ OPC_ATOMIC_XCHG = 18,
+ OPC_ATOMIC_INC = 19,
+ OPC_ATOMIC_DEC = 20,
+ OPC_ATOMIC_CMPXCHG = 21,
+ OPC_ATOMIC_MIN = 22,
+ OPC_ATOMIC_MAX = 23,
+ OPC_ATOMIC_AND = 24,
+ OPC_ATOMIC_OR = 25,
+ OPC_ATOMIC_XOR = 26,
OPC_LDGB_TYPED_4D = 27,
OPC_STGB_4D_4 = 28,
OPC_STIB = 29,
@@ -575,7 +575,7 @@ typedef struct PACKED {
uint32_t opc_cat : 3;
} instr_cat5_t;
-/* [src1 + off], src2: */
+/* dword0 encoding for src_off: [src1 + off], src2: */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe1 : 1;
@@ -586,37 +586,50 @@ typedef struct PACKED {
uint32_t src2 : 8;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
- uint32_t type : 3;
- uint32_t dummy3 : 2;
- uint32_t opc : 5;
- uint32_t jmp_tgt : 1;
- uint32_t sync : 1;
- uint32_t opc_cat : 3;
+ uint32_t dword1;
} instr_cat6a_t;
-/* [src1], src2: */
+/* dword0 encoding for !src_off: [src1], src2 */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe0 : 1;
- uint32_t src1 : 8;
- uint32_t ignore0 : 13;
+ uint32_t src1 : 13;
+ uint32_t ignore0 : 8;
uint32_t src1_im : 1;
uint32_t src2_im : 1;
uint32_t src2 : 8;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
- uint32_t type : 3;
- uint32_t dummy3 : 2;
- uint32_t opc : 5;
- uint32_t jmp_tgt : 1;
- uint32_t sync : 1;
- uint32_t opc_cat : 3;
+ uint32_t dword1;
} instr_cat6b_t;
+/* dword1 encoding for dst_off: destination register plus a signed
+ * offset.  dword0 is opaque here; it is decoded through the 'a'/'b'
+ * views of the cat6 union.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ /* note: there is some weird stuff going on where sometimes
+ * cat6->a.off is involved.. but that seems like a bug in
+ * the blob, since it is used even if !cat6->src_off
+ * It would make sense for there to be some more bits to
+ * bring us to 11 bits worth of offset, but not sure..
+ */
+ int32_t off : 8; /* signed offset; the disassembler prints it with %+d after dst */
+ uint32_t mustbe1 : 1; /* declared at the same position as dst_off in the generic cat6 view */
+ uint32_t dst : 8;
+ uint32_t pad1 : 15; /* rest of dword1 (type, g, opc, ... in the generic cat6 view) */
+} instr_cat6c_t;
+
+/* dword1 encoding for !dst_off: destination register only, no offset.
+ * dword0 is opaque here; it is decoded through the 'a'/'b' views of
+ * the cat6 union.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ uint32_t dst : 8;
+ uint32_t mustbe0 : 1; /* declared at the same position as dst_off in the generic cat6 view */
+ uint32_t pad0 : 23; /* rest of dword1 (type, g, opc, ... in the generic cat6 view) */
+} instr_cat6d_t;
+
/* I think some of the other cat6 instructions use additional
* sub-encodings..
*/
@@ -624,16 +637,20 @@ typedef struct PACKED {
typedef union PACKED {
instr_cat6a_t a;
instr_cat6b_t b;
+ instr_cat6c_t c;
+ instr_cat6d_t d;
struct PACKED {
/* dword0: */
- uint32_t has_off : 1;
+ uint32_t src_off : 1;
uint32_t pad1 : 31;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
+ uint32_t pad2 : 8;
+ uint32_t dst_off : 1;
+ uint32_t pad3 : 8;
uint32_t type : 3;
- uint32_t dummy3 : 2;
+ uint32_t g : 1; /* or in some cases it means dst immed */
+ uint32_t pad4 : 1;
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index a0cb74498ec..6d19a29275b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -506,25 +506,28 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
iassert(instr->regs_count >= 2);
- if (instr->cat6.offset || instr->opc == OPC_LDG) {
+ /* TODO we need a more comprehensive list about which instructions
+ * can be encoded which way. Or possibly use IR3_INSTR_0 flag to
+ * indicate to use the src_off encoding even if offset is zero
+ * (but then what to do about dst_off?)
+ */
+ if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
instr_cat6a_t *cat6a = ptr;
- cat6->has_off = true;
+ cat6->src_off = true;
- cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
if (src2) {
cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
}
- cat6a->off = instr->cat6.offset;
+ cat6a->off = instr->cat6.src_offset;
} else {
instr_cat6b_t *cat6b = ptr;
- cat6->has_off = false;
+ cat6->src_off = false;
- cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
if (src2) {
@@ -533,10 +536,22 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
}
}
+ if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
+ instr_cat6c_t *cat6c = ptr;
+ cat6->dst_off = true;
+ cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+ cat6c->off = instr->cat6.dst_offset;
+ } else {
+ instr_cat6d_t *cat6d = ptr;
+ cat6->dst_off = false;
+ cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+ }
+
cat6->type = instr->cat6.type;
cat6->opc = instr->opc;
cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat6->sync = !!(instr->flags & IR3_INSTR_SY);
+ cat6->g = !!(instr->flags & IR3_INSTR_G);
cat6->opc_cat = 6;
return 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index f11d8eda5f2..c3b61a0fe01 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -172,6 +172,7 @@ struct ir3_instruction {
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
+ IR3_INSTR_G = 0x400,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
@@ -209,7 +210,8 @@ struct ir3_instruction {
} cat5;
struct {
type_t type;
- int offset;
+ int src_offset;
+ int dst_offset;
int iim_val;
} cat6;
/* for meta-instructions, just used to hold extra data
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 22885ff85f3..bdba3aae36f 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1215,7 +1215,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction *load =
ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
load->cat6.type = TYPE_U32;
- load->cat6.offset = off + i * 4; /* byte offset */
+ load->cat6.src_offset = off + i * 4; /* byte offset */
dst[i] = load;
}
}