aboutsummaryrefslogtreecommitdiffstats
path: root/src/freedreno
diff options
context:
space:
mode:
author: Connor Abbott <[email protected]> 2020-03-06 18:06:06 +0100
committer: Marge Bot <[email protected]> 2020-04-09 15:56:55 +0000
commit: c8b0f904398cdc30ffc67c162bc3f570bf887ed9 (patch)
tree: 5ac775ac19befdd1f6f77a67fa88bc3dbc1c5911 /src/freedreno
parent: 122a900d7de826dcd1056f2ad2ea4c72d9129c06 (diff)
ir3: Add bindless instruction encoding
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- src/freedreno/ir3/disasm-a3xx.c 187
-rw-r--r-- src/freedreno/ir3/instr-a3xx.h 148
-rw-r--r-- src/freedreno/ir3/ir3.c 45
3 files changed, 277 insertions, 103 deletions
diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index cd45e912703..d957e77f853 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -117,7 +117,10 @@ static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, bool r,
else
fprintf(ctx->out, "%s%c<a0.x>", full ? "" : "h", type);
} else if ((reg.num == REG_A0) && !c) {
- fprintf(ctx->out, "a0.%c", component[reg.comp]);
+ /* This matches libllvm output, the second (scalar) address register
+ * seems to be called a1.x instead of a0.y.
+ */
+ fprintf(ctx->out, "a%d.x", reg.comp);
} else if ((reg.num == REG_P0) && !c) {
fprintf(ctx->out, "p0.%c", component[reg.comp]);
} else {
@@ -448,15 +451,70 @@ static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
[opc_op(OPC_RGETPOS)] = { true, false, false, false, },
[opc_op(OPC_RGETINFO)] = { false, false, false, false, },
};
+
+ static const struct {
+ bool indirect;
+ bool bindless;
+ bool use_a1;
+ bool uniform;
+ } desc_features[8] = {
+ [CAT5_NONUNIFORM] = { .indirect = true, },
+ [CAT5_UNIFORM] = { .indirect = true, .uniform = true, },
+ [CAT5_BINDLESS_IMM] = { .bindless = true, },
+ [CAT5_BINDLESS_UNIFORM] = {
+ .bindless = true,
+ .indirect = true,
+ .uniform = true,
+ },
+ [CAT5_BINDLESS_NONUNIFORM] = {
+ .bindless = true,
+ .indirect = true,
+ },
+ [CAT5_BINDLESS_A1_IMM] = {
+ .bindless = true,
+ .use_a1 = true,
+ },
+ [CAT5_BINDLESS_A1_UNIFORM] = {
+ .bindless = true,
+ .indirect = true,
+ .uniform = true,
+ .use_a1 = true,
+ },
+ [CAT5_BINDLESS_A1_NONUNIFORM] = {
+ .bindless = true,
+ .indirect = true,
+ .use_a1 = true,
+ },
+ };
+
instr_cat5_t *cat5 = &instr->cat5;
int i;
+ bool desc_indirect =
+ cat5->is_s2en_bindless &&
+ desc_features[cat5->s2en_bindless.desc_mode].indirect;
+ bool bindless =
+ cat5->is_s2en_bindless &&
+ desc_features[cat5->s2en_bindless.desc_mode].bindless;
+ bool use_a1 =
+ cat5->is_s2en_bindless &&
+ desc_features[cat5->s2en_bindless.desc_mode].use_a1;
+ bool uniform =
+ cat5->is_s2en_bindless &&
+ desc_features[cat5->s2en_bindless.desc_mode].uniform;
+
if (cat5->is_3d) fprintf(ctx->out, ".3d");
if (cat5->is_a) fprintf(ctx->out, ".a");
if (cat5->is_o) fprintf(ctx->out, ".o");
if (cat5->is_p) fprintf(ctx->out, ".p");
if (cat5->is_s) fprintf(ctx->out, ".s");
- if (cat5->is_s2en) fprintf(ctx->out, ".s2en");
+ if (desc_indirect) fprintf(ctx->out, ".s2en");
+ if (uniform) fprintf(ctx->out, ".uniform");
+
+ if (bindless) {
+ unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo;
+ fprintf(ctx->out, ".base%d", base);
+ }
fprintf(ctx->out, " ");
@@ -483,34 +541,47 @@ static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
false, false, false);
}
- if (cat5->is_s2en) {
- if (cat5->is_o || info[cat5->opc].src2) {
- fprintf(ctx->out, ", ");
- print_reg_src(ctx, (reg_t)(cat5->s2en.src2), cat5->full,
- false, false, false, false, false, false);
- }
+ if (cat5->is_o || info[cat5->opc].src2) {
fprintf(ctx->out, ", ");
- print_reg_src(ctx, (reg_t)(cat5->s2en.src3), false, false, false, false,
- false, false, false);
- } else {
- if (cat5->is_o || info[cat5->opc].src2) {
- fprintf(ctx->out, ", ");
- print_reg_src(ctx, (reg_t)(cat5->norm.src2), cat5->full,
- false, false, false, false, false, false);
+ print_reg_src(ctx, (reg_t)(cat5->src2), cat5->full,
+ false, false, false, false, false, false);
+ }
+ if (cat5->is_s2en_bindless) {
+ if (!desc_indirect) {
+ if (info[cat5->opc].samp) {
+ if (use_a1)
+ fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3);
+ else
+ fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf);
+ }
+
+ if (info[cat5->opc].tex && !use_a1) {
+ fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4);
+ }
}
+ } else {
if (info[cat5->opc].samp)
fprintf(ctx->out, ", s#%d", cat5->norm.samp);
if (info[cat5->opc].tex)
fprintf(ctx->out, ", t#%d", cat5->norm.tex);
}
+ if (desc_indirect) {
+ fprintf(ctx->out, ", ");
+ print_reg_src(ctx, (reg_t)(cat5->s2en_bindless.src3), bindless,
+ false, false, false, false, false, false);
+ }
+
+ if (use_a1)
+ fprintf(ctx->out, ", a1.x");
+
if (debug & PRINT_VERBOSE) {
- if (cat5->is_s2en) {
- if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2))
- fprintf(ctx->out, "\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2);
+ if (cat5->is_s2en_bindless) {
+ if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1)
+ fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1);
} else {
- if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2))
- fprintf(ctx->out, "\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2);
+ if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1)
+ fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1);
}
}
}
@@ -833,46 +904,66 @@ static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
{
instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
- struct reginfo src1, src2;
- bool has_dest = _OPC(6, cat6->opc) == OPC_LDIB;
- char ss = 0;
+ struct reginfo src1, src2, ssbo;
+ bool uses_type = _OPC(6, cat6->opc) != OPC_LDC;
- memset(&src1, 0, sizeof(src1));
- memset(&src2, 0, sizeof(src2));
+ static const struct {
+ bool indirect;
+ bool bindless;
+ bool uniform;
+ } desc_features[8] = {
+ [CAT6_IMM] = { false },
+ [CAT6_BINDLESS_IMM] = { .bindless = true, },
+ [CAT6_BINDLESS_UNIFORM] = {
+ .bindless = true,
+ .indirect = true,
+ .uniform = true,
+ },
+ [CAT6_BINDLESS_NONUNIFORM] = {
+ .bindless = true,
+ .indirect = true,
+ },
+ };
- fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
- fprintf(ctx->out, ".%dd", cat6->d + 1);
- fprintf(ctx->out, ".%s", type[cat6->type]);
- fprintf(ctx->out, ".%u ", cat6->type_size + 1);
+ bool indirect_ssbo = desc_features[cat6->desc_mode].indirect;
+ bool bindless = desc_features[cat6->desc_mode].bindless;
+ bool uniform = desc_features[cat6->desc_mode].uniform;
- if (has_dest) {
- src2.reg = (reg_t)(cat6->src2);
- src2.full = true; // XXX
- print_src(ctx, &src2);
- fprintf(ctx->out, ", ");
+ memset(&src1, 0, sizeof(src1));
+ memset(&src2, 0, sizeof(src2));
+ memset(&ssbo, 0, sizeof(ssbo));
+
+ if (uses_type) {
+ fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
+ fprintf(ctx->out, ".%dd", cat6->d + 1);
+ fprintf(ctx->out, ".%s", type[cat6->type]);
}
+ fprintf(ctx->out, ".%u", cat6->type_size + 1);
- /* NOTE: blob seems to use old encoding for ldl/stl (local memory) */
- ss = 'g';
+ if (bindless)
+ fprintf(ctx->out, ".base%d", cat6->base);
+ if (uniform)
+ fprintf(ctx->out, ".uniform");
+ fprintf(ctx->out, " ");
+
+ src2.reg = (reg_t)(cat6->src2);
+ src2.full = true; // XXX
+ print_src(ctx, &src2);
+ fprintf(ctx->out, ", ");
- fprintf(ctx->out, "%c[%u", ss, cat6->ssbo);
- fprintf(ctx->out, "] + ");
src1.reg = (reg_t)(cat6->src1);
src1.full = true; // XXX
print_src(ctx, &src1);
-
- if (!has_dest) {
- fprintf(ctx->out, ", ");
-
- src2.reg = (reg_t)(cat6->src2);
- src2.full = true; // XXX
- print_src(ctx, &src2);
- }
+ fprintf(ctx->out, ", ");
+ ssbo.reg = (reg_t)(cat6->ssbo);
+ ssbo.im = !indirect_ssbo;
+ ssbo.full = true;
+ print_src(ctx, &ssbo);
if (debug & PRINT_VERBOSE) {
- fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x)", cat6->pad1,
- cat6->pad2, cat6->pad3, cat6->pad4);
+ fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
+ cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5);
}
}
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index b3649f24bdf..f36c73b88e2 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -567,6 +567,57 @@ typedef struct PACKED {
uint32_t opc_cat : 3;
} instr_cat4_t;
+/* With is_s2en_bindless = 1, this determines whether bindless is enabled and
+ * if so, how to get the (base, index) pair for both sampler and texture.
+ * There is a single base embedded in the instruction, which is always used
+ * for the texture.
+ */
+typedef enum {
+ /* Use traditional GL binding model, get texture and sampler index
+ * from src3 which is not presumed to be uniform. This is
+ * backwards-compatible with earlier generations, where this field was
+ * always 0 and nonuniform-indexed sampling always worked.
+ */
+ CAT5_NONUNIFORM = 0,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+ * and texture index come from src3 which is presumed to be uniform.
+ */
+ CAT5_BINDLESS_A1_UNIFORM = 1,
+
+ /* The texture and sampler share the same base, and the sampler and
+ * texture index come from src3 which is *not* presumed to be uniform.
+ */
+ CAT5_BINDLESS_NONUNIFORM = 2,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+ * and texture index come from src3 which is *not* presumed to be
+ * uniform.
+ */
+ CAT5_BINDLESS_A1_NONUNIFORM = 3,
+
+ /* Use traditional GL binding model, get texture and sampler index
+ * from src3 which is presumed to be uniform.
+ */
+ CAT5_UNIFORM = 4,
+
+ /* The texture and sampler share the same base, and the sampler and
+ * texture index come from src3 which is presumed to be uniform.
+ */
+ CAT5_BINDLESS_UNIFORM = 5,
+
+ /* The texture and sampler share the same base, get sampler index from low
+ * 4 bits of src3 and texture index from high 4 bits.
+ */
+ CAT5_BINDLESS_IMM = 6,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the texture
+ * index comes from the next 8 bits of a1.x. The sampler index is an
+ * immediate in src3.
+ */
+ CAT5_BINDLESS_A1_IMM = 7,
+} cat5_desc_mode_t;
+
typedef struct PACKED {
/* dword0: */
union PACKED {
@@ -581,39 +632,41 @@ typedef struct PACKED {
} norm;
/* s2en case: */
struct PACKED {
- uint32_t full : 1; /* not half */
- uint32_t src1 : 8;
- uint32_t src2 : 11;
- uint32_t dummy1 : 1;
- uint32_t src3 : 8;
- uint32_t dummy2 : 3;
- } s2en;
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t dummy1 : 2;
+ uint32_t base_hi : 2;
+ uint32_t src3 : 8;
+ uint32_t desc_mode : 3;
+ } s2en_bindless;
/* same in either case: */
// XXX I think, confirm this
struct PACKED {
uint32_t full : 1; /* not half */
uint32_t src1 : 8;
- uint32_t pad : 23;
+ uint32_t src2 : 8;
+ uint32_t pad : 15;
};
};
/* dword1: */
- uint32_t dst : 8;
- uint32_t wrmask : 4; /* write-mask */
- uint32_t type : 3;
- uint32_t dummy2 : 1; /* seems to be ignored */
- uint32_t is_3d : 1;
-
- uint32_t is_a : 1;
- uint32_t is_s : 1;
- uint32_t is_s2en : 1;
- uint32_t is_o : 1;
- uint32_t is_p : 1;
-
- uint32_t opc : 5;
- uint32_t jmp_tgt : 1;
- uint32_t sync : 1;
- uint32_t opc_cat : 3;
+ uint32_t dst : 8;
+ uint32_t wrmask : 4; /* write-mask */
+ uint32_t type : 3;
+ uint32_t base_lo : 1; /* used with bindless */
+ uint32_t is_3d : 1;
+
+ uint32_t is_a : 1;
+ uint32_t is_s : 1;
+ uint32_t is_s2en_bindless : 1;
+ uint32_t is_o : 1;
+ uint32_t is_p : 1;
+
+ uint32_t opc : 5;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
} instr_cat5_t;
/* dword0 encoding for src_off: [src1 + off], src2: */
@@ -748,43 +801,72 @@ typedef union PACKED {
};
} instr_cat6_t;
+/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
+ */
+typedef enum {
+ /* Use old GL binding model with an immediate index.
+ * TODO: find CAT6_UNIFORM and CAT6_NONUNIFORM
+ */
+ CAT6_IMM = 0,
+
+ /* Use the bindless model, with an immediate index.
+ */
+ CAT6_BINDLESS_IMM = 4,
+
+ /* Use the bindless model, with a uniform register index.
+ */
+ CAT6_BINDLESS_UNIFORM = 5,
+
+ /* Use the bindless model, with a register index that isn't guaranteed
+ * to be uniform. This presumably checks if the indices are equal and
+ * splits up the load/store, because it works the way you would
+ * expect.
+ */
+ CAT6_BINDLESS_NONUNIFORM = 6,
+} cat6_desc_mode_t;
+
/**
* For atomic ops (which return a value):
*
- * pad1=1, pad2=c, pad3=0, pad4=3
+ * pad1=1, pad3=c, pad5=3
* src1 - vecN offset/coords
* src2.x - is actually dest register
* src2.y - is 'data' except for cmpxchg where src2.y is 'compare'
* and src2.z is 'data'
*
* For stib (which does not return a value):
- * pad1=0, pad2=c, pad3=0, pad4=2
+ * pad1=0, pad3=c, pad5=2
* src1 - vecN offset/coords
* src2 - value to store
*
* For ldib:
- * pad1=1, pad2=c, pad3=0, pad4=2
+ * pad1=1, pad3=c, pad5=2
* src1 - vecN offset/coords
*
* for ldc (load from UBO using descriptor):
- * pad1=0, pad2=8, pad3=0, pad4=2
+ * pad1=0, pad3=8, pad5=2
+ *
+ * pad2 and pad4 are only observed to be 0.
*/
typedef struct PACKED {
/* dword0: */
- uint32_t pad1 : 9;
+ uint32_t pad1 : 1;
+ uint32_t base : 3;
+ uint32_t pad2 : 2;
+ uint32_t desc_mode : 3;
uint32_t d : 2;
uint32_t typed : 1;
uint32_t type_size : 2;
uint32_t opc : 5;
- uint32_t pad2 : 5;
+ uint32_t pad3 : 5;
uint32_t src1 : 8; /* coordinate/offset */
/* dword1: */
uint32_t src2 : 8; /* or the dst for load instructions */
- uint32_t pad3 : 1; //mustbe0 ?? or zero means imm vs reg for ssbo??
+ uint32_t pad4 : 1; //mustbe0 ??
uint32_t ssbo : 8; /* ssbo/image binding point */
uint32_t type : 3;
- uint32_t pad4 : 7;
+ uint32_t pad5 : 7;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
@@ -869,7 +951,7 @@ static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
* cmdstream traces I have indicates that the pad bit is zero
* in all cases. So we can use this to detect new encoding:
*/
- if ((cat6->pad2 & 0x8) && (cat6->pad4 & 0x2)) {
+ if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
assert(gpu_id >= 600);
assert(instr->cat6.opc == 0);
return false;
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 4ac50aec0a3..7bdf8a39ba8 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -482,20 +482,23 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
}
+ if (src2) {
+ iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+ cat5->src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+ }
+
if (instr->flags & IR3_INSTR_S2EN) {
struct ir3_register *samp_tex = instr->regs[1];
- if (src2) {
- iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
- cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
- }
iassert(samp_tex->flags & IR3_REG_HALF);
- cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
+ cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
+ /* TODO: This should probably be CAT5_UNIFORM, at least on a6xx, as
+ * this is what the blob does and it is presumably faster, but first
+ * we should confirm it is actually nonuniform and figure out when the
+ * whole descriptor mode mechanism was introduced.
+ */
+ cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM;
iassert(!(instr->cat5.samp | instr->cat5.tex));
} else {
- if (src2) {
- iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
- cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
- }
cat5->norm.samp = instr->cat5.samp;
cat5->norm.tex = instr->cat5.tex;
}
@@ -506,7 +509,7 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
cat5->is_3d = !!(instr->flags & IR3_INSTR_3D);
cat5->is_a = !!(instr->flags & IR3_INSTR_A);
cat5->is_s = !!(instr->flags & IR3_INSTR_S);
- cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN);
+ cat5->is_s2en_bindless = !!(instr->flags & IR3_INSTR_S2EN);
cat5->is_o = !!(instr->flags & IR3_INSTR_O);
cat5->is_p = !!(instr->flags & IR3_INSTR_P);
cat5->opc = instr->opc;
@@ -564,31 +567,29 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
case OPC_ATOMIC_OR:
case OPC_ATOMIC_XOR:
cat6->pad1 = 0x1;
- cat6->pad2 = 0xc;
- cat6->pad3 = 0x0;
- cat6->pad4 = 0x3;
+ cat6->pad3 = 0xc;
+ cat6->pad5 = 0x3;
break;
case OPC_STIB:
cat6->pad1 = 0x0;
- cat6->pad2 = 0xc;
- cat6->pad3 = 0x0;
- cat6->pad4 = 0x2;
+ cat6->pad3 = 0xc;
+ cat6->pad5 = 0x2;
break;
case OPC_LDIB:
cat6->pad1 = 0x1;
- cat6->pad2 = 0xc;
- cat6->pad3 = 0x0;
- cat6->pad4 = 0x2;
+ cat6->pad3 = 0xc;
+ cat6->pad5 = 0x2;
break;
case OPC_LDC:
cat6->pad1 = 0x0;
- cat6->pad2 = 0x8;
- cat6->pad3 = 0x0;
- cat6->pad4 = 0x2;
+ cat6->pad3 = 0x8;
+ cat6->pad5 = 0x2;
break;
default:
iassert(0);
}
+ cat6->pad2 = 0x0;
+ cat6->pad4 = 0x0;
return 0;
}