diff options
Diffstat (limited to 'src/freedreno/ir3/instr-a3xx.h')
-rw-r--r-- | src/freedreno/ir3/instr-a3xx.h | 148 |
1 files changed, 115 insertions, 33 deletions
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index b3649f24bdf..f36c73b88e2 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -567,6 +567,57 @@ typedef struct PACKED { uint32_t opc_cat : 3; } instr_cat4_t; +/* With is_bindless_s2en = 1, this determines whether bindless is enabled and + * if so, how to get the (base, index) pair for both sampler and texture. + * There is a single base embedded in the instruction, which is always used + * for the texture. + */ +typedef enum { + /* Use traditional GL binding model, get texture and sampler index + * from src3 which is not presumed to be uniform. This is + * backwards-compatible with earlier generations, where this field was + * always 0 and nonuniform-indexed sampling always worked. + */ + CAT5_NONUNIFORM = 0, + + /* The sampler base comes from the low 3 bits of a1.x, and the sampler + * and texture index come from src3 which is presumed to be uniform. + */ + CAT5_BINDLESS_A1_UNIFORM = 1, + + /* The texture and sampler share the same base, and the sampler and + * texture index come from src3 which is *not* presumed to be uniform. + */ + CAT5_BINDLESS_NONUNIFORM = 2, + + /* The sampler base comes from the low 3 bits of a1.x, and the sampler + * and texture index come from src3 which is *not* presumed to be + * uniform. + */ + CAT5_BINDLESS_A1_NONUNIFORM = 3, + + /* Use traditional GL binding model, get texture and sampler index + * from src3 which is presumed to be uniform. + */ + CAT5_UNIFORM = 4, + + /* The texture and sampler share the same base, and the sampler and + * texture index come from src3 which is presumed to be uniform. + */ + CAT5_BINDLESS_UNIFORM = 5, + + /* The texture and sampler share the same base, get sampler index from low + * 4 bits of src3 and texture index from high 4 bits. + */ + CAT5_BINDLESS_IMM = 6, + + /* The sampler base comes from the low 3 bits of a1.x, and the texture + * index comes from the next 8 bits of a1.x. The sampler index is an + * immediate in src3. + */ + CAT5_BINDLESS_A1_IMM = 7, +} cat5_desc_mode_t; + typedef struct PACKED { /* dword0: */ union PACKED { @@ -581,39 +632,41 @@ typedef struct PACKED { } norm; /* s2en case: */ struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 11; - uint32_t dummy1 : 1; - uint32_t src3 : 8; - uint32_t dummy2 : 3; - } s2en; + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t src2 : 8; + uint32_t dummy1 : 2; + uint32_t base_hi : 2; + uint32_t src3 : 8; + uint32_t desc_mode : 3; + } s2en_bindless; /* same in either case: */ // XXX I think, confirm this struct PACKED { uint32_t full : 1; /* not half */ uint32_t src1 : 8; - uint32_t pad : 23; + uint32_t src2 : 8; + uint32_t pad : 15; }; }; /* dword1: */ - uint32_t dst : 8; - uint32_t wrmask : 4; /* write-mask */ - uint32_t type : 3; - uint32_t dummy2 : 1; /* seems to be ignored */ - uint32_t is_3d : 1; - - uint32_t is_a : 1; - uint32_t is_s : 1; - uint32_t is_s2en : 1; - uint32_t is_o : 1; - uint32_t is_p : 1; - - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; + uint32_t dst : 8; + uint32_t wrmask : 4; /* write-mask */ + uint32_t type : 3; + uint32_t base_lo : 1; /* used with bindless */ + uint32_t is_3d : 1; + + uint32_t is_a : 1; + uint32_t is_s : 1; + uint32_t is_s2en_bindless : 1; + uint32_t is_o : 1; + uint32_t is_p : 1; + + uint32_t opc : 5; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; } instr_cat5_t; /* dword0 encoding for src_off: [src1 + off], src2: */ @@ -748,43 +801,72 @@ typedef union PACKED { }; } instr_cat6_t; +/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded. + */ +typedef enum { + /* Use old GL binding model with an immediate index. + * TODO: find CAT6_UNIFORM and CAT6_NONUNIFORM + */ + CAT6_IMM = 0, + + /* Use the bindless model, with an immediate index. + */ + CAT6_BINDLESS_IMM = 4, + + /* Use the bindless model, with a uniform register index. + */ + CAT6_BINDLESS_UNIFORM = 5, + + /* Use the bindless model, with a register index that isn't guaranteed + * to be uniform. This presumably checks if the indices are equal and + * splits up the load/store, because it works the way you would + * expect. + */ + CAT6_BINDLESS_NONUNIFORM = 6, +} cat6_desc_mode_t; + /** * For atomic ops (which return a value): * - * pad1=1, pad2=c, pad3=0, pad4=3 + * pad1=1, pad3=c, pad5=3 * src1 - vecN offset/coords * src2.x - is actually dest register * src2.y - is 'data' except for cmpxchg where src2.y is 'compare' * and src2.z is 'data' * * For stib (which does not return a value): - * pad1=0, pad2=c, pad3=0, pad4=2 + * pad1=0, pad3=c, pad5=2 * src1 - vecN offset/coords * src2 - value to store * * For ldib: - * pad1=1, pad2=c, pad3=0, pad4=2 + * pad1=1, pad3=c, pad5=2 * src1 - vecN offset/coords * * for ldc (load from UBO using descriptor): - * pad1=0, pad2=8, pad3=0, pad4=2 + * pad1=0, pad3=8, pad5=2 + * + * pad2 and pad5 are only observed to be 0. */ typedef struct PACKED { /* dword0: */ - uint32_t pad1 : 9; + uint32_t pad1 : 1; + uint32_t base : 3; + uint32_t pad2 : 2; + uint32_t desc_mode : 3; uint32_t d : 2; uint32_t typed : 1; uint32_t type_size : 2; uint32_t opc : 5; - uint32_t pad2 : 5; + uint32_t pad3 : 5; uint32_t src1 : 8; /* coordinate/offset */ /* dword1: */ uint32_t src2 : 8; /* or the dst for load instructions */ - uint32_t pad3 : 1; //mustbe0 ?? or zero means imm vs reg for ssbo?? + uint32_t pad4 : 1; //mustbe0 ?? uint32_t ssbo : 8; /* ssbo/image binding point */ uint32_t type : 3; - uint32_t pad4 : 7; + uint32_t pad5 : 7; uint32_t jmp_tgt : 1; uint32_t sync : 1; uint32_t opc_cat : 3; @@ -869,7 +951,7 @@ static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id) * cmdstream traces I have indicates that the pad bit is zero * in all cases. So we can use this to detect new encoding: */ - if ((cat6->pad2 & 0x8) && (cat6->pad4 & 0x2)) { + if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) { assert(gpu_id >= 600); assert(instr->cat6.opc == 0); return false; |