author     Marek Olšák <[email protected]>    2019-10-25 02:39:54 -0400
committer  Marek Olšák <[email protected]>    2019-11-23 00:02:10 -0500
commit     35655865cbde648f6dcfad38fa095d9ec0c4e45b (patch)
tree       2665e785e65a7d51754ab35a946865d4fd3e6073
parent     4fe1d7822b07bf78975c12a2e27db6e5817b1a6b (diff)
nir/serialize: pack instructions better
Reviewed-by: Connor Abbott <[email protected]>
-rw-r--r--   src/compiler/nir/nir_serialize.c   403
1 file changed, 297 insertions, 106 deletions
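Editor's note on the packing scheme: the patch replaces several per-field blob_write_uint32() calls with a single 32-bit header per instruction. The header is a union of bitfield structs (one layout per instruction type, all starting with a 4-bit instr_type and, where a destination exists, ending with an 8-bit packed dest byte), and bit sizes / component counts are squeezed into 3-bit codes. The standalone sketch below is not part of the patch; the demo_* names are invented, and the real code uses util_logbase2() from util/u_math.h instead of __builtin_ctz(). It only illustrates the round trip of the 3-bit encoding and the header union on a simplified ALU layout.

/* Standalone illustration of the packing idea; not from the patch. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Map 0, 1, 2, 4, 8, 16, 32, 64 onto 0..7 so a bit size fits in 3 bits.
 * For a nonzero power of two, ctz() equals log2(), so this matches
 * util_logbase2(bit_size) + 1 in the patch.
 */
static uint8_t
demo_encode_bit_size_3bits(uint8_t bit_size)
{
   return bit_size ? (uint8_t)(__builtin_ctz(bit_size) + 1) : 0;
}

static uint8_t
demo_decode_bit_size_3bits(uint8_t encoded)
{
   return encoded ? (uint8_t)(1u << (encoded - 1)) : 0;
}

/* Simplified 4-byte header: low bits describe the instruction, the top
 * byte carries the packed destination, mirroring union packed_instr and
 * union packed_dest in the patch (field layout here is illustrative).
 */
union demo_header {
   uint32_t u32;
   struct {
      unsigned instr_type:4;   /* e.g. nir_instr_type_alu */
      unsigned op:9;           /* opcode, must stay below 512 */
      unsigned _pad:11;
      unsigned dest:8;         /* packed destination byte */
   } alu;
};

int
main(void)
{
   union demo_header h = { .u32 = 0 };
   h.alu.instr_type = 1;    /* hypothetical instruction-type value */
   h.alu.op = 137;          /* hypothetical opcode < 512 */

   /* Dest byte, SSA form: bit 0 is_ssa, bit 1 has_name,
    * bits 2-4 num_components, bits 5-7 encoded bit_size.
    */
   h.alu.dest = (unsigned)(demo_encode_bit_size_3bits(32) << 5) |
                (3u << 2) | 1u;

   assert(sizeof(union demo_header) == 4);   /* one uint32 per header */
   assert(demo_decode_bit_size_3bits(demo_encode_bit_size_3bits(32)) == 32);

   printf("packed ALU header: 0x%08x\n", (unsigned)h.u32);
   return 0;
}

In the patch itself, write_dest() stores the dest byte into header.any.dest and emits the whole header with one blob_write_uint32(), so a typical ALU or intrinsic instruction costs one uint32 of metadata where the old format used two or three.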
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 8a09299fa0e..64d68b5d549 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -24,6 +24,7 @@
#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
+#include "util/u_math.h"
#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 30)
@@ -115,6 +116,52 @@ read_object(read_ctx *ctx)
return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}
+static uint32_t
+encode_bit_size_3bits(uint8_t bit_size)
+{
+ /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
+ assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
+ if (bit_size)
+ return util_logbase2(bit_size) + 1;
+ return 0;
+}
+
+static uint8_t
+decode_bit_size_3bits(uint8_t bit_size)
+{
+ if (bit_size)
+ return 1 << (bit_size - 1);
+ return 0;
+}
+
+static uint8_t
+encode_num_components_in_3bits(uint8_t num_components)
+{
+ if (num_components <= 4)
+ return num_components;
+ if (num_components == 8)
+ return 5;
+ if (num_components == 16)
+ return 6;
+
+ unreachable("invalid number in num_components");
+ return 0;
+}
+
+static uint8_t
+decode_num_components_in_3bits(uint8_t value)
+{
+ if (value <= 4)
+ return value;
+ if (value == 5)
+ return 8;
+ if (value == 6)
+ return 16;
+
+ unreachable("invalid num_components encoding");
+ return 0;
+}
+
static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
@@ -367,21 +414,108 @@ read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
}
}
+union packed_dest {
+ uint8_t u8;
+ struct {
+ uint8_t is_ssa:1;
+ uint8_t has_name:1;
+ uint8_t num_components:3;
+ uint8_t bit_size:3;
+ } ssa;
+ struct {
+ uint8_t is_ssa:1;
+ uint8_t is_indirect:1;
+ uint8_t _pad:6;
+ } reg;
+};
+
+union packed_instr {
+ uint32_t u32;
+ struct {
+ unsigned instr_type:4; /* always present */
+ unsigned _pad:20;
+ unsigned dest:8; /* always last */
+ } any;
+ struct {
+ unsigned instr_type:4;
+ unsigned exact:1;
+ unsigned no_signed_wrap:1;
+ unsigned no_unsigned_wrap:1;
+ unsigned saturate:1;
+ unsigned writemask:4;
+ unsigned op:9;
+ unsigned _pad:3;
+ unsigned dest:8;
+ } alu;
+ struct {
+ unsigned instr_type:4;
+ unsigned deref_type:3;
+ unsigned mode:10;
+ unsigned _pad:7;
+ unsigned dest:8;
+ } deref;
+ struct {
+ unsigned instr_type:4;
+ unsigned intrinsic:9;
+ unsigned num_components:3;
+ unsigned _pad:8;
+ unsigned dest:8;
+ } intrinsic;
+ struct {
+ unsigned instr_type:4;
+ unsigned last_component:4;
+ unsigned bit_size:3;
+ unsigned _pad:21;
+ } load_const;
+ struct {
+ unsigned instr_type:4;
+ unsigned last_component:4;
+ unsigned bit_size:3;
+ unsigned _pad:21;
+ } undef;
+ struct {
+ unsigned instr_type:4;
+ unsigned num_srcs:4;
+ unsigned op:4;
+ unsigned texture_array_size:12;
+ unsigned dest:8;
+ } tex;
+ struct {
+ unsigned instr_type:4;
+ unsigned num_srcs:20;
+ unsigned dest:8;
+ } phi;
+ struct {
+ unsigned instr_type:4;
+ unsigned type:2;
+ unsigned _pad:26;
+ } jump;
+};
+
+/* Write "lo24" as low 24 bits in the first uint32. */
static void
-write_dest(write_ctx *ctx, const nir_dest *dst)
+write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header)
{
- uint32_t val = dst->is_ssa;
+ STATIC_ASSERT(sizeof(union packed_dest) == 1);
+ union packed_dest dest;
+ dest.u8 = 0;
+
+ dest.ssa.is_ssa = dst->is_ssa;
if (dst->is_ssa) {
- val |= (!ctx->strip && dst->ssa.name) << 1;
- val |= dst->ssa.num_components << 2;
- val |= dst->ssa.bit_size << 5;
+ dest.ssa.has_name = !ctx->strip && dst->ssa.name;
+ dest.ssa.num_components =
+ encode_num_components_in_3bits(dst->ssa.num_components);
+ dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
} else {
- val |= !!(dst->reg.indirect) << 1;
+ dest.reg.is_indirect = !!(dst->reg.indirect);
}
- blob_write_uint32(ctx->blob, val);
+
+ header.any.dest = dest.u8;
+ blob_write_uint32(ctx->blob, header.u32);
+
if (dst->is_ssa) {
write_add_object(ctx, &dst->ssa);
- if (!ctx->strip && dst->ssa.name)
+ if (dest.ssa.has_name)
blob_write_string(ctx->blob, dst->ssa.name);
} else {
blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
@@ -392,22 +526,23 @@ write_dest(write_ctx *ctx, const nir_dest *dst)
}
static void
-read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
+read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
+ union packed_instr header)
{
- uint32_t val = blob_read_uint32(ctx->blob);
- bool is_ssa = val & 0x1;
- if (is_ssa) {
- bool has_name = val & 0x2;
- unsigned num_components = (val >> 2) & 0x7;
- unsigned bit_size = val >> 5;
- char *name = has_name ? blob_read_string(ctx->blob) : NULL;
+ union packed_dest dest;
+ dest.u8 = header.any.dest;
+
+ if (dest.ssa.is_ssa) {
+ unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
+ unsigned num_components =
+ decode_num_components_in_3bits(dest.ssa.num_components);
+ char *name = dest.ssa.has_name ? blob_read_string(ctx->blob) : NULL;
nir_ssa_dest_init(instr, dst, num_components, bit_size, name);
read_add_object(ctx, &dst->ssa);
} else {
- bool is_indirect = val & 0x2;
dst->reg.reg = read_object(ctx);
dst->reg.base_offset = blob_read_uint32(ctx->blob);
- if (is_indirect) {
+ if (dest.reg.is_indirect) {
dst->reg.indirect = ralloc(instr, nir_src);
read_src(ctx, dst->reg.indirect, instr);
}
@@ -417,19 +552,24 @@ read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr)
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
- blob_write_uint32(ctx->blob, alu->op);
- uint32_t flags = alu->exact;
- flags |= alu->no_signed_wrap << 1;
- flags |= alu->no_unsigned_wrap << 2;
- flags |= alu->dest.saturate << 3;
- flags |= alu->dest.write_mask << 4;
- blob_write_uint32(ctx->blob, flags);
+ /* 9 bits for nir_op */
+ STATIC_ASSERT(nir_num_opcodes <= 512);
+ union packed_instr header;
+ header.u32 = 0;
- write_dest(ctx, &alu->dest.dest);
+ header.alu.instr_type = alu->instr.type;
+ header.alu.exact = alu->exact;
+ header.alu.no_signed_wrap = alu->no_signed_wrap;
+ header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
+ header.alu.saturate = alu->dest.saturate;
+ header.alu.writemask = alu->dest.write_mask;
+ header.alu.op = alu->op;
+
+ write_dest(ctx, &alu->dest.dest, header);
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
write_src(ctx, &alu->src[i].src);
- flags = alu->src[i].negate;
+ uint32_t flags = alu->src[i].negate;
flags |= alu->src[i].abs << 1;
for (unsigned j = 0; j < 4; j++)
flags |= alu->src[i].swizzle[j] << (2 + 2 * j);
@@ -438,23 +578,21 @@ write_alu(write_ctx *ctx, const nir_alu_instr *alu)
}
static nir_alu_instr *
-read_alu(read_ctx *ctx)
+read_alu(read_ctx *ctx, union packed_instr header)
{
- nir_op op = blob_read_uint32(ctx->blob);
- nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op);
+ nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
- uint32_t flags = blob_read_uint32(ctx->blob);
- alu->exact = flags & 1;
- alu->no_signed_wrap = flags & 2;
- alu->no_unsigned_wrap = flags & 4;
- alu->dest.saturate = flags & 8;
- alu->dest.write_mask = flags >> 4;
+ alu->exact = header.alu.exact;
+ alu->no_signed_wrap = header.alu.no_signed_wrap;
+ alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
+ alu->dest.saturate = header.alu.saturate;
+ alu->dest.write_mask = header.alu.writemask;
- read_dest(ctx, &alu->dest.dest, &alu->instr);
+ read_dest(ctx, &alu->dest.dest, &alu->instr, header);
- for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
+ for (unsigned i = 0; i < nir_op_infos[header.alu.op].num_inputs; i++) {
read_src(ctx, &alu->src[i].src, &alu->instr);
- flags = blob_read_uint32(ctx->blob);
+ uint32_t flags = blob_read_uint32(ctx->blob);
alu->src[i].negate = flags & 1;
alu->src[i].abs = flags & 2;
for (unsigned j = 0; j < 4; j++)
@@ -467,12 +605,18 @@ read_alu(read_ctx *ctx)
static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
- blob_write_uint32(ctx->blob, deref->deref_type);
+ assert(deref->deref_type < 8);
+ assert(deref->mode < (1 << 10));
- blob_write_uint32(ctx->blob, deref->mode);
- encode_type_to_blob(ctx->blob, deref->type);
+ union packed_instr header;
+ header.u32 = 0;
- write_dest(ctx, &deref->dest);
+ header.deref.instr_type = deref->instr.type;
+ header.deref.deref_type = deref->deref_type;
+ header.deref.mode = deref->mode;
+
+ write_dest(ctx, &deref->dest, header);
+ encode_type_to_blob(ctx->blob, deref->type);
if (deref->deref_type == nir_deref_type_var) {
write_object(ctx, deref->var);
@@ -505,15 +649,15 @@ write_deref(write_ctx *ctx, const nir_deref_instr *deref)
}
static nir_deref_instr *
-read_deref(read_ctx *ctx)
+read_deref(read_ctx *ctx, union packed_instr header)
{
- nir_deref_type deref_type = blob_read_uint32(ctx->blob);
+ nir_deref_type deref_type = header.deref.deref_type;
nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
- deref->mode = blob_read_uint32(ctx->blob);
- deref->type = decode_type_from_blob(ctx->blob);
+ read_dest(ctx, &deref->dest, &deref->instr, header);
- read_dest(ctx, &deref->dest, &deref->instr);
+ deref->mode = header.deref.mode;
+ deref->type = decode_type_from_blob(ctx->blob);
if (deref_type == nir_deref_type_var) {
deref->var = read_object(ctx);
@@ -550,15 +694,24 @@ read_deref(read_ctx *ctx)
static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
- blob_write_uint32(ctx->blob, intrin->intrinsic);
-
+ /* 9 bits for nir_intrinsic_op */
+ STATIC_ASSERT(nir_num_intrinsics <= 512);
unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
+ assert(intrin->intrinsic < 512);
+
+ union packed_instr header;
+ header.u32 = 0;
- blob_write_uint32(ctx->blob, intrin->num_components);
+ header.intrinsic.instr_type = intrin->instr.type;
+ header.intrinsic.intrinsic = intrin->intrinsic;
+ header.intrinsic.num_components =
+ encode_num_components_in_3bits(intrin->num_components);
if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
- write_dest(ctx, &intrin->dest);
+ write_dest(ctx, &intrin->dest, header);
+ else
+ blob_write_uint32(ctx->blob, header.u32);
for (unsigned i = 0; i < num_srcs; i++)
write_src(ctx, &intrin->src[i]);
@@ -568,19 +721,19 @@ write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
}
static nir_intrinsic_instr *
-read_intrinsic(read_ctx *ctx)
+read_intrinsic(read_ctx *ctx, union packed_instr header)
{
- nir_intrinsic_op op = blob_read_uint32(ctx->blob);
-
+ nir_intrinsic_op op = header.intrinsic.intrinsic;
nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
unsigned num_indices = nir_intrinsic_infos[op].num_indices;
- intrin->num_components = blob_read_uint32(ctx->blob);
+ intrin->num_components =
+ decode_num_components_in_3bits(header.intrinsic.num_components);
if (nir_intrinsic_infos[op].has_dest)
- read_dest(ctx, &intrin->dest, &intrin->instr);
+ read_dest(ctx, &intrin->dest, &intrin->instr, header);
for (unsigned i = 0; i < num_srcs; i++)
read_src(ctx, &intrin->src[i], &intrin->instr);
@@ -594,20 +747,25 @@ read_intrinsic(read_ctx *ctx)
static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
- uint32_t val = lc->def.num_components;
- val |= lc->def.bit_size << 3;
- blob_write_uint32(ctx->blob, val);
+ assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
+ union packed_instr header;
+ header.u32 = 0;
+
+ header.load_const.instr_type = lc->instr.type;
+ header.load_const.last_component = lc->def.num_components - 1;
+ header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
+
+ blob_write_uint32(ctx->blob, header.u32);
blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
write_add_object(ctx, &lc->def);
}
static nir_load_const_instr *
-read_load_const(read_ctx *ctx)
+read_load_const(read_ctx *ctx, union packed_instr header)
{
- uint32_t val = blob_read_uint32(ctx->blob);
-
nir_load_const_instr *lc =
- nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3);
+ nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
+ decode_bit_size_3bits(header.load_const.bit_size));
blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
read_add_object(ctx, &lc->def);
@@ -617,19 +775,25 @@ read_load_const(read_ctx *ctx)
static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
- uint32_t val = undef->def.num_components;
- val |= undef->def.bit_size << 3;
- blob_write_uint32(ctx->blob, val);
+ assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
+
+ union packed_instr header;
+ header.u32 = 0;
+
+ header.undef.instr_type = undef->instr.type;
+ header.undef.last_component = undef->def.num_components - 1;
+ header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
+
+ blob_write_uint32(ctx->blob, header.u32);
write_add_object(ctx, &undef->def);
}
static nir_ssa_undef_instr *
-read_ssa_undef(read_ctx *ctx)
+read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
- uint32_t val = blob_read_uint32(ctx->blob);
-
nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3);
+ nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
+ decode_bit_size_3bits(header.undef.bit_size));
read_add_object(ctx, &undef->def);
return undef;
@@ -652,12 +816,24 @@ union packed_tex_data {
static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
- blob_write_uint32(ctx->blob, tex->num_srcs);
- blob_write_uint32(ctx->blob, tex->op);
+ assert(tex->num_srcs < 16);
+ assert(tex->op < 16);
+ assert(tex->texture_array_size < 1024);
+
+ union packed_instr header;
+ header.u32 = 0;
+
+ header.tex.instr_type = tex->instr.type;
+ header.tex.num_srcs = tex->num_srcs;
+ header.tex.op = tex->op;
+ header.tex.texture_array_size = tex->texture_array_size;
+
+ write_dest(ctx, &tex->dest, header);
+
blob_write_uint32(ctx->blob, tex->texture_index);
- blob_write_uint32(ctx->blob, tex->texture_array_size);
blob_write_uint32(ctx->blob, tex->sampler_index);
- blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
+ if (tex->op == nir_texop_tg4)
+ blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
union packed_tex_data packed = {
@@ -671,7 +847,6 @@ write_tex(write_ctx *ctx, const nir_tex_instr *tex)
};
blob_write_uint32(ctx->blob, packed.u32);
- write_dest(ctx, &tex->dest);
for (unsigned i = 0; i < tex->num_srcs; i++) {
blob_write_uint32(ctx->blob, tex->src[i].src_type);
write_src(ctx, &tex->src[i].src);
@@ -679,16 +854,18 @@ write_tex(write_ctx *ctx, const nir_tex_instr *tex)
}
static nir_tex_instr *
-read_tex(read_ctx *ctx)
+read_tex(read_ctx *ctx, union packed_instr header)
{
- unsigned num_srcs = blob_read_uint32(ctx->blob);
- nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs);
+ nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
+
+ read_dest(ctx, &tex->dest, &tex->instr, header);
- tex->op = blob_read_uint32(ctx->blob);
+ tex->op = header.tex.op;
tex->texture_index = blob_read_uint32(ctx->blob);
- tex->texture_array_size = blob_read_uint32(ctx->blob);
+ tex->texture_array_size = header.tex.texture_array_size;
tex->sampler_index = blob_read_uint32(ctx->blob);
- blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
+ if (tex->op == nir_texop_tg4)
+ blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
union packed_tex_data packed;
packed.u32 = blob_read_uint32(ctx->blob);
@@ -700,7 +877,6 @@ read_tex(read_ctx *ctx)
tex->is_new_style_shadow = packed.u.is_new_style_shadow;
tex->component = packed.u.component;
- read_dest(ctx, &tex->dest, &tex->instr);
for (unsigned i = 0; i < tex->num_srcs; i++) {
tex->src[i].src_type = blob_read_uint32(ctx->blob);
read_src(ctx, &tex->src[i].src, &tex->instr);
@@ -712,14 +888,18 @@ read_tex(read_ctx *ctx)
static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
+ union packed_instr header;
+ header.u32 = 0;
+
+ header.phi.instr_type = phi->instr.type;
+ header.phi.num_srcs = exec_list_length(&phi->srcs);
+
/* Phi nodes are special, since they may reference SSA definitions and
* basic blocks that don't exist yet. We leave two empty uint32_t's here,
* and then store enough information so that a later fixup pass can fill
* them in correctly.
*/
- write_dest(ctx, &phi->dest);
-
- blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs));
+ write_dest(ctx, &phi->dest, header);
nir_foreach_phi_src(src, phi) {
assert(src->src.is_ssa);
@@ -748,13 +928,11 @@ write_fixup_phis(write_ctx *ctx)
}
static nir_phi_instr *
-read_phi(read_ctx *ctx, nir_block *blk)
+read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
- read_dest(ctx, &phi->dest, &phi->instr);
-
- unsigned num_srcs = blob_read_uint32(ctx->blob);
+ read_dest(ctx, &phi->dest, &phi->instr, header);
/* For similar reasons as before, we just store the index directly into the
* pointer, and let a later pass resolve the phi sources.
@@ -766,7 +944,7 @@ read_phi(read_ctx *ctx, nir_block *blk)
*/
nir_instr_insert_after_block(blk, &phi->instr);
- for (unsigned i = 0; i < num_srcs; i++) {
+ for (unsigned i = 0; i < header.phi.num_srcs; i++) {
nir_phi_src *src = ralloc(phi, nir_phi_src);
src->src.is_ssa = true;
@@ -808,14 +986,21 @@ read_fixup_phis(read_ctx *ctx)
static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
- blob_write_uint32(ctx->blob, jmp->type);
+ assert(jmp->type < 4);
+
+ union packed_instr header;
+ header.u32 = 0;
+
+ header.jump.instr_type = jmp->instr.type;
+ header.jump.type = jmp->type;
+
+ blob_write_uint32(ctx->blob, header.u32);
}
static nir_jump_instr *
-read_jump(read_ctx *ctx)
+read_jump(read_ctx *ctx, union packed_instr header)
{
- nir_jump_type type = blob_read_uint32(ctx->blob);
- nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type);
+ nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
return jmp;
}
@@ -843,7 +1028,9 @@ read_call(read_ctx *ctx)
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
- blob_write_uint32(ctx->blob, instr->type);
+ /* We have only 4 bits for the instruction type. */
+ assert(instr->type < 16);
+
switch (instr->type) {
case nir_instr_type_alu:
write_alu(ctx, nir_instr_as_alu(instr));
@@ -870,6 +1057,7 @@ write_instr(write_ctx *ctx, const nir_instr *instr)
write_jump(ctx, nir_instr_as_jump(instr));
break;
case nir_instr_type_call:
+ blob_write_uint32(ctx->blob, instr->type);
write_call(ctx, nir_instr_as_call(instr));
break;
case nir_instr_type_parallel_copy:
@@ -882,26 +1070,29 @@ write_instr(write_ctx *ctx, const nir_instr *instr)
static void
read_instr(read_ctx *ctx, nir_block *block)
{
- nir_instr_type type = blob_read_uint32(ctx->blob);
+ STATIC_ASSERT(sizeof(union packed_instr) == 4);
+ union packed_instr header;
+ header.u32 = blob_read_uint32(ctx->blob);
nir_instr *instr;
- switch (type) {
+
+ switch (header.any.instr_type) {
case nir_instr_type_alu:
- instr = &read_alu(ctx)->instr;
+ instr = &read_alu(ctx, header)->instr;
break;
case nir_instr_type_deref:
- instr = &read_deref(ctx)->instr;
+ instr = &read_deref(ctx, header)->instr;
break;
case nir_instr_type_intrinsic:
- instr = &read_intrinsic(ctx)->instr;
+ instr = &read_intrinsic(ctx, header)->instr;
break;
case nir_instr_type_load_const:
- instr = &read_load_const(ctx)->instr;
+ instr = &read_load_const(ctx, header)->instr;
break;
case nir_instr_type_ssa_undef:
- instr = &read_ssa_undef(ctx)->instr;
+ instr = &read_ssa_undef(ctx, header)->instr;
break;
case nir_instr_type_tex:
- instr = &read_tex(ctx)->instr;
+ instr = &read_tex(ctx, header)->instr;
break;
case nir_instr_type_phi:
/* Phi instructions are a bit of a special case when reading because we
@@ -909,10 +1100,10 @@ read_instr(read_ctx *ctx, nir_block *block)
* for us. Instead, we need to wait until all the blocks/instructions
* are read so that we can set their sources up.
*/
- read_phi(ctx, block);
+ read_phi(ctx, block, header);
return;
case nir_instr_type_jump:
- instr = &read_jump(ctx)->instr;
+ instr = &read_jump(ctx, header)->instr;
break;
case nir_instr_type_call:
instr = &read_call(ctx)->instr;