summaryrefslogtreecommitdiffstats
path: root/src/compiler/nir
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2019-11-04 23:29:33 -0500
committerMarek Olšák <[email protected]>2019-11-23 00:02:10 -0500
commitef4630cf4f3ec2561d437909e2681869fee14873 (patch)
treec8040cef511496bf10088db6255dc2d26af438d2 /src/compiler/nir
parentd3346b275a476a5d3144b732610baa6b895a680a (diff)
nir/serialize: pack nir_intrinsic_instr::const_index[] better
Reviewed-by: Connor Abbott <[email protected]>
Diffstat (limited to 'src/compiler/nir')
-rw-r--r--src/compiler/nir/nir_serialize.c89
1 files changed, 84 insertions, 5 deletions
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 8290cd82d21..4d4f0bae514 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -574,6 +574,21 @@ union packed_dest {
} reg;
};
+enum intrinsic_const_indices_encoding {
+ /* Use the 6 bits of packed_const_indices to store 1-6 indices.
+ * 1 6-bit index, or 2 3-bit indices, or 3 2-bit indices, or
+ * 4-6 1-bit indices.
+ *
+ * The common case for load_ubo is 0, 0, 0, which is trivially represented.
+ * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
+ */
+ const_indices_6bit_all_combined,
+
+ const_indices_8bit, /* 8 bits per element */
+ const_indices_16bit, /* 16 bits per element */
+ const_indices_32bit, /* 32 bits per element */
+};
+
enum load_const_packing {
/* Constants are not packed and are stored in following dwords. */
load_const_full,
@@ -616,7 +631,8 @@ union packed_instr {
unsigned instr_type:4;
unsigned intrinsic:9;
unsigned num_components:3;
- unsigned _pad:8;
+ unsigned const_indices_encoding:2;
+ unsigned packed_const_indices:6;
unsigned dest:8;
} intrinsic;
struct {
@@ -874,6 +890,31 @@ write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
header.intrinsic.num_components =
encode_num_components_in_3bits(intrin->num_components);
+ /* Analyze constant indices to decide how to encode them. */
+ if (num_indices) {
+ unsigned max_bits = 0;
+ for (unsigned i = 0; i < num_indices; i++) {
+ unsigned max = util_last_bit(intrin->const_index[i]);
+ max_bits = MAX2(max_bits, max);
+ }
+
+ if (max_bits * num_indices <= 6) {
+ header.intrinsic.const_indices_encoding = const_indices_6bit_all_combined;
+
+ /* Pack all const indices into 6 bits. */
+ unsigned bit_size = 6 / num_indices;
+ for (unsigned i = 0; i < num_indices; i++) {
+ header.intrinsic.packed_const_indices |=
+ intrin->const_index[i] << (i * bit_size);
+ }
+ } else if (max_bits <= 8)
+ header.intrinsic.const_indices_encoding = const_indices_8bit;
+ else if (max_bits <= 16)
+ header.intrinsic.const_indices_encoding = const_indices_16bit;
+ else
+ header.intrinsic.const_indices_encoding = const_indices_32bit;
+ }
+
if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
write_dest(ctx, &intrin->dest, header);
else
@@ -882,8 +923,22 @@ write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
for (unsigned i = 0; i < num_srcs; i++)
write_src(ctx, &intrin->src[i]);
- for (unsigned i = 0; i < num_indices; i++)
- blob_write_uint32(ctx->blob, intrin->const_index[i]);
+ if (num_indices) {
+ switch (header.intrinsic.const_indices_encoding) {
+ case const_indices_8bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ blob_write_uint8(ctx->blob, intrin->const_index[i]);
+ break;
+ case const_indices_16bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ blob_write_uint16(ctx->blob, intrin->const_index[i]);
+ break;
+ case const_indices_32bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ blob_write_uint32(ctx->blob, intrin->const_index[i]);
+ break;
+ }
+ }
}
static nir_intrinsic_instr *
@@ -904,8 +959,32 @@ read_intrinsic(read_ctx *ctx, union packed_instr header)
for (unsigned i = 0; i < num_srcs; i++)
read_src(ctx, &intrin->src[i], &intrin->instr);
- for (unsigned i = 0; i < num_indices; i++)
- intrin->const_index[i] = blob_read_uint32(ctx->blob);
+ if (num_indices) {
+ switch (header.intrinsic.const_indices_encoding) {
+ case const_indices_6bit_all_combined: {
+ unsigned bit_size = 6 / num_indices;
+ unsigned bit_mask = u_bit_consecutive(0, bit_size);
+ for (unsigned i = 0; i < num_indices; i++) {
+ intrin->const_index[i] =
+ (header.intrinsic.packed_const_indices >> (i * bit_size)) &
+ bit_mask;
+ }
+ break;
+ }
+ case const_indices_8bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ intrin->const_index[i] = blob_read_uint8(ctx->blob);
+ break;
+ case const_indices_16bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ intrin->const_index[i] = blob_read_uint16(ctx->blob);
+ break;
+ case const_indices_32bit:
+ for (unsigned i = 0; i < num_indices; i++)
+ intrin->const_index[i] = blob_read_uint32(ctx->blob);
+ break;
+ }
+ }
return intrin;
}