summary refs log tree commit diff stats
path: root/src/gallium/drivers/panfrost/pandecode/decode.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/panfrost/pandecode/decode.c')
-rw-r--r-- src/gallium/drivers/panfrost/pandecode/decode.c 112
1 files changed, 108 insertions, 4 deletions
diff --git a/src/gallium/drivers/panfrost/pandecode/decode.c b/src/gallium/drivers/panfrost/pandecode/decode.c
index 4cc7ca03995..189c2482316 100644
--- a/src/gallium/drivers/panfrost/pandecode/decode.c
+++ b/src/gallium/drivers/panfrost/pandecode/decode.c
@@ -859,6 +859,100 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
return MALI_NEGATIVE(fb->rt_count_1);
}
+/* Emit a log comment with the padded vertex count encoded by a (shift, odd)
+ * field pair:
+ *
+ *     padded_num_vertices = (2 * k + 1) << shift
+ *
+ * shift: exponent of the power-of-two factor
+ * k:     encodes the odd factor as 2k + 1 */
+
+static void
+pandecode_padded_vertices(unsigned shift, unsigned k)
+{
+        unsigned odd = 2*k + 1;
+
+        /* Unsigned literal: a signed `1 << 31` would overflow int */
+        unsigned pot = 1u << shift;
+
+        /* The product is unsigned, so print it with %u */
+        pandecode_msg("padded_num_vertices = %u\n", odd * pot);
+}
+
+/* Given a magic divisor, recover what we were trying to divide by.
+ *
+ * Let m represent the magic divisor. By definition, m is an element of Z, where
+ * 0 <= m < 2^N, for N bits in m.
+ *
+ * Let q represent the number we would like to divide by.
+ *
+ * By definition of a magic divisor for N-bit unsigned integers (a number you
+ * multiply by to magically get division), m is a number such that:
+ *
+ * (m * x) & (2^N - 1) = floor(x/q).
+ * for all x in Z where 0 <= x < 2^N
+ *
+ * Ignore the case where any of the above values equals zero; it is irrelevant
+ * for our purposes (instanced arrays).
+ *
+ * Choose x = q. Then:
+ *
+ * (m * x) & (2^N - 1) = floor(x/q).
+ * (m * q) & (2^N - 1) = floor(q/q).
+ *
+ * floor(q/q) = floor(1) = 1, therefore:
+ *
+ * (m * q) & (2^N - 1) = 1
+ *
+ * Recall the identity that a bitwise AND with one less than a power of two
+ * equals the modulo by that power of two, i.e. for all x:
+ *
+ * x & (2^N - 1) = x % (2^N)
+ *
+ * Therefore:
+ *
+ * mq % (2^N) = 1
+ *
+ * By definition, a modular multiplicative inverse of a number m is the number
+ * q such that with respect to a modulus M:
+ *
+ * mq % M = 1
+ *
+ * Therefore, q is the modular multiplicative inverse of m with modulus 2^N.
+ *
+ */
+
+/* Brute-force the divisor that a magic-divisor field encodes and log it,
+ * together with the padded vertex count obtained by dividing the recovered
+ * value by `orig_divisor`.
+ *
+ * magic:        magic divisor field, without its implicit top bit
+ * shift:        right shift applied to the 32-bit product before it is
+ *               compared against 1
+ * orig_divisor: divisor to factor back out of the recovered value
+ * extra:        if nonzero, magic is incremented by one (rounding direction) */
+
+static void
+pandecode_magic_divisor(uint32_t magic, unsigned shift, unsigned orig_divisor, unsigned extra)
+{
+        /* Find the smallest `inverse` such that (magic * inverse), truncated
+         * to 32 bits and shifted right by `shift`, equals 1, in the most
+         * lame way possible... just try every candidate in turn.
+         * Asymptotically slow but nobody cares in practice, unless you have
+         * massive numbers of vertices or high divisors. */
+
+        unsigned inverse = 0;
+        bool found = false;
+
+        /* Magic implicitly has the highest bit set. Unsigned literal, since
+         * shifting a signed 1 into the sign bit is undefined. */
+        magic |= (1u << 31);
+
+        /* Depending on rounding direction */
+        if (extra)
+                magic++;
+
+        /* Stop once `inverse` wraps back to zero: not every (magic, shift)
+         * pair has a solution (e.g. an even magic with shift == 0), and an
+         * unbounded search would then spin forever on garbage input. */
+        do {
+                uint32_t product = magic * inverse;
+
+                if (shift) {
+                        product >>= shift;
+                }
+
+                if (product == 1) {
+                        found = true;
+                        break;
+                }
+        } while (++inverse != 0);
+
+        if (!found) {
+                pandecode_msg("XXX: could not recover divisor from magic 0x%x\n", (unsigned) magic);
+                return;
+        }
+
+        pandecode_msg("dividing by %u (maybe off by two)\n", inverse);
+
+        /* Recall we're supposed to divide by (gl_level_divisor *
+         * padded_num_vertices) */
+
+        /* A zero divisor cannot be factored out; skip rather than trap */
+        if (orig_divisor == 0)
+                return;
+
+        unsigned padded_num_vertices = inverse / orig_divisor;
+
+        pandecode_msg("padded_num_vertices = %u\n", padded_num_vertices);
+}
+
static void
pandecode_replay_attributes(const struct pandecode_mapped_memory *mem,
mali_ptr addr, int job_no, char *suffix,
@@ -905,9 +999,9 @@ pandecode_replay_attributes(const struct pandecode_mapped_memory *mem,
/* Decode further where possible */
if (mode == MALI_ATTR_MODULO) {
- unsigned odd = (2 * attr[i].extra_flags) + 1;
- unsigned pot = (1 << attr[i].shift);
- pandecode_msg("padded_num_vertices = %d\n", odd * pot);
+ pandecode_padded_vertices(
+ attr[i].shift,
+ attr[i].extra_flags);
}
pandecode_indent--;
@@ -922,6 +1016,7 @@ pandecode_replay_attributes(const struct pandecode_mapped_memory *mem,
if (attr[i].zero != 0)
pandecode_prop("zero = 0x%x /* XXX zero tripped */", attr[i].zero);
pandecode_prop("divisor = %d", attr[i].divisor);
+ pandecode_magic_divisor(attr[i].magic_divisor, attr[i - 1].shift, attr[i].divisor, attr[i - 1].extra_flags);
pandecode_indent--;
pandecode_log("}, \n");
}
@@ -1114,7 +1209,7 @@ pandecode_replay_attribute_meta(int job_no, int count, const struct mali_vertex_
pandecode_prop("unknown1 = 0x%" PRIx64, (u64) attr_meta->unknown1);
pandecode_prop("unknown3 = 0x%" PRIx64, (u64) attr_meta->unknown3);
- pandecode_prop("src_offset = 0x%" PRIx64, (u64) attr_meta->src_offset);
+ pandecode_prop("src_offset = %d", attr_meta->src_offset);
pandecode_indent--;
pandecode_log("},\n");
@@ -2040,6 +2135,15 @@ pandecode_replay_vertex_or_tiler_job_mdg(const struct mali_job_descriptor_header
pandecode_replay_gl_enables(v->gl_enables, h->job_type);
+ if (v->instance_shift || v->instance_odd) {
+ pandecode_prop("instance_shift = 0x%d /* %d */",
+ v->instance_shift, 1 << v->instance_shift);
+ pandecode_prop("instance_odd = 0x%X /* %d */",
+ v->instance_odd, (2 * v->instance_odd) + 1);
+
+ pandecode_padded_vertices(v->instance_shift, v->instance_odd);
+ }
+
if (v->draw_start)
pandecode_prop("draw_start = %d", v->draw_start);