diff options
author | Alyssa Rosenzweig <[email protected]> | 2019-12-19 16:39:56 -0500 |
---|---|---|
committer | Alyssa Rosenzweig <[email protected]> | 2019-12-24 19:48:57 -0500 |
commit | 31305e1b2854c6a70dc8c61265f525fcb52c265d (patch) | |
tree | 864aba886cdc1fbb7b62963be622513733788517 /src/gallium | |
parent | 8a576726734bb214e5af3587b7544d93f81fe260 (diff) |
panfrost: Move instancing routines to encoder/
Nothing Gallium specific or stateful about them.
Signed-off-by: Alyssa Rosenzweig <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/panfrost/pan_attributes.c | 215 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_context.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_context.h | 14 |
3 files changed, 1 insertions, 238 deletions
diff --git a/src/gallium/drivers/panfrost/pan_attributes.c b/src/gallium/drivers/panfrost/pan_attributes.c index 4fa3546f3a3..c1b58b36610 100644 --- a/src/gallium/drivers/panfrost/pan_attributes.c +++ b/src/gallium/drivers/panfrost/pan_attributes.c @@ -26,221 +26,6 @@ #include "pan_bo.h" #include "pan_context.h" -/* See mali_job for notes on how this works. But basically, for small vertex - * counts, we have a lookup table, and for large vertex counts, we look at the - * high bits as a heuristic. This has to match exactly how the hardware - * calculates this (which is why the algorithm is so weird) or else instancing - * will break. */ - -/* Given an odd number (of the form 2k + 1), compute k */ -#define ODD(odd) ((odd - 1) >> 1) - -/* Given the shift/odd pair, recover the original padded integer */ - -unsigned -pan_expand_shift_odd(struct pan_shift_odd o) -{ - unsigned odd = 2*o.odd + 1; - unsigned shift = 1 << o.shift; - return odd * shift; -} - -static inline struct pan_shift_odd -pan_factored(unsigned pot, unsigned odd) -{ - struct pan_shift_odd out; - - assert(util_is_power_of_two_or_zero(pot)); - assert(odd & 1); - - /* Odd is of the form (2k + 1) = (k << 1) + 1 = (k << 1) | 1. - * - * So (odd >> 1) = ((k << 1) | 1) >> 1 = ((k << 1) >> 1) | (1 >> 1) - * = k | 0 = k */ - - out.odd = (odd >> 1); - - /* POT is the form (1 << shift) */ - out.shift = __builtin_ctz(pot); - - return out; -} - - -/* For small vertices. Second argument is whether the primitive takes a - * power-of-two argument, which determines how rounding works. True for POINTS - * and LINES, false for TRIANGLES. Presumably true for QUADS but you'd be crazy - * to try instanced quads on ES class hardware <3 */ - -static struct { - unsigned pot; - unsigned odd; -} small_lut[] = { - { 0, 1 }, - { 1, 1 }, - { 2, 1 }, - { 1, 3 }, - { 4, 1 }, - { 1, 5 }, - { 2, 3 }, - { 1, 7 }, - { 8, 1 }, - { 1, 9 }, - { 2, 5 }, - { 4, 3 }, /* 11 */ - { 4, 3 }, - { 2, 7 }, /* 13 */ - { 2, 7 }, - { 16, 1 }, /* 15 */ - { 16, 1 }, - { 2, 9 }, - { 4, 5 }, /* 20 */ - { 4, 5 } -}; - -static struct pan_shift_odd -panfrost_small_padded_vertex_count(unsigned idx) -{ - return pan_factored( - small_lut[idx].pot, - small_lut[idx].odd); -} - -static struct pan_shift_odd -panfrost_large_padded_vertex_count(uint32_t vertex_count) -{ - struct pan_shift_odd out = { 0 }; - - /* First, we have to find the highest set one */ - unsigned highest = 32 - __builtin_clz(vertex_count); - - /* Using that, we mask out the highest 4-bits */ - unsigned n = highest - 4; - unsigned nibble = (vertex_count >> n) & 0xF; - - /* Great, we have the nibble. Now we can just try possibilities. Note - * that we don't care about the bottom most bit in most cases, and we - * know the top bit must be 1 */ - - unsigned middle_two = (nibble >> 1) & 0x3; - - switch (middle_two) { - case 0b00: - if (nibble & 1) - return pan_factored(1 << n, 9); - else - return pan_factored(1 << (n + 1), 5); - case 0b01: - return pan_factored(1 << (n + 2), 3); - case 0b10: - return pan_factored(1 << (n + 1), 7); - case 0b11: - return pan_factored(1 << (n + 4), 1); - default: - unreachable("Invalid two bits"); - } - - return out; -} - -struct pan_shift_odd -panfrost_padded_vertex_count( - unsigned vertex_count, - bool pot) -{ - assert(vertex_count > 0); - - if (vertex_count < 20) { - /* Add an off-by-one if it won't align naturally (quirk of the hardware) */ - //if (!pot) - // vertex_count++; - - return panfrost_small_padded_vertex_count(vertex_count); - } else - return panfrost_large_padded_vertex_count(vertex_count); -} - -/* The much, much more irritating case -- instancing is enabled. See - * panfrost_job.h for notes on how this works */ - -static unsigned -panfrost_vertex_instanced( - unsigned padded_count, - unsigned instance_shift, unsigned instance_odd, - unsigned divisor, - union mali_attr *attrs) -{ - /* Depending if there is an instance divisor or not, packing varies. - * When there is a divisor, the hardware-level divisor is actually the - * product of the instance divisor and the padded count */ - - unsigned hw_divisor = padded_count * divisor; - - if (divisor == 0) { - /* Per-vertex attributes use the MODULO mode. First, compute - * the modulus */ - - attrs->elements |= MALI_ATTR_MODULO; - attrs->shift = instance_shift; - attrs->extra_flags = instance_odd; - - return 1; - } else if (util_is_power_of_two_or_zero(hw_divisor)) { - /* If there is a divisor but the hardware divisor works out to - * a power of two (not terribly exceptional), we can use an - * easy path (just shifting) */ - - attrs->elements |= MALI_ATTR_POT_DIVIDE; - attrs->shift = __builtin_ctz(hw_divisor); - - return 1; - } else { - /* We have a NPOT divisor. Here's the fun one (multipling by - * the inverse and shifting) */ - - /* floor(log2(d)) */ - unsigned shift = util_logbase2(hw_divisor); - - /* m = ceil(2^(32 + shift) / d) */ - uint64_t shift_hi = 32 + shift; - uint64_t t = 1ll << shift_hi; - double t_f = t; - double hw_divisor_d = hw_divisor; - double m_f = ceil(t_f / hw_divisor_d); - unsigned m = m_f; - - /* Default case */ - uint32_t magic_divisor = m, extra_flags = 0; - - /* e = 2^(shift + 32) % d */ - uint64_t e = t % hw_divisor; - - /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob - * seems to use a different condition */ - if (e <= (1ll << shift)) { - magic_divisor = m - 1; - extra_flags = 1; - } - - /* Top flag implicitly set */ - assert(magic_divisor & (1u << 31)); - magic_divisor &= ~(1u << 31); - - /* Upload to two different slots */ - - attrs[0].elements |= MALI_ATTR_NPOT_DIVIDE; - attrs[0].shift = shift; - attrs[0].extra_flags = extra_flags; - - attrs[1].unk = 0x20; - attrs[1].magic_divisor = magic_divisor; - attrs[1].zero = 0; - attrs[1].divisor = divisor; - - return 2; - } -} - void panfrost_emit_vertex_data(struct panfrost_batch *batch) { diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index ea87bbbdbd5..ea2bb442047 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -1551,16 +1551,8 @@ panfrost_draw_vbo( /* Encode the padded vertex count */ if (info->instance_count > 1) { - /* Triangles have non-even vertex counts so they change how - * padding works internally */ - - bool is_triangle = - mode == PIPE_PRIM_TRIANGLES || - mode == PIPE_PRIM_TRIANGLE_STRIP || - mode == PIPE_PRIM_TRIANGLE_FAN; - struct pan_shift_odd so = - panfrost_padded_vertex_count(vertex_count, !is_triangle); + panfrost_padded_vertex_count(vertex_count); ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = so.shift; ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = so.shift; diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 260c26a9fdb..ad0791bf06a 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -336,20 +336,6 @@ panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i); void panfrost_emit_vertex_data(struct panfrost_batch *batch); -struct pan_shift_odd { - unsigned shift; - unsigned odd; -}; - -struct pan_shift_odd -panfrost_padded_vertex_count( - unsigned vertex_count, - bool primitive_pot); - - -unsigned -pan_expand_shift_odd(struct pan_shift_odd o); - /* Compute */ void |