summary | refs | log | tree | commit | diff | stats
path: root/src/panfrost/encoder/pan_scratch.c
diff options
context:
space:
mode:
author: Alyssa Rosenzweig <[email protected]> 2020-02-25 15:40:20 -0500
committer: Tomeu Vizoso <[email protected]> 2020-02-27 16:32:17 +0100
commit: 2fea44c6361b171c9313a75a7e9ef4cbf97602f0 (patch)
tree: 41c3017a9b5f143148872889bb66ee69c5655d27 /src/panfrost/encoder/pan_scratch.c
parent: 40fd1f9da448b65bbd6491199e9f66cef7fdbe32 (diff)
panfrost: Simplify stack shift calculation
I'm not sure why I never saw smaller values, but here you go.

Signed-off-by: Alyssa Rosenzweig <[email protected]>
Reviewed-by: Tomeu Vizoso <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3950>
Diffstat (limited to 'src/panfrost/encoder/pan_scratch.c')
-rw-r--r-- src/panfrost/encoder/pan_scratch.c | 21
1 file changed, 14 insertions, 7 deletions
diff --git a/src/panfrost/encoder/pan_scratch.c b/src/panfrost/encoder/pan_scratch.c
index 04a9bb7d1f6..478a788b116 100644
--- a/src/panfrost/encoder/pan_scratch.c
+++ b/src/panfrost/encoder/pan_scratch.c
@@ -47,13 +47,14 @@
* https://en.wikipedia.org/wiki/Mali_%28GPU% [citation needed]
*
* Within a particular thread, there is stack allocated. If it is present, its
- * size is a power-of-two, and it is at least 256 bytes. Stack is allocated
+ * size is a power-of-two, and it is at least 16 bytes. Stack is allocated
* with the shared memory descriptor used for all shaders within a frame (note
* that they don't execute concurrently so it's fine). So, consider the maximum
* stack size used by any shader within a job, and then compute (where npot
* denotes the next power of two):
*
- * allocated = npot(max(size, 256)) * (# of threads/core) * (# of cores)
+ * bytes/thread = npot(max(size, 16))
+ * allocated = (# of bytes/thread) * (# of threads/core) * (# of cores)
*
* The size of Thread Local Storage is signaled to the GPU in a dedicated
* log_stack_size field. Since stack sizes are powers of two, it follows that
@@ -70,20 +71,26 @@
* stack_size <= 2^(log_stack_size + 4)
*
* Given the constraints about powers-of-two and the minimum of 256, we thus
- * derive a formula for log_stack_size in terms of stack size (s):
+ * derive a formula for log_stack_size in terms of stack size (s), where s is
+ * positive:
*
- * log_stack_size = ceil(log2(max(s, 256))) - 4
+ * log_stack_size = ceil(log2(max(s, 16))) - 4
*
* There are other valid characterisations of this formula, of course, but this
- * is computationally simple, so good enough for our purposes.
+ * is computationally simple, so good enough for our purposes. If s=0, since
+ * there is no spilling used whatsoever, we may set log_stack_size to 0 to
+ * disable the stack.
*/
-/* Computes log_stack_size = ceil(log2(max(s, 256))) - 4 */
+/* Computes log_stack_size = ceil(log2(max(s, 16))) - 4 */
unsigned
panfrost_get_stack_shift(unsigned stack_size)
{
- return util_logbase2_ceil(MAX2(stack_size, 256)) - 4;
+ if (stack_size)
+ return util_logbase2_ceil(MAX2(stack_size, 16)) - 4;
+ else
+ return 0;
}
/* Computes the aligned stack size given the shift and thread count. The blob