summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2018-09-22 20:03:27 -0400
committer Marek Olšák <[email protected]> 2018-10-17 12:27:58 -0400
commit 669dd229835122143ff49b0c921e69a3a31ed46b (patch)
tree 236d1a893151c00846fb23a79167f78c9820ea56
parent 58a51d0a67a00711c63ab28fcec7e1bb775c2097 (diff)
util: document a limitation of util_fast_udiv32
trivial
-rw-r--r-- src/util/fast_idiv_by_const.h 8
1 files changed, 7 insertions, 1 deletions
diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h
index 92a3ccdf222..638b52a3ffb 100644
--- a/src/util/fast_idiv_by_const.h
+++ b/src/util/fast_idiv_by_const.h
@@ -135,7 +135,13 @@ static inline uint32_t
util_fast_udiv32(uint32_t n, struct util_fast_udiv_info info)
{
n = n >> info.pre_shift;
- /* For non-power-of-two divisors, use a 32-bit ADD that clamps to UINT_MAX. */
+ /* If the divisor is not 1, you can instead use a 32-bit ADD that clamps
+ * to UINT_MAX. Dividing by 1 needs the full 64-bit ADD.
+ *
+ * If you have unsigned 64-bit MAD with 32-bit inputs, you can do:
+ * increment = increment ? multiplier : 0; // on the CPU
+ * (n * multiplier + increment) // on the GPU using unsigned 64-bit MAD
+ */
n = (((uint64_t)n + info.increment) * info.multiplier) >> 32;
n = n >> info.post_shift;
return n;