From 43a5fb0dc76adf80dc6e36e7f9ba0341176c7f3b Mon Sep 17 00:00:00 2001 From: Sven Gothel Date: Tue, 6 Sep 2022 10:27:32 +0200 Subject: Add jau::codec::base::ascii38_alphabet: Drop lower case letters suitable for unique vfat filename and use it for temp-dir Base 38 is still big enough to provide > INT_MAX @ 6 digits. --- include/jau/base_codec.hpp | 39 +++++++++++++++++++++++++++ java_base/org/jau/util/BaseCodec.java | 42 ++++++++++++++++++++++++++++++ src/file_util.cpp | 4 +-- test/java/jau/test/util/TestBaseCodec.java | 10 +++++-- test/test_codec_base01.cpp | 9 +++++-- 5 files changed, 98 insertions(+), 6 deletions(-) diff --git a/include/jau/base_codec.hpp b/include/jau/base_codec.hpp index 62e74c9..d5d9e3c 100644 --- a/include/jau/base_codec.hpp +++ b/include/jau/base_codec.hpp @@ -255,6 +255,45 @@ namespace jau::codec::base { : alphabet("natural86", 86, data, '=', s_code_point) {} }; + /** + * Safe base 38 alphabet with ASCII code-point sorting order. + * + * - Value: `-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_` + * - Padding: `=` + * + * ### Properties + * - 7-bit ASCII + * - Code page 437 compatible + * - Safe URL and filename use + * - Excludes forbidden [v]fat chars: `<>:"/\|?*` + * - Only using upper-case letters for unique filename under vfat + * - Excludes quoting chars: "'$ and space + * - Supporting ASCII code-point sorting. + * - Order: `-` < `0` < `A` < `Z` < `_` + */ + class ascii38_alphabet : public alphabet { + private: + static inline constexpr const std::string_view data = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_"; + + static int s_code_point(const char c) noexcept { + if ('0' <= c && c <= '9') { + return c - '0' + 1; + } else if ('A' <= c && c <= 'Z') { + return c - 'A' + 11; + } else if ('-' == c) { + return 0; + } else if ('_' == c) { + return 37; + } else { + return -1; + } + } + + public: + ascii38_alphabet() noexcept + : alphabet("ascii38", 38, data, '=', s_code_point) {} + }; + /** * Safe base 64 alphabet with ASCII code-point sorting order. 
 * diff --git a/java_base/org/jau/util/BaseCodec.java b/java_base/org/jau/util/BaseCodec.java index 500eec7..b7c8f47 100644 --- a/java_base/org/jau/util/BaseCodec.java +++ b/java_base/org/jau/util/BaseCodec.java @@ -245,6 +245,48 @@ public class BaseCodec { } } + /** + * Safe base 38 alphabet with ASCII code-point sorting order. + * + * - Value: `-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_` + * - Padding: `=` + * + * ### Properties + * - 7-bit ASCII + * - Code page 437 compatible + * - Safe URL and filename use + * - Excludes forbidden [v]fat chars: `<>:"/\|?*` + * - Only using upper-case letters for unique filename under vfat + * - Excludes quoting chars: "'$ and space + * - Supporting ASCII code-point sorting. + * - Order: `-` < `0` < `A` < `Z` < `_` + * + * @see encodeBase() + * @see decodeBase() + */ + public static class Ascii38Alphabet extends Alphabet { + private static final String data = "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_"; + + @Override + public int code_point(final char c) { + if ('0' <= c && c <= '9') { + return c - '0' + 1; + } else if ('A' <= c && c <= 'Z') { + return c - 'A' + 11; + } else if ('-' == c) { + return 0; + } else if ('_' == c) { + return 37; + } else { + return -1; + } + } + + public Ascii38Alphabet() { + super("ascii38", 38, data, '='); + } + } + /** * Safe base 64 alphabet with ASCII code-point sorting order. 
 * diff --git a/src/file_util.cpp b/src/file_util.cpp index 3e8cb23..9fdaf68 100644 --- a/src/file_util.cpp +++ b/src/file_util.cpp @@ -1531,7 +1531,7 @@ static bool copy_push_mkdir(const file_stats& dst_stats, copy_context_t& ctx) no } else if( !dst_stats.exists() ) { new_dir = true; constexpr const int32_t val_min = 888; - constexpr const int32_t val_max = std::numeric_limits<int32_t>::max(); // 6 digits base 64 > INT_MAX + constexpr const int32_t val_max = std::numeric_limits<int32_t>::max(); // 6 digits base 38 > INT_MAX uint64_t mkdir_cntr = 0; std::mt19937_64 prng; std::uniform_int_distribution<int32_t> prng_dist(val_min, val_max); @@ -1539,7 +1539,7 @@ static bool copy_push_mkdir(const file_stats& dst_stats, copy_context_t& ctx) no do { ++mkdir_cntr; const int32_t val_d = prng_dist(prng); - basename_ = "."+jau::codec::base::encode(val_d, 64, jau::codec::base::ascii64_alphabet(), 6); // base 64, 6 digits + basename_ = "."+jau::codec::base::encode(val_d, 38, jau::codec::base::ascii38_alphabet(), 6); // base 38, 6 digits if( 0 == ::mkdirat(dest_dirfd, basename_.c_str(), jau::fs::posix_protection_bits(fmode_t::rwx_usr)) ) { mkdir_ok = true; } else if (errno != EINTR && errno != EEXIST) { diff --git a/test/java/jau/test/util/TestBaseCodec.java b/test/java/jau/test/util/TestBaseCodec.java index ca164f3..d5ea37e 100644 --- a/test/java/jau/test/util/TestBaseCodec.java +++ b/test/java/jau/test/util/TestBaseCodec.java @@ -187,7 +187,13 @@ public class TestBaseCodec { } @Test - public void test01IntegerBase64() { + public void test01IntegerBase38() { + testRadix_3digits_int32(38, new BaseCodec.Ascii38Alphabet()); + testRadix_3digits_int32(38, new BaseCodec.Ascii64Alphabet()); + } + + @Test + public void test02IntegerBase64() { testIntegerBase64(new BaseCodec.Base64Alphabet()); testIntegerBase64(new BaseCodec.Base64urlAlphabet()); testIntegerBase64(new BaseCodec.Natural86Alphabet()); @@ -196,7 +202,7 @@ public class TestBaseCodec { } @Test - public void test02IntegerBase86() { + public void 
test03IntegerBase86() { testIntegerBase86(new BaseCodec.Natural86Alphabet()); testIntegerBase86(new BaseCodec.Ascii86Alphabet()); } diff --git a/test/test_codec_base01.cpp b/test/test_codec_base01.cpp index 86063e9..e5f9f2b 100644 --- a/test/test_codec_base01.cpp +++ b/test/test_codec_base01.cpp @@ -183,7 +183,12 @@ static void testIntegerBase86(const jau::codec::base::alphabet& aspec) { // testRadix_int64(86, aspec, 0x0_i64, 0x7FFFFFFFFFFFFFFF_i64); } -TEST_CASE( "Integer Base 64 Encoding Test 01", "[integer][type]" ) { +TEST_CASE( "Integer Base 38 Encoding Test 01", "[integer][type]" ) { + testRadix_3digits_int32(38, jau::codec::base::ascii38_alphabet()); + testRadix_3digits_int32(38, jau::codec::base::ascii64_alphabet()); +} + +TEST_CASE( "Integer Base 64 Encoding Test 02", "[integer][type]" ) { testIntegerBase64(jau::codec::base::base64_alphabet()); testIntegerBase64(jau::codec::base::base64url_alphabet()); testIntegerBase64(jau::codec::base::natural86_alphabet()); @@ -191,7 +196,7 @@ TEST_CASE( "Integer Base 64 Encoding Test 01", "[integer][type]" ) { testIntegerBase64(jau::codec::base::ascii86_alphabet()); } -TEST_CASE( "Integer Base 86 Encoding Test 02", "[integer][type]" ) { +TEST_CASE( "Integer Base 86 Encoding Test 03", "[integer][type]" ) { testIntegerBase86(jau::codec::base::natural86_alphabet()); testIntegerBase86(jau::codec::base::ascii86_alphabet()); } -- cgit v1.2.3