diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/utils/charset.cpp | 60 | ||||
-rw-r--r-- | src/lib/utils/charset.h | 4 | ||||
-rw-r--r-- | src/tests/data/charset.vec | 28 | ||||
-rw-r--r-- | src/tests/test_utils.cpp | 75 |
4 files changed, 0 insertions, 167 deletions
diff --git a/src/lib/utils/charset.cpp b/src/lib/utils/charset.cpp index 11278f985..8ffde469a 100644 --- a/src/lib/utils/charset.cpp +++ b/src/lib/utils/charset.cpp @@ -93,66 +93,6 @@ std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len) } /* -* Convert from UTF-8 to ISO 8859-1 -*/ -std::string utf8_to_latin1(const std::string& utf8) - { - std::string iso8859; - - size_t position = 0; - while(position != utf8.size()) - { - const uint8_t c1 = static_cast<uint8_t>(utf8[position++]); - - if(c1 <= 0x7F) - { - iso8859 += static_cast<char>(c1); - } - else if(c1 >= 0xC0 && c1 <= 0xC7) - { - if(position == utf8.size()) - throw Decoding_Error("UTF-8: sequence truncated"); - - const uint8_t c2 = static_cast<uint8_t>(utf8[position++]); - const uint8_t iso_char = ((c1 & 0x07) << 6) | (c2 & 0x3F); - - if(iso_char <= 0x7F) - throw Decoding_Error("UTF-8: sequence longer than needed"); - - iso8859 += static_cast<char>(iso_char); - } - else - throw Decoding_Error("UTF-8: Unicode chars not in Latin1 used"); - } - - return iso8859; - } - -/* -* Convert from UCS-2 to ISO 8859-1 -*/ -std::string ucs2_to_latin1(const std::string& ucs2) - { - if(ucs2.size() % 2 == 1) - throw Decoding_Error("UCS-2 string has an odd number of bytes"); - - std::string latin1; - - for(size_t i = 0; i != ucs2.size(); i += 2) - { - const uint8_t c1 = ucs2[i]; - const uint8_t c2 = ucs2[i+1]; - - if(c1 != 0) - throw Decoding_Error("UCS-2 has non-Latin1 characters"); - - latin1 += static_cast<char>(c2); - } - - return latin1; - } - -/* * Convert from ISO 8859-1 to UTF-8 */ std::string latin1_to_utf8(const std::string& iso8859) diff --git a/src/lib/utils/charset.h b/src/lib/utils/charset.h index 9d05ea15d..b14b1ad41 100644 --- a/src/lib/utils/charset.h +++ b/src/lib/utils/charset.h @@ -33,10 +33,6 @@ BOTAN_TEST_API std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len); * Convert a UTF-8 string to Latin-1 * If a character outside the Latin-1 range is encountered, an exception is thrown. */ -BOTAN_TEST_API std::string utf8_to_latin1(const std::string& utf8); - -BOTAN_TEST_API std::string ucs2_to_latin1(const std::string& ucs2); - BOTAN_TEST_API std::string latin1_to_utf8(const std::string& iso8859); namespace Charset { diff --git a/src/tests/data/charset.vec b/src/tests/data/charset.vec index 6f12be8c2..ca5936e70 100644 --- a/src/tests/data/charset.vec +++ b/src/tests/data/charset.vec @@ -16,34 +16,6 @@ Out = E0A880000000000000E38080000000E1B49DE1B49DC480000000E1B49DE1B49D0000000000 In = 0000004800000065000000690000007A000000F60000006C00000072000000FC000000630000006B00000073000000740000006F000000DF000000610000006200000064000000E40000006D0000007000000066000000750000006E00000067 Out = 4865697AC3B66C72C3BC636B73746FC39F616264C3A46D7066756E67 -[UTF16-LATIN1] - -# Botan -In = 0042006F00740061006E -Out = 426F74616E - -# Heizölrückstoßabdämpfung -In = 004800650069007A00F6006C007200FC0063006B00730074006F00DF00610062006400E4006D007000660075006E0067 -Out = 4865697AF66C72FC636B73746FDF616264E46D7066756E67 - -# ÿ@Ðé¿ã!ð -In = 00FF004000D000E900BF00E3002100F0 -Out = FF40D0E9BFE321F0 - -[UTF8-LATIN1] - -# Botan -In = 426F74616E -Out = 426F74616E - -# Heizölrückstoßabdämpfung -In = 4865697AC3B66C72C3BC636B73746FC39F616264C3A46D7066756E67 -Out = 4865697AF66C72FC636B73746FDF616264E46D7066756E67 - -# ÿ@Ðé¿ã!ð -In = C3BF40C390C3A9C2BFC3A321C3B0 -Out = FF40D0E9BFE321F0 - [LATIN1-UTF8] # Botan diff --git a/src/tests/test_utils.cpp b/src/tests/test_utils.cpp index d6004fb0b..e7c1089fa 100644 --- a/src/tests/test_utils.cpp +++ b/src/tests/test_utils.cpp @@ -758,14 +758,6 @@ class Charset_Tests final : public Text_Based_Test { converted = Botan::ucs4_to_utf8(in.data(), in.size()); } - else if(type == "UTF8-LATIN1") - { - converted = Botan::utf8_to_latin1(in_str); - } - else if(type == "UTF16-LATIN1") - { - converted = Botan::ucs2_to_latin1(in_str); - } else if(type == "LATIN1-UTF8") { converted = Botan::latin1_to_utf8(in_str); @@ -779,73 +771,6 @@ class Charset_Tests final : public Text_Based_Test return result; } - - Test::Result utf16_to_latin1_negative_tests() - { - Test::Result result("Charset negative tests"); - - result.test_throws("conversion fails for non-Latin1 characters", []() - { - // "abcdefŸabcdef" - std::vector<uint8_t> input = { 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x64, 0x00, 0x65, 0x00, 0x66, 0x01, - 0x78, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x64, 0x00, 0x65, 0x00, 0x66 - }; - - Botan::ucs2_to_latin1(std::string(input.begin(), input.end())); - }); - - result.test_throws("conversion fails for UTF16 string with odd number of bytes", []() - { - std::vector<uint8_t> input = { 0x00, 0x61, 0x00 }; - - Botan::ucs2_to_latin1(std::string(input.begin(), input.end())); - }); - - return result; - } - - Test::Result utf8_to_latin1_negative_tests() - { - Test::Result result("Charset negative tests"); - - result.test_throws("conversion fails for non-Latin1 characters", []() - { - // "abcdefŸabcdef" - const std::vector<uint8_t> input = - { - 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5, - 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 - }; - - Botan::utf8_to_latin1(std::string(input.begin(), input.end())); - }); - - result.test_throws("invalid utf-8 string", []() - { - // sequence truncated - const std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 }; - Botan::utf8_to_latin1(std::string(input.begin(), input.end())); - }); - - result.test_throws("invalid utf-8 string", []() - { - std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC8, 0xB8, 0x61 }; - Botan::utf8_to_latin1(std::string(input.begin(), input.end())); - }); - - return result; - } - - std::vector<Test::Result> run_final_tests() override - { - Test::Result result("Charset negative tests"); - - result.merge(utf16_to_latin1_negative_tests()); - result.merge(utf8_to_latin1_negative_tests()); - - return{ result }; - } - }; BOTAN_REGISTER_TEST("utils", "charset", Charset_Tests); |