From 936f0130de73e0e81478118111423674943b3179 Mon Sep 17 00:00:00 2001 From: Daniel Neus Date: Fri, 22 Jul 2016 15:49:44 +0200 Subject: add some basic charset tests - UCS-2 to ISO 8859-1 - UTF-8 to ISO 8859-1 - ISO 8859-1 to UTF-8 --- src/tests/data/charset.vec | 41 +++++++++++++++ src/tests/test_utils.cpp | 124 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 src/tests/data/charset.vec (limited to 'src/tests') diff --git a/src/tests/data/charset.vec b/src/tests/data/charset.vec new file mode 100644 index 000000000..dd64ac6e3 --- /dev/null +++ b/src/tests/data/charset.vec @@ -0,0 +1,41 @@ +[UTF16-LATIN1] + +# Botan +In = 0042006F00740061006E +Out = 426F74616E + +# Heizölrückstoßabdämpfung +In = 004800650069007A00F6006C007200FC0063006B00730074006F00DF00610062006400E4006D007000660075006E0067 +Out = 4865697AF66C72FC636B73746FDF616264E46D7066756E67 + +# ÿ@Ðé¿ã!ð +In = 00FF004000D000E900BF00E3002100F0 +Out = FF40D0E9BFE321F0 + +[UTF8-LATIN1] + +# Botan +In = 426F74616E +Out = 426F74616E + +# Heizölrückstoßabdämpfung +In = 4865697AC3B66C72C3BC636B73746FC39F616264C3A46D7066756E67 +Out = 4865697AF66C72FC636B73746FDF616264E46D7066756E67 + +# ÿ@Ðé¿ã!ð +In = C3BF40C390C3A9C2BFC3A321C3B0 +Out = FF40D0E9BFE321F0 + +[LATIN1-UTF8] + +# Botan +In = 426F74616E +Out = 426F74616E + +# Heizölrückstoßabdämpfung +In = 4865697AF66C72FC636B73746FDF616264E46D7066756E67 +Out = 4865697AC3B66C72C3BC636B73746FC39F616264C3A46D7066756E67 + +# ÿ@Ðé¿ã!ð +In = FF40D0E9BFE321F0 +Out = C3BF40C390C3A9C2BFC3A321C3B0 \ No newline at end of file diff --git a/src/tests/test_utils.cpp b/src/tests/test_utils.cpp index 47f740a17..f5edfc0e0 100644 --- a/src/tests/test_utils.cpp +++ b/src/tests/test_utils.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #if defined(BOTAN_HAS_BASE64_CODEC) #include @@ -326,6 +327,129 @@ BOTAN_REGISTER_TEST("base64", Base64_Tests); #endif +class Charset_Tests : public Text_Based_Test + { + public: + Charset_Tests() : 
Text_Based_Test("charset.vec", + { "In","Out" }) + {} + + Test::Result run_one_test(const std::string& type, const VarMap& vars) override + { + using namespace Botan; + + Test::Result result("Charset"); + + const std::vector in = get_req_bin(vars, "In"); + const std::vector expected = get_req_bin(vars, "Out"); + + std::string converted; + if(type == "UTF16-LATIN1") + { + converted = Charset::transcode(std::string(in.begin(), in.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UCS2_CHARSET); + } + else if(type == "UTF8-LATIN1") + { + converted = Charset::transcode(std::string(in.begin(), in.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UTF8_CHARSET); + } + else if(type == "LATIN1-UTF8") + { + converted = Charset::transcode(std::string(in.begin(), in.end()), + Character_Set::UTF8_CHARSET, Character_Set::LATIN1_CHARSET); + } + else + { + throw Test_Error("Unexpected header '" + type + "' in charset tests"); + } + + result.test_eq("string converted successfully", std::vector(converted.begin(), converted.end()), expected); + + return result; + } + + Test::Result utf16_to_latin1_negative_tests() + { + using namespace Botan; + + Test::Result result("Charset negative tests"); + + result.test_throws("conversion fails for non-Latin1 characters", []() + { + // "abcdefŸabcdef" + std::vector input = { 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x64, 0x00, 0x65, 0x00, 0x66, 0x01, + 0x78, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x64, 0x00, 0x65, 0x00, 0x66 + }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UCS2_CHARSET); + }); + + result.test_throws("conversion fails for UTF16 string with odd number of bytes", []() + { + std::vector input = { 0x00, 0x61, 0x00 }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UCS2_CHARSET); + }); + + return result; + } + + Test::Result utf8_to_latin1_negative_tests() + { + using namespace Botan; + + 
Test::Result result("Charset negative tests"); + + result.test_throws("conversion fails for non-Latin1 characters", []() + { + // "abcdefŸabcdef" + std::vector input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5, + 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 + }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UTF8_CHARSET); + }); + + result.test_throws("invalid utf-8 string", []() + { + // sequence truncated + std::vector input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UTF8_CHARSET); + }); + + result.test_throws("invalid utf-8 string", []() + { + std::vector input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC8, 0xB8, 0x61 }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UTF8_CHARSET); + }); + + return result; + } + + std::vector run_final_tests() override + { + using namespace Botan; + + Test::Result result("Charset negative tests"); + + result.merge(utf16_to_latin1_negative_tests()); + result.merge(utf8_to_latin1_negative_tests()); + + return{ result }; + } + + }; + +BOTAN_REGISTER_TEST("charset", Charset_Tests); + } } -- cgit v1.2.3