diff options
author | Daniel Neus <[email protected]> | 2016-07-22 15:49:44 +0200 |
---|---|---|
committer | Daniel Neus <[email protected]> | 2016-07-25 14:17:58 +0200 |
commit | 936f0130de73e0e81478118111423674943b3179 (patch) | |
tree | 1ff711407785d49a6ca6fa16bbbcfe2f2b281200 | |
parent | cbe70db412126518b705545d666159a496a9e63d (diff) |
add some basic charset tests
- UCS-2 to ISO 8859-1
- UTF-8 to ISO 8859-1
- ISO 8859-1 to UTF-8
-rw-r--r-- | src/tests/data/charset.vec | 41 | ||||
-rw-r--r-- | src/tests/test_utils.cpp | 124 |
2 files changed, 165 insertions, 0 deletions
diff --git a/src/tests/data/charset.vec b/src/tests/data/charset.vec new file mode 100644 index 000000000..dd64ac6e3 --- /dev/null +++ b/src/tests/data/charset.vec @@ -0,0 +1,41 @@ +[UTF16-LATIN1] + +# Botan +In = 0042006F00740061006E +Out = 426F74616E + +# Heizölrückstoßabdämpfung +In = 004800650069007A00F6006C007200FC0063006B00730074006F00DF00610062006400E4006D007000660075006E0067 +Out = 4865697AF66C72FC636B73746FDF616264E46D7066756E67 + +# ÿ@Ðé¿ã!ð +In = 00FF004000D000E900BF00E3002100F0 +Out = FF40D0E9BFE321F0 + +[UTF8-LATIN1] + +# Botan +In = 426F74616E +Out = 426F74616E + +# Heizölrückstoßabdämpfung +In = 4865697AC3B66C72C3BC636B73746FC39F616264C3A46D7066756E67 +Out = 4865697AF66C72FC636B73746FDF616264E46D7066756E67 + +# ÿ@Ðé¿ã!ð +In = C3BF40C390C3A9C2BFC3A321C3B0 +Out = FF40D0E9BFE321F0 + +[LATIN1-UTF8] + +# Botan +In = 426F74616E +Out = 426F74616E + +# Heizölrückstoßabdämpfung +In = 4865697AF66C72FC636B73746FDF616264E46D7066756E67 +Out = 4865697AC3B66C72C3BC636B73746FC39F616264C3A46D7066756E67 + +# ÿ@Ðé¿ã!ð +In = FF40D0E9BFE321F0 +Out = C3BF40C390C3A9C2BFC3A321C3B0
\ No newline at end of file diff --git a/src/tests/test_utils.cpp b/src/tests/test_utils.cpp index 47f740a17..f5edfc0e0 100644 --- a/src/tests/test_utils.cpp +++ b/src/tests/test_utils.cpp @@ -9,6 +9,7 @@ #include <botan/loadstor.h> #include <botan/calendar.h> #include <botan/internal/rounding.h> +#include <botan/charset.h> #if defined(BOTAN_HAS_BASE64_CODEC) #include <botan/base64.h> @@ -326,6 +327,129 @@ BOTAN_REGISTER_TEST("base64", Base64_Tests); #endif +class Charset_Tests : public Text_Based_Test + { + public: + Charset_Tests() : Text_Based_Test("charset.vec", + { "In","Out" }) + {} + + Test::Result run_one_test(const std::string& type, const VarMap& vars) override + { + using namespace Botan; + + Test::Result result("Charset"); + + const std::vector<byte> in = get_req_bin(vars, "In"); + const std::vector<byte> expected = get_req_bin(vars, "Out"); + + std::string converted; + if(type == "UTF16-LATIN1") + { + converted = Charset::transcode(std::string(in.begin(), in.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UCS2_CHARSET); + } + else if(type == "UTF8-LATIN1") + { + converted = Charset::transcode(std::string(in.begin(), in.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UTF8_CHARSET); + } + else if(type == "LATIN1-UTF8") + { + converted = Charset::transcode(std::string(in.begin(), in.end()), + Character_Set::UTF8_CHARSET, Character_Set::LATIN1_CHARSET); + } + else + { + throw Test_Error("Unexpected header '" + type + "' in charset tests"); + } + + result.test_eq("string converted successfully", std::vector<byte>(converted.begin(), converted.end()), expected); + + return result; + } + + Test::Result utf16_to_latin1_negative_tests() + { + using namespace Botan; + + Test::Result result("Charset negative tests"); + + result.test_throws("conversion fails for non-Latin1 characters", []() + { + // "abcdefŸabcdef" + std::vector<byte> input = { 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x64, 0x00, 0x65, 0x00, 0x66, 0x01, + 0x78, 0x00, 0x61, 0x00, 
0x62, 0x00, 0x63, 0x00, 0x64, 0x00, 0x65, 0x00, 0x66 + }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UCS2_CHARSET); + }); + + result.test_throws("conversion fails for UTF16 string with odd number of bytes", []() + { + std::vector<byte> input = { 0x00, 0x61, 0x00 }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UCS2_CHARSET); + }); + + return result; + } + + Test::Result utf8_to_latin1_negative_tests() + { + using namespace Botan; + + Test::Result result("Charset negative tests"); + + result.test_throws("conversion fails for non-Latin1 characters", []() + { + // "abcdefŸabcdef" + std::vector<byte> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5, + 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 + }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UTF8_CHARSET); + }); + + result.test_throws("invalid utf-8 string", []() + { + // sequence truncated + std::vector<byte> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UTF8_CHARSET); + }); + + result.test_throws("invalid utf-8 string", []() + { + std::vector<byte> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC8, 0xB8, 0x61 }; + + Charset::transcode(std::string(input.begin(), input.end()), + Character_Set::LATIN1_CHARSET, Character_Set::UTF8_CHARSET); + }); + + return result; + } + + std::vector<Test::Result> run_final_tests() override + { + using namespace Botan; + + Test::Result result("Charset negative tests"); + + result.merge(utf16_to_latin1_negative_tests()); + result.merge(utf8_to_latin1_negative_tests()); + + return{ result }; + } + + }; + +BOTAN_REGISTER_TEST("charset", Charset_Tests); + } } |