From cb4977cf9396485d8a133aea1802e4bd57988e55 Mon Sep 17 00:00:00 2001 From: Rene Meusel Date: Sat, 28 Oct 2017 18:40:57 +0200 Subject: add conversion from UCS-2/4 to UTF-8 --- src/lib/asn1/asn1_str.cpp | 53 +++++++++++++++++++++++++++++++++++++---------- src/tests/test_asn1.cpp | 7 +++++-- 2 files changed, 47 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/lib/asn1/asn1_str.cpp b/src/lib/asn1/asn1_str.cpp index 4e1d1d78d..e9cc8ccdc 100644 --- a/src/lib/asn1/asn1_str.cpp +++ b/src/lib/asn1/asn1_str.cpp @@ -10,6 +10,9 @@ #include #include +#include +#include + namespace Botan { namespace { @@ -58,6 +61,28 @@ ASN1_Tag choose_encoding(const std::string& str, } +template +static std::string ucsX_to_utf8(const std::vector &ucsX) + { + if (ucsX.size() % sizeof(CharT) != 0) + { + throw Invalid_Argument("cannot decode UCS string (wrong byte count)"); + } + + union + { + const byte *as_char; + const CharT *as_wide_char; + }; + + as_char = ucsX.data(); + const size_t wide_char_count = ucsX.size() / sizeof(CharT); + + using converter_t = std::codecvt_utf8; + std::wstring_convert convert; + return convert.to_bytes(as_wide_char, as_wide_char + wide_char_count); + } + /* * Create an ASN1_String */ @@ -124,22 +149,28 @@ void ASN1_String::decode_from(BER_Decoder& source) { BER_Object obj = source.get_next_object(); - Character_Set charset_is; - - if(obj.type_tag == BMP_STRING) // Basic Multilingual Plane - 2 byte encoding - charset_is = UCS2_CHARSET; - else if(obj.type_tag == UTF8_STRING) - charset_is = UTF8_CHARSET; + if(obj.type_tag == UTF8_STRING) + { + *this = ASN1_String(ASN1::to_string(obj), obj.type_tag); + } + else if(obj.type_tag == BMP_STRING) + { + *this = ASN1_String(ucsX_to_utf8(obj.value), obj.type_tag); + } + else if(obj.type_tag == UNIVERSAL_STRING) + { + *this = ASN1_String(ucsX_to_utf8(obj.value), obj.type_tag); + } else // IA5_STRING - international ASCII characters // T61_STRING - pretty much ASCII // PRINTABLE_STRING - ASCII subset (a-z, A-Z, ' () +,-.?:/= and SPACE) // VISIBLE_STRING - visible ASCII subset // NUMERIC_STRING - ASCII subset (0-9 and SPACE) - charset_is = LATIN1_CHARSET; - - *this = ASN1_String( - Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, charset_is), - obj.type_tag); + { + *this = ASN1_String( + Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, LATIN1_CHARSET), + obj.type_tag); + } } } diff --git a/src/tests/test_asn1.cpp b/src/tests/test_asn1.cpp index eae1d96f8..f28093e4b 100644 --- a/src/tests/test_asn1.cpp +++ b/src/tests/test_asn1.cpp @@ -58,13 +58,14 @@ Test::Result test_asn1_utf8_ascii_parsing() // ... - UTF-8 encoded (ASCII chars only) word 'Moscow' const std::string moscow = "\x13\x06\x4D\x6F\x73\x63\x6F\x77"; + const std::string moscow_plain = "Moscow"; Botan::DataSource_Memory input(moscow.data()); Botan::BER_Decoder dec(input); Botan::ASN1_String str; str.decode_from(dec); - result.test_success("No crash"); + result.test_eq("value()", str.value(), moscow_plain); } catch(const Botan::Decoding_Error &ex) { @@ -85,13 +86,15 @@ Test::Result test_asn1_utf8_parsing() // ... - UTF-8 encoded russian word for Moscow in cyrillic script const std::string moscow = "\x0C\x0C\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; + const std::string moscow_plain = + "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; Botan::DataSource_Memory input(moscow.data()); Botan::BER_Decoder dec(input); Botan::ASN1_String str; str.decode_from(dec); - result.test_success("No crash"); + result.test_eq("value()", str.value(), moscow_plain); } catch(const Botan::Decoding_Error &ex) { -- cgit v1.2.3