diff options
author | Rene Meusel <[email protected]> | 2017-10-28 18:40:57 +0200 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-11-09 16:05:17 -0500 |
commit | cb4977cf9396485d8a133aea1802e4bd57988e55 (patch) | |
tree | fe03065e87cab6e2e31c29810f04844b1a8c56d9 /src | |
parent | 2349f20fdb312a74b2705bf7f0e298be13e3d7a0 (diff) |
add conversion from UCS-2/4 to UTF-8
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/asn1/asn1_str.cpp | 53 |
-rw-r--r-- | src/tests/test_asn1.cpp | 7 |
2 files changed, 47 insertions, 13 deletions
diff --git a/src/lib/asn1/asn1_str.cpp b/src/lib/asn1/asn1_str.cpp index 4e1d1d78d..e9cc8ccdc 100644 --- a/src/lib/asn1/asn1_str.cpp +++ b/src/lib/asn1/asn1_str.cpp @@ -10,6 +10,9 @@ #include <botan/ber_dec.h> #include <botan/charset.h> +#include <codecvt> +#include <locale> + namespace Botan { namespace { @@ -58,6 +61,28 @@ ASN1_Tag choose_encoding(const std::string& str, } +template <typename CharT, class AllocT> +static std::string ucsX_to_utf8(const std::vector<byte, AllocT> &ucsX) + { + if (ucsX.size() % sizeof(CharT) != 0) + { + throw Invalid_Argument("cannot decode UCS string (wrong byte count)"); + } + + union + { + const byte *as_char; + const CharT *as_wide_char; + }; + + as_char = ucsX.data(); + const size_t wide_char_count = ucsX.size() / sizeof(CharT); + + using converter_t = std::codecvt_utf8<CharT, 0x10ffff, std::consume_header>; + std::wstring_convert<converter_t, CharT> convert; + return convert.to_bytes(as_wide_char, as_wide_char + wide_char_count); + } + /* * Create an ASN1_String */ @@ -124,22 +149,28 @@ void ASN1_String::decode_from(BER_Decoder& source) { BER_Object obj = source.get_next_object(); - Character_Set charset_is; - - if(obj.type_tag == BMP_STRING) // Basic Multilingual Plane - 2 byte encoding - charset_is = UCS2_CHARSET; - else if(obj.type_tag == UTF8_STRING) - charset_is = UTF8_CHARSET; + if(obj.type_tag == UTF8_STRING) + { + *this = ASN1_String(ASN1::to_string(obj), obj.type_tag); + } + else if(obj.type_tag == BMP_STRING) + { + *this = ASN1_String(ucsX_to_utf8<char16_t>(obj.value), obj.type_tag); + } + else if(obj.type_tag == UNIVERSAL_STRING) + { + *this = ASN1_String(ucsX_to_utf8<char32_t>(obj.value), obj.type_tag); + } else // IA5_STRING - international ASCII characters // T61_STRING - pretty much ASCII // PRINTABLE_STRING - ASCII subset (a-z, A-Z, ' () +,-.?:/= and SPACE) // VISIBLE_STRING - visible ASCII subset // NUMERIC_STRING - ASCII subset (0-9 and SPACE) - charset_is = LATIN1_CHARSET; - - *this = ASN1_String( - 
Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, charset_is), - obj.type_tag); + { + *this = ASN1_String( + Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, LATIN1_CHARSET), + obj.type_tag); + } } } diff --git a/src/tests/test_asn1.cpp b/src/tests/test_asn1.cpp index eae1d96f8..f28093e4b 100644 --- a/src/tests/test_asn1.cpp +++ b/src/tests/test_asn1.cpp @@ -58,13 +58,14 @@ Test::Result test_asn1_utf8_ascii_parsing() // ... - UTF-8 encoded (ASCII chars only) word 'Moscow' const std::string moscow = "\x13\x06\x4D\x6F\x73\x63\x6F\x77"; + const std::string moscow_plain = "Moscow"; Botan::DataSource_Memory input(moscow.data()); Botan::BER_Decoder dec(input); Botan::ASN1_String str; str.decode_from(dec); - result.test_success("No crash"); + result.test_eq("value()", str.value(), moscow_plain); } catch(const Botan::Decoding_Error &ex) { @@ -85,13 +86,15 @@ Test::Result test_asn1_utf8_parsing() // ... - UTF-8 encoded russian word for Moscow in cyrillic script const std::string moscow = "\x0C\x0C\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; + const std::string moscow_plain = + "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; Botan::DataSource_Memory input(moscow.data()); Botan::BER_Decoder dec(input); Botan::ASN1_String str; str.decode_from(dec); - result.test_success("No crash"); + result.test_eq("value()", str.value(), moscow_plain); } catch(const Botan::Decoding_Error &ex) { |