add conversion from UCS-2/4 to UTF-8

author: Rene Meusel <[email protected]> 2017-10-28 18:40:57 +0200
committer: Jack Lloyd <[email protected]> 2017-11-09 16:05:17 -0500
commit: cb4977cf9396485d8a133aea1802e4bd57988e55 (patch)
tree: fe03065e87cab6e2e31c29810f04844b1a8c56d9 /src
parent: 2349f20fdb312a74b2705bf7f0e298be13e3d7a0 (diff)
2 files changed, 47 insertions, 13 deletions
diff --git a/src/lib/asn1/asn1_str.cpp b/src/lib/asn1/asn1_str.cpp
index 4e1d1d78d..e9cc8ccdc 100644
--- a/src/lib/asn1/asn1_str.cpp
+++ b/src/lib/asn1/asn1_str.cpp
@@ -10,6 +10,9 @@
 #include <botan/ber_dec.h>
 #include <botan/charset.h>
 
+#include <codecvt>
+#include <locale>
+
 namespace Botan {
 
 namespace {
@@ -58,6 +61,28 @@ ASN1_Tag choose_encoding(const std::string& str,
 
 }
 
+// Convert big-endian UCS-2/UCS-4 octets (as carried by BMPString and
+// UniversalString) to a UTF-8 std::string. CharT is char16_t or char32_t.
+// Throws Invalid_Argument if the octet count is not a multiple of sizeof(CharT).
+template <typename CharT, class AllocT>
+static std::string ucsX_to_utf8(const std::vector<byte, AllocT> &ucsX)
+   {
+   if (ucsX.size() % sizeof(CharT) != 0)
+      {
+      throw Invalid_Argument("cannot decode UCS string (wrong byte count)");
+      }
+
+   // Build each code unit from its octets, most significant first, so the
+   // result is independent of host byte order and needs no pointer punning.
+   std::basic_string<CharT> wide(ucsX.size() / sizeof(CharT), CharT(0));
+   for (size_t i = 0; i != ucsX.size(); ++i)
+      wide[i / sizeof(CharT)] = static_cast<CharT>((wide[i / sizeof(CharT)] << 8) | ucsX[i]);
+
+   using converter_t = std::codecvt_utf8<CharT, 0x10ffff, std::consume_header>;
+   std::wstring_convert<converter_t, CharT> convert;
+   return convert.to_bytes(wide);
+   }
+
 /*
 * Create an ASN1_String
 */
@@ -124,22 +149,28 @@ void ASN1_String::decode_from(BER_Decoder& source)
    {
    BER_Object obj = source.get_next_object();
 
-   Character_Set charset_is;
-
-   if(obj.type_tag == BMP_STRING) // Basic Multilingual Plane - 2 byte encoding
-      charset_is = UCS2_CHARSET;
-   else if(obj.type_tag == UTF8_STRING)
-      charset_is = UTF8_CHARSET;
+   if(obj.type_tag == UTF8_STRING)
+      {
+      *this = ASN1_String(ASN1::to_string(obj), obj.type_tag);
+      }
+   else if(obj.type_tag == BMP_STRING)
+      {
+      *this = ASN1_String(ucsX_to_utf8<char16_t>(obj.value), obj.type_tag);
+      }
+   else if(obj.type_tag == UNIVERSAL_STRING)
+      {
+      *this = ASN1_String(ucsX_to_utf8<char32_t>(obj.value), obj.type_tag);
+      }
    else // IA5_STRING        - international ASCII characters
         // T61_STRING        - pretty much ASCII
         // PRINTABLE_STRING  - ASCII subset (a-z, A-Z, ' () +,-.?:/= and SPACE)
         // VISIBLE_STRING    - visible ASCII subset
         // NUMERIC_STRING    - ASCII subset (0-9 and SPACE)
-      charset_is = LATIN1_CHARSET;
-
-   *this = ASN1_String(
-      Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, charset_is),
-      obj.type_tag);
+      {
+      *this = ASN1_String(
+         Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, LATIN1_CHARSET),
+         obj.type_tag);
+      }
    }
 
 }
diff --git a/src/tests/test_asn1.cpp b/src/tests/test_asn1.cpp
index eae1d96f8..f28093e4b 100644
--- a/src/tests/test_asn1.cpp
+++ b/src/tests/test_asn1.cpp
@@ -58,13 +58,14 @@ Test::Result test_asn1_utf8_ascii_parsing()
             // ...  - UTF-8 encoded (ASCII chars only) word 'Moscow'
             const std::string moscow =
                "\x13\x06\x4D\x6F\x73\x63\x6F\x77";
+            const std::string moscow_plain = "Moscow";
             Botan::DataSource_Memory input(moscow.data());
             Botan::BER_Decoder dec(input);
 
             Botan::ASN1_String str;
             str.decode_from(dec);
 
-            result.test_success("No crash");
+            result.test_eq("value()", str.value(), moscow_plain);
          }
       catch(const Botan::Decoding_Error &ex)
          {
@@ -85,13 +86,15 @@ Test::Result test_asn1_utf8_parsing()
             // ...  - UTF-8 encoded russian word for Moscow in cyrillic script
             const std::string moscow =
                "\x0C\x0C\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
+            const std::string moscow_plain =
+               "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
             Botan::DataSource_Memory input(moscow.data());
             Botan::BER_Decoder dec(input);
 
             Botan::ASN1_String str;
             str.decode_from(dec);
 
-            result.test_success("No crash");
+            result.test_eq("value()", str.value(), moscow_plain);
          }
       catch(const Botan::Decoding_Error &ex)
          {
author	Rene Meusel <[email protected]>	2017-10-28 18:40:57 +0200
committer	Jack Lloyd <[email protected]>	2017-11-09 16:05:17 -0500
commit	cb4977cf9396485d8a133aea1802e4bd57988e55 (patch)
tree	fe03065e87cab6e2e31c29810f04844b1a8c56d9 /src
parent	2349f20fdb312a74b2705bf7f0e298be13e3d7a0 (diff)