aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Rene Meusel <[email protected]> 2017-10-28 18:40:57 +0200
committer: Jack Lloyd <[email protected]> 2017-11-09 16:05:17 -0500
commit: cb4977cf9396485d8a133aea1802e4bd57988e55 (patch)
tree: fe03065e87cab6e2e31c29810f04844b1a8c56d9 /src
parent: 2349f20fdb312a74b2705bf7f0e298be13e3d7a0 (diff)
add conversion from UCS-2/4 to UTF-8
Diffstat (limited to 'src')
-rw-r--r-- src/lib/asn1/asn1_str.cpp 53
-rw-r--r-- src/tests/test_asn1.cpp 7
2 files changed, 47 insertions, 13 deletions
diff --git a/src/lib/asn1/asn1_str.cpp b/src/lib/asn1/asn1_str.cpp
index 4e1d1d78d..e9cc8ccdc 100644
--- a/src/lib/asn1/asn1_str.cpp
+++ b/src/lib/asn1/asn1_str.cpp
@@ -10,6 +10,9 @@
#include <botan/ber_dec.h>
#include <botan/charset.h>
+#include <codecvt>
+#include <locale>
+
namespace Botan {
namespace {
@@ -58,6 +61,28 @@ ASN1_Tag choose_encoding(const std::string& str,
}
+/*
+* Convert a UCS-2 (CharT = char16_t) or UCS-4 (CharT = char32_t) byte
+* string into UTF-8.
+*
+* The input is interpreted as a sequence of big-endian code units of
+* sizeof(CharT) bytes each, which is how BMPString and UniversalString
+* contents are laid out in BER/DER.
+*
+* Throws Invalid_Argument if the byte count is not a multiple of
+* sizeof(CharT). Failures of the UTF-8 conversion itself are reported by
+* std::wstring_convert.
+*/
+template <typename CharT, class AllocT>
+static std::string ucsX_to_utf8(const std::vector<byte, AllocT> &ucsX)
+   {
+   if (ucsX.size() % sizeof(CharT) != 0)
+      {
+      throw Invalid_Argument("cannot decode UCS string (wrong byte count)");
+      }
+
+   const size_t wide_char_count = ucsX.size() / sizeof(CharT);
+
+   // Assemble every code unit explicitly, most significant byte first.
+   // Reinterpreting the buffer as CharT* would read the bytes in host
+   // order (wrong on little-endian machines), relies on union type
+   // punning, and may perform misaligned loads.
+   std::basic_string<CharT> wide_chars;
+   wide_chars.reserve(wide_char_count);
+
+   for(size_t i = 0; i != ucsX.size(); i += sizeof(CharT))
+      {
+      CharT code_unit = 0;
+      for(size_t j = 0; j != sizeof(CharT); ++j)
+         {
+         code_unit = static_cast<CharT>((code_unit << 8) | ucsX[i + j]);
+         }
+      wide_chars.push_back(code_unit);
+      }
+
+   using converter_t = std::codecvt_utf8<CharT, 0x10ffff, std::consume_header>;
+   std::wstring_convert<converter_t, CharT> convert;
+   return convert.to_bytes(wide_chars);
+   }
+
+
/*
* Create an ASN1_String
*/
@@ -124,22 +149,28 @@ void ASN1_String::decode_from(BER_Decoder& source)
{
BER_Object obj = source.get_next_object();
- Character_Set charset_is;
-
- if(obj.type_tag == BMP_STRING) // Basic Multilingual Plane - 2 byte encoding
- charset_is = UCS2_CHARSET;
- else if(obj.type_tag == UTF8_STRING)
- charset_is = UTF8_CHARSET;
+ if(obj.type_tag == UTF8_STRING)
+ {
+ *this = ASN1_String(ASN1::to_string(obj), obj.type_tag);
+ }
+ else if(obj.type_tag == BMP_STRING)
+ {
+ *this = ASN1_String(ucsX_to_utf8<char16_t>(obj.value), obj.type_tag);
+ }
+ else if(obj.type_tag == UNIVERSAL_STRING)
+ {
+ *this = ASN1_String(ucsX_to_utf8<char32_t>(obj.value), obj.type_tag);
+ }
else // IA5_STRING - international ASCII characters
// T61_STRING - pretty much ASCII
// PRINTABLE_STRING - ASCII subset (a-z, A-Z, ' () +,-.?:/= and SPACE)
// VISIBLE_STRING - visible ASCII subset
// NUMERIC_STRING - ASCII subset (0-9 and SPACE)
- charset_is = LATIN1_CHARSET;
-
- *this = ASN1_String(
- Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, charset_is),
- obj.type_tag);
+ {
+ *this = ASN1_String(
+ Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, LATIN1_CHARSET),
+ obj.type_tag);
+ }
}
}
diff --git a/src/tests/test_asn1.cpp b/src/tests/test_asn1.cpp
index eae1d96f8..f28093e4b 100644
--- a/src/tests/test_asn1.cpp
+++ b/src/tests/test_asn1.cpp
@@ -58,13 +58,14 @@ Test::Result test_asn1_utf8_ascii_parsing()
// ... - UTF-8 encoded (ASCII chars only) word 'Moscow'
const std::string moscow =
"\x13\x06\x4D\x6F\x73\x63\x6F\x77";
+ const std::string moscow_plain = "Moscow";
Botan::DataSource_Memory input(moscow.data());
Botan::BER_Decoder dec(input);
Botan::ASN1_String str;
str.decode_from(dec);
- result.test_success("No crash");
+ result.test_eq("value()", str.value(), moscow_plain);
}
catch(const Botan::Decoding_Error &ex)
{
@@ -85,13 +86,15 @@ Test::Result test_asn1_utf8_parsing()
// ... - UTF-8 encoded russian word for Moscow in cyrillic script
const std::string moscow =
"\x0C\x0C\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
+ const std::string moscow_plain =
+ "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
Botan::DataSource_Memory input(moscow.data());
Botan::BER_Decoder dec(input);
Botan::ASN1_String str;
str.decode_from(dec);
- result.test_success("No crash");
+ result.test_eq("value()", str.value(), moscow_plain);
}
catch(const Botan::Decoding_Error &ex)
{