aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jack Lloyd <[email protected]> 2017-11-10 08:23:45 -0500
committer Jack Lloyd <[email protected]> 2017-11-10 08:23:45 -0500
commit bff1490ac8001fe8dcfdc145c44c6cce28a0cba1 (patch)
tree e70dda7d11abd6683a9376c35930d23e1b68c2aa
parent 2abac405f483b45ddb653f710be396d8d37936b2 (diff)
parent ba16efe696d181b0165e57f86cade99934aa788f (diff)
Merge GH #1287 #1289 Use UTF-8 encoding for ASN.1 strings
-rw-r--r-- doc/manual/x509.rst 13
-rw-r--r-- src/cli/asn1.cpp 15
-rw-r--r-- src/lib/asn1/asn1_alt_name.cpp 13
-rw-r--r-- src/lib/asn1/asn1_obj.h 1
-rw-r--r-- src/lib/asn1/asn1_str.cpp 103
-rw-r--r-- src/lib/asn1/asn1_str.h 18
-rw-r--r-- src/lib/asn1/asn1_time.cpp 11
-rw-r--r-- src/lib/asn1/info.txt 2
-rw-r--r-- src/lib/utils/charset.cpp 58
-rw-r--r-- src/lib/utils/charset.h 44
-rw-r--r-- src/lib/x509/name_constraint.cpp 3
-rw-r--r-- src/lib/x509/x509_ext.cpp 7
-rw-r--r-- src/tests/data/x509test/contains_bmpstring.pem 22
-rw-r--r-- src/tests/data/x509test/contains_utf8string.pem 24
-rw-r--r-- src/tests/test_asn1.cpp 186
-rw-r--r-- src/tests/test_utils.cpp 35
-rw-r--r-- src/tests/unit_x509.cpp 65
17 files changed, 447 insertions, 173 deletions
diff --git a/doc/manual/x509.rst b/doc/manual/x509.rst
index 72a7b32b7..1fb6d90f5 100644
--- a/doc/manual/x509.rst
+++ b/doc/manual/x509.rst
@@ -79,19 +79,6 @@ associated with a position of some sort in the organization. It may
also include fields for state/province and locality. What a locality
is, nobody knows, but it's usually given as a city name.
-Botan doesn't currently support any of the Unicode variants used in
-ASN.1 (UTF-8, UCS-2, and UCS-4), any of which could be used for the
-fields in the DN. This could be problematic, particularly in Asia and
-other areas where non-ASCII characters are needed for most names. The
-UTF-8 and UCS-2 string types *are* accepted (in fact, UTF-8 is used
-when encoding much of the time), but if any of the characters included
-in the string are not in ISO 8859-1 (ie 0 ... 255), an exception will
-get thrown. Currently the ``ASN1_String`` type holds its data as ISO
-8859-1 internally (regardless of local character set); this would have
-to be changed to hold UCS-2 or UCS-4 in order to support Unicode
-(also, many interfaces in the X.509 code would have to accept or
-return a ``std::wstring`` instead of a ``std::string``).
-
Like the distinguished names, subject alternative names can contain a lot of
things that Botan will flat out ignore (most of which you would likely never
want to use). However, there are three very useful pieces of information that
diff --git a/src/cli/asn1.cpp b/src/cli/asn1.cpp
index 5c90a3c5b..234cbd6e6 100644
--- a/src/cli/asn1.cpp
+++ b/src/cli/asn1.cpp
@@ -22,9 +22,6 @@
#include <sstream>
#include <ctype.h>
-// Set this if your terminal understands UTF-8; otherwise output is in Latin-1
-#define UTF8_TERMINAL 1
-
namespace Botan_CLI {
namespace {
@@ -349,17 +346,7 @@ void decode(std::ostream& output,
{
Botan::ASN1_String str;
data.decode(str);
- if(UTF8_TERMINAL)
- {
- emit(output, type_name(type_tag), level, length,
- Botan::Charset::transcode(str.iso_8859(),
- Botan::UTF8_CHARSET,
- Botan::LATIN1_CHARSET));
- }
- else
- {
- emit(output, type_name(type_tag), level, length, str.iso_8859());
- }
+ emit(output, type_name(type_tag), level, length, str.value());
}
else if(type_tag == Botan::UTC_TIME || type_tag == Botan::GENERALIZED_TIME)
{
diff --git a/src/lib/asn1/asn1_alt_name.cpp b/src/lib/asn1/asn1_alt_name.cpp
index 7bd4cd494..940312886 100644
--- a/src/lib/asn1/asn1_alt_name.cpp
+++ b/src/lib/asn1/asn1_alt_name.cpp
@@ -11,7 +11,6 @@
#include <botan/ber_dec.h>
#include <botan/oids.h>
#include <botan/internal/stl_util.h>
-#include <botan/charset.h>
#include <botan/parsing.h>
#include <botan/loadstor.h>
@@ -133,7 +132,7 @@ void encode_entries(DER_Encoder& encoder,
if(type == "RFC822" || type == "DNS" || type == "URI")
{
ASN1_String asn1_string(i->second, IA5_STRING);
- encoder.add_object(tagging, CONTEXT_SPECIFIC, asn1_string.iso_8859());
+ encoder.add_object(tagging, CONTEXT_SPECIFIC, asn1_string.value());
}
else if(type == "IP")
{
@@ -218,13 +217,9 @@ void AlternativeName::decode_from(BER_Decoder& source)
}
else if(tag == 1 || tag == 2 || tag == 6)
{
- const std::string value = Charset::transcode(ASN1::to_string(obj),
- LATIN1_CHARSET,
- LOCAL_CHARSET);
-
- if(tag == 1) add_attribute("RFC822", value);
- if(tag == 2) add_attribute("DNS", value);
- if(tag == 6) add_attribute("URI", value);
+ if(tag == 1) add_attribute("RFC822", ASN1::to_string(obj));
+ if(tag == 2) add_attribute("DNS", ASN1::to_string(obj));
+ if(tag == 6) add_attribute("URI", ASN1::to_string(obj));
}
else if(tag == 7)
{
diff --git a/src/lib/asn1/asn1_obj.h b/src/lib/asn1/asn1_obj.h
index 63c7dc2e3..95b84c5c1 100644
--- a/src/lib/asn1/asn1_obj.h
+++ b/src/lib/asn1/asn1_obj.h
@@ -45,6 +45,7 @@ enum ASN1_Tag {
T61_STRING = 0x14,
IA5_STRING = 0x16,
VISIBLE_STRING = 0x1A,
+ UNIVERSAL_STRING = 0x1C,
BMP_STRING = 0x1E,
UTC_TIME = 0x17,
diff --git a/src/lib/asn1/asn1_str.cpp b/src/lib/asn1/asn1_str.cpp
index 526e10158..d90aa215b 100644
--- a/src/lib/asn1/asn1_str.cpp
+++ b/src/lib/asn1/asn1_str.cpp
@@ -17,8 +17,7 @@ namespace {
/*
* Choose an encoding for the string
*/
-ASN1_Tag choose_encoding(const std::string& str,
- const std::string& type)
+ASN1_Tag choose_encoding(const std::string& str)
{
static const uint8_t IS_PRINTABLE[256] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -48,40 +47,49 @@ ASN1_Tag choose_encoding(const std::string& str,
{
if(!IS_PRINTABLE[static_cast<uint8_t>(str[i])])
{
- if(type == "utf8") return UTF8_STRING;
- if(type == "latin1") return T61_STRING;
- throw Invalid_Argument("choose_encoding: Bad string type " + type);
+ return UTF8_STRING;
}
}
return PRINTABLE_STRING;
}
+void assert_is_string_type(ASN1_Tag tag)
+ {
+ if(tag != NUMERIC_STRING &&
+ tag != PRINTABLE_STRING &&
+ tag != VISIBLE_STRING &&
+ tag != T61_STRING &&
+ tag != IA5_STRING &&
+ tag != UTF8_STRING &&
+ tag != BMP_STRING &&
+ tag != UNIVERSAL_STRING)
+ {
+ throw Invalid_Argument("ASN1_String: Unknown string type " +
+ std::to_string(tag));
+ }
+ }
+
}
/*
* Create an ASN1_String
*/
-ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : m_iso_8859_str(Charset::transcode(str, LOCAL_CHARSET, LATIN1_CHARSET)), m_tag(t)
+ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : m_utf8_str(str), m_tag(t)
{
-
if(m_tag == DIRECTORY_STRING)
- m_tag = choose_encoding(m_iso_8859_str, "latin1");
-
- if(m_tag != NUMERIC_STRING &&
- m_tag != PRINTABLE_STRING &&
- m_tag != VISIBLE_STRING &&
- m_tag != T61_STRING &&
- m_tag != IA5_STRING &&
- m_tag != UTF8_STRING &&
- m_tag != BMP_STRING)
- throw Invalid_Argument("ASN1_String: Unknown string type " +
- std::to_string(m_tag));
+ {
+ m_tag = choose_encoding(m_utf8_str);
+ }
+
+ assert_is_string_type(m_tag);
}
/*
* Create an ASN1_String
*/
-ASN1_String::ASN1_String(const std::string& str) : m_iso_8859_str(Charset::transcode(str, LOCAL_CHARSET, LATIN1_CHARSET)), m_tag(choose_encoding(m_iso_8859_str, "latin1"))
+ASN1_String::ASN1_String(const std::string& str) :
+ m_utf8_str(str),
+ m_tag(choose_encoding(m_utf8_str))
{}
/*
@@ -89,23 +97,7 @@ ASN1_String::ASN1_String(const std::string& str) : m_iso_8859_str(Charset::trans
*/
std::string ASN1_String::iso_8859() const
{
- return m_iso_8859_str;
- }
-
-/*
-* Return this string in local encoding
-*/
-std::string ASN1_String::value() const
- {
- return Charset::transcode(m_iso_8859_str, LATIN1_CHARSET, LOCAL_CHARSET);
- }
-
-/*
-* Return the type of this string object
-*/
-ASN1_Tag ASN1_String::tagging() const
- {
- return m_tag;
+ return utf8_to_latin1(m_utf8_str);
}
/*
@@ -113,10 +105,15 @@ ASN1_Tag ASN1_String::tagging() const
*/
void ASN1_String::encode_into(DER_Encoder& encoder) const
{
- std::string value = iso_8859();
- if(tagging() == UTF8_STRING)
- value = Charset::transcode(value, LATIN1_CHARSET, UTF8_CHARSET);
- encoder.add_object(tagging(), UNIVERSAL, value);
+ if(m_data.empty())
+ {
+ encoder.add_object(tagging(), UNIVERSAL, m_utf8_str);
+ }
+ else
+ {
+ // If this string was decoded, reserialize using original encoding
+ encoder.add_object(tagging(), UNIVERSAL, m_data.data(), m_data.size());
+ }
}
/*
@@ -126,18 +123,24 @@ void ASN1_String::decode_from(BER_Decoder& source)
{
BER_Object obj = source.get_next_object();
- Character_Set charset_is;
+ assert_is_string_type(obj.type_tag);
- if(obj.type_tag == BMP_STRING)
- charset_is = UCS2_CHARSET;
- else if(obj.type_tag == UTF8_STRING)
- charset_is = UTF8_CHARSET;
- else
- charset_is = LATIN1_CHARSET;
+ m_tag = obj.type_tag;
+ m_data.assign(obj.value.begin(), obj.value.end());
- *this = ASN1_String(
- Charset::transcode(ASN1::to_string(obj), LOCAL_CHARSET, charset_is),
- obj.type_tag);
+ if(m_tag == BMP_STRING)
+ {
+ m_utf8_str = ucs2_to_utf8(m_data.data(), m_data.size());
+ }
+ else if(m_tag == UNIVERSAL_STRING)
+ {
+ m_utf8_str = ucs4_to_utf8(m_data.data(), m_data.size());
+ }
+ else
+ {
+ // All other supported string types are UTF-8 or some subset thereof
+ m_utf8_str = ASN1::to_string(obj);
+ }
}
}
diff --git a/src/lib/asn1/asn1_str.h b/src/lib/asn1/asn1_str.h
index 9e0237550..f19265494 100644
--- a/src/lib/asn1/asn1_str.h
+++ b/src/lib/asn1/asn1_str.h
@@ -13,7 +13,8 @@
namespace Botan {
/**
-* Simple String
+* ASN.1 string type
+* This class normalizes all inputs to a UTF-8 std::string
*/
class BOTAN_PUBLIC_API(2,0) ASN1_String final : public ASN1_Object
{
@@ -21,15 +22,18 @@ class BOTAN_PUBLIC_API(2,0) ASN1_String final : public ASN1_Object
void encode_into(class DER_Encoder&) const override;
void decode_from(class BER_Decoder&) override;
- std::string value() const;
- std::string iso_8859() const;
+ ASN1_Tag tagging() const { return m_tag; }
+
+ const std::string& value() const { return m_utf8_str; }
- ASN1_Tag tagging() const;
+ std::string BOTAN_DEPRECATED("Use value() to get UTF-8 string instead")
+ iso_8859() const;
- explicit ASN1_String(const std::string& = "");
- ASN1_String(const std::string&, ASN1_Tag);
+ explicit ASN1_String(const std::string& utf8 = "");
+ ASN1_String(const std::string& utf8, ASN1_Tag tag);
private:
- std::string m_iso_8859_str;
+ std::vector<uint8_t> m_data;
+ std::string m_utf8_str;
ASN1_Tag m_tag;
};
diff --git a/src/lib/asn1/asn1_time.cpp b/src/lib/asn1/asn1_time.cpp
index 2cd225915..f6a0c414e 100644
--- a/src/lib/asn1/asn1_time.cpp
+++ b/src/lib/asn1/asn1_time.cpp
@@ -8,7 +8,6 @@
#include <botan/asn1_time.h>
#include <botan/der_enc.h>
#include <botan/ber_dec.h>
-#include <botan/charset.h>
#include <botan/exceptn.h>
#include <botan/parsing.h>
#include <botan/calendar.h>
@@ -41,20 +40,14 @@ void X509_Time::encode_into(DER_Encoder& der) const
if(m_tag != GENERALIZED_TIME && m_tag != UTC_TIME)
throw Invalid_Argument("X509_Time: Bad encoding tag");
- der.add_object(m_tag, UNIVERSAL,
- Charset::transcode(to_string(),
- LOCAL_CHARSET,
- LATIN1_CHARSET));
+ der.add_object(m_tag, UNIVERSAL, to_string());
}
void X509_Time::decode_from(BER_Decoder& source)
{
BER_Object ber_time = source.get_next_object();
- set_to(Charset::transcode(ASN1::to_string(ber_time),
- LATIN1_CHARSET,
- LOCAL_CHARSET),
- ber_time.type_tag);
+ set_to(ASN1::to_string(ber_time), ber_time.type_tag);
}
std::string X509_Time::to_string() const
diff --git a/src/lib/asn1/info.txt b/src/lib/asn1/info.txt
index 4b3689f45..4772e1ca7 100644
--- a/src/lib/asn1/info.txt
+++ b/src/lib/asn1/info.txt
@@ -1,5 +1,5 @@
<defines>
-ASN1 -> 20161102
+ASN1 -> 20171109
</defines>
<requires>
diff --git a/src/lib/utils/charset.cpp b/src/lib/utils/charset.cpp
index dadee8f78..ca32c652d 100644
--- a/src/lib/utils/charset.cpp
+++ b/src/lib/utils/charset.cpp
@@ -92,34 +92,6 @@ std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len)
return s;
}
-namespace Charset {
-
-namespace {
-
-/*
-* Convert from UCS-2 to ISO 8859-1
-*/
-std::string ucs2_to_latin1(const std::string& ucs2)
- {
- if(ucs2.size() % 2 == 1)
- throw Decoding_Error("UCS-2 string has an odd number of bytes");
-
- std::string latin1;
-
- for(size_t i = 0; i != ucs2.size(); i += 2)
- {
- const uint8_t c1 = ucs2[i];
- const uint8_t c2 = ucs2[i+1];
-
- if(c1 != 0)
- throw Decoding_Error("UCS-2 has non-Latin1 characters");
-
- latin1 += static_cast<char>(c2);
- }
-
- return latin1;
- }
-
/*
* Convert from UTF-8 to ISO 8859-1
*/
@@ -133,7 +105,9 @@ std::string utf8_to_latin1(const std::string& utf8)
const uint8_t c1 = static_cast<uint8_t>(utf8[position++]);
if(c1 <= 0x7F)
+ {
iso8859 += static_cast<char>(c1);
+ }
else if(c1 >= 0xC0 && c1 <= 0xC7)
{
if(position == utf8.size())
@@ -154,6 +128,34 @@ std::string utf8_to_latin1(const std::string& utf8)
return iso8859;
}
+namespace Charset {
+
+namespace {
+
+/*
+* Convert from UCS-2 to ISO 8859-1
+*/
+std::string ucs2_to_latin1(const std::string& ucs2)
+ {
+ if(ucs2.size() % 2 == 1)
+ throw Decoding_Error("UCS-2 string has an odd number of bytes");
+
+ std::string latin1;
+
+ for(size_t i = 0; i != ucs2.size(); i += 2)
+ {
+ const uint8_t c1 = ucs2[i];
+ const uint8_t c2 = ucs2[i+1];
+
+ if(c1 != 0)
+ throw Decoding_Error("UCS-2 has non-Latin1 characters");
+
+ latin1 += static_cast<char>(c2);
+ }
+
+ return latin1;
+ }
+
/*
* Convert from ISO 8859-1 to UTF-8
*/
diff --git a/src/lib/utils/charset.h b/src/lib/utils/charset.h
index 3f2ff9912..4913f0a5a 100644
--- a/src/lib/utils/charset.h
+++ b/src/lib/utils/charset.h
@@ -14,16 +14,6 @@
namespace Botan {
/**
-* The different charsets (nominally) supported by Botan.
-*/
-enum Character_Set {
- LOCAL_CHARSET,
- UCS2_CHARSET,
- UTF8_CHARSET,
- LATIN1_CHARSET
-};
-
-/**
* Convert a sequence of UCS-2 (big endian) characters to a UTF-8 string
* This is used for ASN.1 BMPString type
* @param ucs2 the sequence of UCS-2 characters
@@ -39,15 +29,41 @@ std::string BOTAN_UNSTABLE_API ucs2_to_utf8(const uint8_t ucs2[], size_t len);
*/
std::string BOTAN_UNSTABLE_API ucs4_to_utf8(const uint8_t ucs4[], size_t len);
+/**
+* Convert a UTF-8 string to Latin-1
+* If a character outside the Latin-1 range is encountered, an exception is thrown.
+*/
+std::string BOTAN_UNSTABLE_API utf8_to_latin1(const std::string& utf8);
+
+/**
+* The different charsets (nominally) supported by Botan.
+*/
+enum Character_Set {
+ LOCAL_CHARSET,
+ UCS2_CHARSET,
+ UTF8_CHARSET,
+ LATIN1_CHARSET
+};
+
namespace Charset {
/*
-* Character Set Handling
+* Character set conversion - avoid this.
+* For specific conversions, use the functions above like
+* ucs2_to_utf8 and utf8_to_latin1
+*
+* If you need something more complex than that, use a real library
+* such as iconv, Boost.Locale, or ICU
*/
-std::string BOTAN_PUBLIC_API(2,0) transcode(const std::string& str,
- Character_Set to,
- Character_Set from);
+std::string BOTAN_PUBLIC_API(2,0)
+ BOTAN_DEPRECATED("Avoid. See comment in header.")
+ transcode(const std::string& str,
+ Character_Set to,
+ Character_Set from);
+/*
+* Simple character classifier functions
+*/
bool BOTAN_PUBLIC_API(2,0) is_digit(char c);
bool BOTAN_PUBLIC_API(2,0) is_space(char c);
bool BOTAN_PUBLIC_API(2,0) caseless_cmp(char x, char y);
diff --git a/src/lib/x509/name_constraint.cpp b/src/lib/x509/name_constraint.cpp
index e27dca9ec..e098bcd8d 100644
--- a/src/lib/x509/name_constraint.cpp
+++ b/src/lib/x509/name_constraint.cpp
@@ -7,7 +7,6 @@
#include <botan/name_constraint.h>
#include <botan/ber_dec.h>
-#include <botan/charset.h>
#include <botan/loadstor.h>
#include <botan/x509_dn.h>
#include <botan/x509cert.h>
@@ -49,7 +48,7 @@ void GeneralName::decode_from(class BER_Decoder& ber)
if(tag == 1 || tag == 2 || tag == 6)
{
- m_name = Charset::transcode(ASN1::to_string(obj), LATIN1_CHARSET, LOCAL_CHARSET);
+ m_name = ASN1::to_string(obj);
if(tag == 1)
{
diff --git a/src/lib/x509/x509_ext.cpp b/src/lib/x509/x509_ext.cpp
index 3141d3c44..6e4c29d42 100644
--- a/src/lib/x509/x509_ext.cpp
+++ b/src/lib/x509/x509_ext.cpp
@@ -12,7 +12,6 @@
#include <botan/der_enc.h>
#include <botan/ber_dec.h>
#include <botan/oids.h>
-#include <botan/charset.h>
#include <botan/internal/bit_ops.h>
#include <algorithm>
#include <sstream>
@@ -735,7 +734,7 @@ std::vector<uint8_t> Authority_Information_Access::encode_inner() const
.start_cons(SEQUENCE)
.start_cons(SEQUENCE)
.encode(OIDS::lookup("PKIX.OCSP"))
- .add_object(ASN1_Tag(6), CONTEXT_SPECIFIC, url.iso_8859())
+ .add_object(ASN1_Tag(6), CONTEXT_SPECIFIC, url.value())
.end_cons()
.end_cons().get_contents_unlocked();
}
@@ -758,9 +757,7 @@ void Authority_Information_Access::decode_inner(const std::vector<uint8_t>& in)
if(name.type_tag == 6 && name.class_tag == CONTEXT_SPECIFIC)
{
- m_ocsp_responder = Charset::transcode(ASN1::to_string(name),
- LATIN1_CHARSET,
- LOCAL_CHARSET);
+ m_ocsp_responder = ASN1::to_string(name);
}
}
diff --git a/src/tests/data/x509test/contains_bmpstring.pem b/src/tests/data/x509test/contains_bmpstring.pem
new file mode 100644
index 000000000..c204c4296
--- /dev/null
+++ b/src/tests/data/x509test/contains_bmpstring.pem
@@ -0,0 +1,22 @@
+-----BEGIN CERTIFICATE-----
+MIIDmDCCAoACCQCJ7TVHW6qlLDANBgkqhkiG9w0BAQUFADCBjTELMAkGA1UEBhMC
+REUxDzANBgNVBAcTBkJlcmxpbjEXMBUGA1UECh4OAG4AZQPHA/UAbgBpA8kxCzAJ
+BgNVBAsTAkRDMR8wHQYDVQQDHhYA6ABuAd0ELwAgBBwB3QQ5BDcETQQ7MSYwJAYJ
+KoZIhvcNAQkBFhdyZW5lLm1ldXNlbEBuZXhlbmlvLmNvbTAeFw0xNzExMTAwODQ5
+MzFaFw0xODExMTAwODQ5MzFaMIGNMQswCQYDVQQGEwJERTEPMA0GA1UEBxMGQmVy
+bGluMRcwFQYDVQQKHg4AbgBlA8cD9QBuAGkDyTELMAkGA1UECxMCREMxHzAdBgNV
+BAMeFgDoAG4B3QQvACAEHAHdBDkENwRNBDsxJjAkBgkqhkiG9w0BCQEWF3JlbmUu
+bWV1c2VsQG5leGVuaW8uY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAvykcZZN3B+pWCi4eNnVv4jITDendIJhfjELocgALmgCJer5XH0gdChJMqHre
+bnhpBHEdgivMvVGN2BDCkz5+4WshqwZ8lSXIMlHXaaIy7CulhJUnj2lTsa75jr2F
+BmxUF+NwQMrBOOIs2IFlfqeEnlXgRD3pBv9erZI5ng3ciTgXMCbg21t1E56706cD
+sLKv2OWXZrz+KLIaNvNV5pNW1wLup3sCKhtFRaH721crp2KROvAGkb0R6oNannKm
+DQieQ1g4AR94ihCc1SjaoMydzrFhmyArgsusnkbrl6n14kHfSuyUul355ejSOz+V
+k0HGQMykh2WDo2MPy06oxw5ZAwIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQCoE+4W
+OLQ3FuTigBYf7Hv0D+qY58XIV0D9YgJYrCHJL7S31NTbCuoYK2NdfcwE1MUUxb28
+nCIR3jgawN1WlHCeiLkhrD6TnMNjvZ34xVqSmkQ5zIJTo909wFhjGjtByP5fAQqt
+uja2bJOUOg0GRsbrv1c8zCYnz5+bhRNrZqdxWQzB9c10QiMrReYxFY4wPwIVK9y5
+6eYA4dkkhiRV+KAtyPQokn9N5kUM4VjlyYS3IB3DzXlB3Z6mlHP7t/G5Su1Nmkxu
+NhqtL0Vg+oo6DDuzsI+1WZO9saPJnUPSNwu3BDIu56gWHlHw6dZksVK0J7eF5n8d
+rPULI83gQXKxuD+C
+-----END CERTIFICATE-----
diff --git a/src/tests/data/x509test/contains_utf8string.pem b/src/tests/data/x509test/contains_utf8string.pem
new file mode 100644
index 000000000..ddcd4b046
--- /dev/null
+++ b/src/tests/data/x509test/contains_utf8string.pem
@@ -0,0 +1,24 @@
+-----BEGIN CERTIFICATE-----
+MIID9DCCAtwCCQD6cbgDx1XA/jANBgkqhkiG9w0BAQUFADCBuzEkMCIGA1UEAwwb
+0J7Qv9C40YHQsNC90LjQtSDRgdCw0LnRgtCwMSEwHwYJKoZIhvcNAQkBFhJ0ZXN0
+QHJhbmRvbWNvcnAucnUxIDAeBgNVBAoMF9Cc0L7RjyDQutC+0LzQv9Cw0L3QuNGP
+MSowKAYDVQQLDCHQnNC+0ZEg0L/QvtC00YDQsNC30LTQtdC70LXQvdC40LUxFTAT
+BgNVBAcMDNCc0L7RgdC60LLQsDELMAkGA1UEBhMCUlUwHhcNMTcxMDExMjAzNTQ5
+WhcNMTgxMDExMjAzNTQ5WjCBuzEkMCIGA1UEAwwb0J7Qv9C40YHQsNC90LjQtSDR
+gdCw0LnRgtCwMSEwHwYJKoZIhvcNAQkBFhJ0ZXN0QHJhbmRvbWNvcnAucnUxIDAe
+BgNVBAoMF9Cc0L7RjyDQutC+0LzQv9Cw0L3QuNGPMSowKAYDVQQLDCHQnNC+0ZEg
+0L/QvtC00YDQsNC30LTQtdC70LXQvdC40LUxFTATBgNVBAcMDNCc0L7RgdC60LLQ
+sDELMAkGA1UEBhMCUlUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDL
+K3xsfEoH/+mjq3scoZ6TfKvlTugzjOSirenGsSdw6IuxEy6ywEFb9YDSKyvMSDOG
+LhbotiKvn3n9WwKYhEMkNhMJDavb4s+CRYPtM4iBhzO3gTuNMqBAjKayykOWWNEq
+b6lgRivfo62iCetvp0zoXHVEyomfSUCl11sQ21etwOdnloocyRqKfDHHp2jO5J0g
+0HEQx2klxuivDU3lpEHRhH4cE5zUMStOdXaHm5nYnPUYnSrFinGLE01l7/MXsJwX
+AOVwBv3ErIh173KuwtyPci8AK16UNQqqvGy9QDEvH3TxMxrl7416K/iqCZg5d0FG
+HmsAbrGfT9pUA3IScSphAgMBAAEwDQYJKoZIhvcNAQEFBQADggEBAHM8W84yxiIV
+o87x6usP+BCQ5T7IIb7NgjnseJUS+dl0gJEyFdLBa4mz5FDdtaYEi3firc3NOJ6l
+yA4kEQ49k4I3yaDEjMuc1+qpzThdtC/+cycLCYuoaYxR/kx4/zoLcELsk8rud9Dq
+8N6g7q7MR6Nno3to3kHzrka/P0W6X8jfWmYm2RDTKhBPlcobTvgIyupn6uadWhY8
+Ahte186a2ylV/feFHIBuFr9jLzWKPMQm6MmPv86ZatdfqSNOU/YtKAQyLouoT45b
+urVwAyOlYDyiXsTfzTcsOAa9sHHAzofK2E+tZ0gY3s7JT1kEWVG5XoJWx+hKM5Ht
+hGah1kV664Y=
+-----END CERTIFICATE-----
diff --git a/src/tests/test_asn1.cpp b/src/tests/test_asn1.cpp
index 5c54f2bb4..c99fa41d9 100644
--- a/src/tests/test_asn1.cpp
+++ b/src/tests/test_asn1.cpp
@@ -9,6 +9,7 @@
#if defined(BOTAN_HAS_ASN1)
#include <botan/der_enc.h>
#include <botan/ber_dec.h>
+ #include <botan/asn1_str.h>
#endif
namespace Botan_Tests {
@@ -44,6 +45,185 @@ Test::Result test_ber_stack_recursion()
return result;
}
+Test::Result test_asn1_utf8_ascii_parsing()
+ {
+ Test::Result result("ASN.1 ASCII parsing");
+
+ try
+ {
+ // \x13 - ASN1 tag for 'printable string'
+ // \x06 - 6 characters of payload
+ // ... - UTF-8 encoded (ASCII chars only) word 'Moscow'
+ const std::string moscow =
+ "\x13\x06\x4D\x6F\x73\x63\x6F\x77";
+ const std::string moscow_plain = "Moscow";
+ Botan::DataSource_Memory input(moscow.data());
+ Botan::BER_Decoder dec(input);
+
+ Botan::ASN1_String str;
+ str.decode_from(dec);
+
+ result.test_eq("value()", str.value(), moscow_plain);
+ }
+ catch(const Botan::Decoding_Error &ex)
+ {
+ result.test_failure(ex.what());
+ }
+
+ return result;
+ }
+
+Test::Result test_asn1_utf8_parsing()
+ {
+ Test::Result result("ASN.1 UTF-8 parsing");
+
+ try
+ {
+ // \x0C - ASN1 tag for 'UTF8 string'
+ // \x0C - 12 bytes of payload (6 Cyrillic characters, 2 bytes each in UTF-8)
+ // ... - UTF-8 encoded russian word for Moscow in cyrillic script
+ const std::string moscow =
+ "\x0C\x0C\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
+ const std::string moscow_plain =
+ "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
+ Botan::DataSource_Memory input(moscow.data());
+ Botan::BER_Decoder dec(input);
+
+ Botan::ASN1_String str;
+ str.decode_from(dec);
+
+ result.test_eq("value()", str.value(), moscow_plain);
+ }
+ catch(const Botan::Decoding_Error &ex)
+ {
+ result.test_failure(ex.what());
+ }
+
+ return result;
+ }
+
+Test::Result test_asn1_ucs2_parsing()
+ {
+ Test::Result result("ASN.1 BMP string (UCS-2) parsing");
+
+ try
+ {
+ // \x1E - ASN1 tag for 'BMP (UCS-2) string'
+ // \x0C - 12 bytes of payload (6 characters, 2 bytes each in UCS-2)
+ // ... - UCS-2 encoding for Moscow in cyrillic script
+ const std::string moscow =
+ "\x1E\x0C\x04\x1C\x04\x3E\x04\x41\x04\x3A\x04\x32\x04\x30";
+ const std::string moscow_plain =
+ "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
+
+ Botan::DataSource_Memory input(moscow.data());
+ Botan::BER_Decoder dec(input);
+
+ Botan::ASN1_String str;
+ str.decode_from(dec);
+
+ result.test_eq("value()", str.value(), moscow_plain);
+ }
+ catch(const Botan::Decoding_Error &ex)
+ {
+ result.test_failure(ex.what());
+ }
+
+ return result;
+ }
+
+Test::Result test_asn1_ucs4_parsing()
+ {
+ Test::Result result("ASN.1 universal string (UCS-4) parsing");
+
+ try
+ {
+ // \x1C - ASN1 tag for 'universal string'
+ // \x18 - 24 bytes of payload (6 characters, 4 bytes each in UCS-4)
+ // ... - UCS-4 encoding for Moscow in cyrillic script
+ const Botan::byte moscow[] =
+ "\x1C\x18\x00\x00\x04\x1C\x00\x00\x04\x3E\x00\x00\x04\x41\x00\x00\x04\x3A\x00\x00\x04\x32\x00\x00\x04\x30";
+ const std::string moscow_plain =
+ "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
+ Botan::DataSource_Memory input(moscow, sizeof(moscow));
+ Botan::BER_Decoder dec(input);
+
+ Botan::ASN1_String str;
+ str.decode_from(dec);
+
+ result.test_eq("value()", str.value(), moscow_plain);
+ }
+ catch(const Botan::Decoding_Error &ex)
+ {
+ result.test_failure(ex.what());
+ }
+
+ return result;
+ }
+
+Test::Result test_asn1_ascii_encoding()
+ {
+ Test::Result result("ASN.1 ASCII encoding");
+
+ try
+ {
+ // UTF-8 encoded (ASCII chars only) word 'Moscow'
+ const std::string moscow =
+ "\x4D\x6F\x73\x63\x6F\x77";
+ Botan::ASN1_String str(moscow);
+
+ Botan::DER_Encoder enc;
+
+ str.encode_into(enc);
+ auto encodingResult = enc.get_contents();
+
+ // \x13 - ASN1 tag for 'printable string'
+ // \x06 - 6 characters of payload
+ const auto moscowEncoded = Botan::hex_decode("13064D6F73636F77");
+ result.test_eq("encoding result", encodingResult, moscowEncoded);
+
+ result.test_success("No crash");
+ }
+ catch(const std::exception &ex)
+ {
+ result.test_failure(ex.what());
+ }
+
+ return result;
+ }
+
+Test::Result test_asn1_utf8_encoding()
+ {
+ Test::Result result("ASN.1 UTF-8 encoding");
+
+ try
+ {
+ // UTF-8 encoded russian word for Moscow in cyrillic script
+ const std::string moscow =
+ "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
+ Botan::ASN1_String str(moscow);
+
+ Botan::DER_Encoder enc;
+
+ str.encode_into(enc);
+ auto encodingResult = enc.get_contents();
+
+ // \x0C - ASN1 tag for 'UTF8 string'
+ // \x0C - 12 bytes of payload (6 Cyrillic characters, 2 bytes each in UTF-8)
+ const auto moscowEncoded =
+ Botan::hex_decode("0C0CD09CD0BED181D0BAD0B2D0B0");
+ result.test_eq("encoding result", encodingResult, moscowEncoded);
+
+ result.test_success("No crash");
+ }
+ catch(const std::exception &ex)
+ {
+ result.test_failure(ex.what());
+ }
+
+ return result;
+ }
+
}
class ASN1_Tests final : public Test
@@ -54,6 +234,12 @@ class ASN1_Tests final : public Test
std::vector<Test::Result> results;
results.push_back(test_ber_stack_recursion());
+ results.push_back(test_asn1_utf8_ascii_parsing());
+ results.push_back(test_asn1_utf8_parsing());
+ results.push_back(test_asn1_ucs2_parsing());
+ results.push_back(test_asn1_ucs4_parsing());
+ results.push_back(test_asn1_ascii_encoding());
+ results.push_back(test_asn1_utf8_encoding());
return results;
}
diff --git a/src/tests/test_utils.cpp b/src/tests/test_utils.cpp
index da2d25d5e..d102a3e46 100644
--- a/src/tests/test_utils.cpp
+++ b/src/tests/test_utils.cpp
@@ -6,6 +6,8 @@
* Botan is released under the Simplified BSD License (see license.txt)
*/
+#define BOTAN_NO_DEPRECATED_WARNINGS
+
#include "tests.h"
#include <functional>
#include <ctime>
@@ -421,17 +423,15 @@ class Charset_Tests final : public Text_Based_Test
{
converted = Botan::ucs4_to_utf8(in.data(), in.size());
}
- else if(type == "UTF16-LATIN1")
+ else if(type == "UTF8-LATIN1")
{
- converted = Botan::Charset::transcode(in_str,
- Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UCS2_CHARSET);
+ converted = Botan::utf8_to_latin1(in_str);
}
- else if(type == "UTF8-LATIN1")
+ else if(type == "UTF16-LATIN1")
{
converted = Botan::Charset::transcode(in_str,
Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UTF8_CHARSET);
+ Botan::Character_Set::UCS2_CHARSET);
}
else if(type == "LATIN1-UTF8")
{
@@ -484,32 +484,25 @@ class Charset_Tests final : public Text_Based_Test
result.test_throws("conversion fails for non-Latin1 characters", []()
{
// "abcdefŸabcdef" (0xC5 0xB8 is the UTF-8 encoding of U+0178, which is outside Latin-1)
- std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5,
- 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
- };
+ const std::vector<uint8_t> input = {
+ 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5,
+ 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
+ };
- Botan::Charset::transcode(std::string(input.begin(), input.end()),
- Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UTF8_CHARSET);
+ Botan::utf8_to_latin1(std::string(input.begin(), input.end()));
});
result.test_throws("invalid utf-8 string", []()
{
// sequence truncated
- std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 };
-
- Botan::Charset::transcode(std::string(input.begin(), input.end()),
- Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UTF8_CHARSET);
+ const std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 };
+ Botan::utf8_to_latin1(std::string(input.begin(), input.end()));
});
result.test_throws("invalid utf-8 string", []()
{
std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC8, 0xB8, 0x61 };
-
- Botan::Charset::transcode(std::string(input.begin(), input.end()),
- Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UTF8_CHARSET);
+ Botan::utf8_to_latin1(std::string(input.begin(), input.end()));
});
return result;
diff --git a/src/tests/unit_x509.cpp b/src/tests/unit_x509.cpp
index b9aa1709e..d635f7fe1 100644
--- a/src/tests/unit_x509.cpp
+++ b/src/tests/unit_x509.cpp
@@ -358,6 +358,69 @@ Test::Result test_x509_dates()
return result;
}
+Test::Result test_x509_utf8()
+ {
+ Test::Result result("X509 with UTF-8 encoded fields");
+
+ try
+ {
+ Botan::X509_Certificate utf8_cert(Test::data_file("x509test/contains_utf8string.pem"));
+
+ // UTF-8 encoded fields of test certificate (contains cyrillic letters)
+ const std::string organization =
+ "\xD0\x9C\xD0\xBE\xD1\x8F\x20\xD0\xBA\xD0\xBE\xD0"
+ "\xBC\xD0\xBF\xD0\xB0\xD0\xBD\xD0\xB8\xD1\x8F";
+ const std::string organization_unit =
+ "\xD0\x9C\xD0\xBE\xD1\x91\x20\xD0\xBF\xD0\xBE\xD0\xB4\xD1\x80\xD0\xB0"
+ "\xD0\xB7\xD0\xB4\xD0\xB5\xD0\xBB\xD0\xB5\xD0\xBD\xD0\xB8\xD0\xB5";
+ const std::string common_name =
+ "\xD0\x9E\xD0\xBF\xD0\xB8\xD1\x81\xD0\xB0\xD0\xBD\xD0\xB8"
+ "\xD0\xB5\x20\xD1\x81\xD0\xB0\xD0\xB9\xD1\x82\xD0\xB0";
+ const std::string location =
+ "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0";
+
+ result.test_eq("O", utf8_cert.issuer_info("O").at(0), organization);
+ result.test_eq("OU", utf8_cert.issuer_info("OU").at(0), organization_unit);
+ result.test_eq("CN", utf8_cert.issuer_info("CN").at(0), common_name);
+ result.test_eq("L", utf8_cert.issuer_info("L").at(0), location);
+ }
+ catch (const Botan::Decoding_Error &ex)
+ {
+ result.test_failure(ex.what());
+ }
+
+ return result;
+ }
+
+Test::Result test_x509_bmpstring()
+ {
+ Test::Result result("X509 with UCS-2 (BMPString) encoded fields");
+
+ try
+ {
+ Botan::X509_Certificate ucs2_cert(Test::data_file("x509test/contains_bmpstring.pem"));
+
+ // UTF-8 encoded fields of test certificate (contains cyrillic and greek letters)
+ const std::string organization =
+ "\x6E\x65\xCF\x87\xCF\xB5\x6E\x69\xCF\x89";
+ const std::string common_name =
+ "\xC3\xA8\x6E\xC7\x9D\xD0\xAF\x20\xD0\x9C\xC7\x9D\xD0\xB9\xD0\xB7\xD1\x8D\xD0\xBB";
+
+ // UTF-8 encoded fields of test certificate (contains only ASCII characters)
+ const std::string location = "Berlin";
+
+ result.test_eq("O", ucs2_cert.issuer_info("O").at(0), organization);
+ result.test_eq("CN", ucs2_cert.issuer_info("CN").at(0), common_name);
+ result.test_eq("L", ucs2_cert.issuer_info("L").at(0), location);
+ }
+ catch (const Botan::Decoding_Error &ex)
+ {
+ result.test_failure(ex.what());
+ }
+
+ return result;
+ }
+
Test::Result test_x509_cert(const std::string& sig_algo, const std::string& hash_fn = "SHA-256")
{
Test::Result result("X509 Unit");
@@ -1135,6 +1198,8 @@ class X509_Cert_Unit_Tests final : public Test
results.push_back(test_x509_dates());
results.push_back(test_cert_status_strings());
results.push_back(test_hashes("ECDSA"));
+ results.push_back(test_x509_utf8());
+ results.push_back(test_x509_bmpstring());
return results;
}