diff options
author | Jack Lloyd <[email protected]> | 2017-11-10 08:23:45 -0500 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-11-10 08:23:45 -0500 |
commit | bff1490ac8001fe8dcfdc145c44c6cce28a0cba1 (patch) | |
tree | e70dda7d11abd6683a9376c35930d23e1b68c2aa | |
parent | 2abac405f483b45ddb653f710be396d8d37936b2 (diff) | |
parent | ba16efe696d181b0165e57f86cade99934aa788f (diff) |
Merge GH #1287 #1289 Use UTF-8 encoding for ASN.1 strings
-rw-r--r-- | doc/manual/x509.rst | 13 | ||||
-rw-r--r-- | src/cli/asn1.cpp | 15 | ||||
-rw-r--r-- | src/lib/asn1/asn1_alt_name.cpp | 13 | ||||
-rw-r--r-- | src/lib/asn1/asn1_obj.h | 1 | ||||
-rw-r--r-- | src/lib/asn1/asn1_str.cpp | 103 | ||||
-rw-r--r-- | src/lib/asn1/asn1_str.h | 18 | ||||
-rw-r--r-- | src/lib/asn1/asn1_time.cpp | 11 | ||||
-rw-r--r-- | src/lib/asn1/info.txt | 2 | ||||
-rw-r--r-- | src/lib/utils/charset.cpp | 58 | ||||
-rw-r--r-- | src/lib/utils/charset.h | 44 | ||||
-rw-r--r-- | src/lib/x509/name_constraint.cpp | 3 | ||||
-rw-r--r-- | src/lib/x509/x509_ext.cpp | 7 | ||||
-rw-r--r-- | src/tests/data/x509test/contains_bmpstring.pem | 22 | ||||
-rw-r--r-- | src/tests/data/x509test/contains_utf8string.pem | 24 | ||||
-rw-r--r-- | src/tests/test_asn1.cpp | 186 | ||||
-rw-r--r-- | src/tests/test_utils.cpp | 35 | ||||
-rw-r--r-- | src/tests/unit_x509.cpp | 65 |
17 files changed, 447 insertions, 173 deletions
diff --git a/doc/manual/x509.rst b/doc/manual/x509.rst index 72a7b32b7..1fb6d90f5 100644 --- a/doc/manual/x509.rst +++ b/doc/manual/x509.rst @@ -79,19 +79,6 @@ associated with a position of some sort in the organization. It may also include fields for state/province and locality. What a locality is, nobody knows, but it's usually given as a city name. -Botan doesn't currently support any of the Unicode variants used in -ASN.1 (UTF-8, UCS-2, and UCS-4), any of which could be used for the -fields in the DN. This could be problematic, particularly in Asia and -other areas where non-ASCII characters are needed for most names. The -UTF-8 and UCS-2 string types *are* accepted (in fact, UTF-8 is used -when encoding much of the time), but if any of the characters included -in the string are not in ISO 8859-1 (ie 0 ... 255), an exception will -get thrown. Currently the ``ASN1_String`` type holds its data as ISO -8859-1 internally (regardless of local character set); this would have -to be changed to hold UCS-2 or UCS-4 in order to support Unicode -(also, many interfaces in the X.509 code would have to accept or -return a ``std::wstring`` instead of a ``std::string``). - Like the distinguished names, subject alternative names can contain a lot of things that Botan will flat out ignore (most of which you would likely never want to use). 
However, there are three very useful pieces of information that diff --git a/src/cli/asn1.cpp b/src/cli/asn1.cpp index 5c90a3c5b..234cbd6e6 100644 --- a/src/cli/asn1.cpp +++ b/src/cli/asn1.cpp @@ -22,9 +22,6 @@ #include <sstream> #include <ctype.h> -// Set this if your terminal understands UTF-8; otherwise output is in Latin-1 -#define UTF8_TERMINAL 1 - namespace Botan_CLI { namespace { @@ -349,17 +346,7 @@ void decode(std::ostream& output, { Botan::ASN1_String str; data.decode(str); - if(UTF8_TERMINAL) - { - emit(output, type_name(type_tag), level, length, - Botan::Charset::transcode(str.iso_8859(), - Botan::UTF8_CHARSET, - Botan::LATIN1_CHARSET)); - } - else - { - emit(output, type_name(type_tag), level, length, str.iso_8859()); - } + emit(output, type_name(type_tag), level, length, str.value()); } else if(type_tag == Botan::UTC_TIME || type_tag == Botan::GENERALIZED_TIME) { diff --git a/src/lib/asn1/asn1_alt_name.cpp b/src/lib/asn1/asn1_alt_name.cpp index 7bd4cd494..940312886 100644 --- a/src/lib/asn1/asn1_alt_name.cpp +++ b/src/lib/asn1/asn1_alt_name.cpp @@ -11,7 +11,6 @@ #include <botan/ber_dec.h> #include <botan/oids.h> #include <botan/internal/stl_util.h> -#include <botan/charset.h> #include <botan/parsing.h> #include <botan/loadstor.h> @@ -133,7 +132,7 @@ void encode_entries(DER_Encoder& encoder, if(type == "RFC822" || type == "DNS" || type == "URI") { ASN1_String asn1_string(i->second, IA5_STRING); - encoder.add_object(tagging, CONTEXT_SPECIFIC, asn1_string.iso_8859()); + encoder.add_object(tagging, CONTEXT_SPECIFIC, asn1_string.value()); } else if(type == "IP") { @@ -218,13 +217,9 @@ void AlternativeName::decode_from(BER_Decoder& source) } else if(tag == 1 || tag == 2 || tag == 6) { - const std::string value = Charset::transcode(ASN1::to_string(obj), - LATIN1_CHARSET, - LOCAL_CHARSET); - - if(tag == 1) add_attribute("RFC822", value); - if(tag == 2) add_attribute("DNS", value); - if(tag == 6) add_attribute("URI", value); + if(tag == 1) 
add_attribute("RFC822", ASN1::to_string(obj)); + if(tag == 2) add_attribute("DNS", ASN1::to_string(obj)); + if(tag == 6) add_attribute("URI", ASN1::to_string(obj)); } else if(tag == 7) { diff --git a/src/lib/asn1/asn1_obj.h b/src/lib/asn1/asn1_obj.h index 63c7dc2e3..95b84c5c1 100644 --- a/src/lib/asn1/asn1_obj.h +++ b/src/lib/asn1/asn1_obj.h @@ -45,6 +45,7 @@ enum ASN1_Tag { T61_STRING = 0x14, IA5_STRING = 0x16, VISIBLE_STRING = 0x1A, + UNIVERSAL_STRING = 0x1C, BMP_STRING = 0x1E, UTC_TIME = 0x17, diff --git a/src/lib/asn1/asn1_str.cpp b/src/lib/asn1/asn1_str.cpp index 526e10158..d90aa215b 100644 --- a/src/lib/asn1/asn1_str.cpp +++ b/src/lib/asn1/asn1_str.cpp @@ -17,8 +17,7 @@ namespace { /* * Choose an encoding for the string */ -ASN1_Tag choose_encoding(const std::string& str, - const std::string& type) +ASN1_Tag choose_encoding(const std::string& str) { static const uint8_t IS_PRINTABLE[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -48,40 +47,49 @@ ASN1_Tag choose_encoding(const std::string& str, { if(!IS_PRINTABLE[static_cast<uint8_t>(str[i])]) { - if(type == "utf8") return UTF8_STRING; - if(type == "latin1") return T61_STRING; - throw Invalid_Argument("choose_encoding: Bad string type " + type); + return UTF8_STRING; } } return PRINTABLE_STRING; } +void assert_is_string_type(ASN1_Tag tag) + { + if(tag != NUMERIC_STRING && + tag != PRINTABLE_STRING && + tag != VISIBLE_STRING && + tag != T61_STRING && + tag != IA5_STRING && + tag != UTF8_STRING && + tag != BMP_STRING && + tag != UNIVERSAL_STRING) + { + throw Invalid_Argument("ASN1_String: Unknown string type " + + std::to_string(tag)); + } + } + } /* * Create an ASN1_String */ -ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : m_iso_8859_str(Charset::transcode(str, LOCAL_CHARSET, LATIN1_CHARSET)), m_tag(t) +ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : m_utf8_str(str), m_tag(t) { - if(m_tag == DIRECTORY_STRING) - m_tag = 
choose_encoding(m_iso_8859_str, "latin1"); - - if(m_tag != NUMERIC_STRING && - m_tag != PRINTABLE_STRING && - m_tag != VISIBLE_STRING && - m_tag != T61_STRING && - m_tag != IA5_STRING && - m_tag != UTF8_STRING && - m_tag != BMP_STRING) - throw Invalid_Argument("ASN1_String: Unknown string type " + - std::to_string(m_tag)); + { + m_tag = choose_encoding(m_utf8_str); + } + + assert_is_string_type(m_tag); } /* * Create an ASN1_String */ -ASN1_String::ASN1_String(const std::string& str) : m_iso_8859_str(Charset::transcode(str, LOCAL_CHARSET, LATIN1_CHARSET)), m_tag(choose_encoding(m_iso_8859_str, "latin1")) +ASN1_String::ASN1_String(const std::string& str) : + m_utf8_str(str), + m_tag(choose_encoding(m_utf8_str)) {} /* @@ -89,23 +97,7 @@ ASN1_String::ASN1_String(const std::string& str) : m_iso_8859_str(Charset::trans */ std::string ASN1_String::iso_8859() const { - return m_iso_8859_str; - } - -/* -* Return this string in local encoding -*/ -std::string ASN1_String::value() const - { - return Charset::transcode(m_iso_8859_str, LATIN1_CHARSET, LOCAL_CHARSET); - } - -/* -* Return the type of this string object -*/ -ASN1_Tag ASN1_String::tagging() const - { - return m_tag; + return utf8_to_latin1(m_utf8_str); } /* @@ -113,10 +105,15 @@ ASN1_Tag ASN1_String::tagging() const */ void ASN1_String::encode_into(DER_Encoder& encoder) const { - std::string value = iso_8859(); - if(tagging() == UTF8_STRING) - value = Charset::transcode(value, LATIN1_CHARSET, UTF8_CHARSET); - encoder.add_object(tagging(), UNIVERSAL, value); + if(m_data.empty()) + { + encoder.add_object(tagging(), UNIVERSAL, m_utf8_str); + } + else + { + // If this string was decoded, reserialize using original encoding + encoder.add_object(tagging(), UNIVERSAL, m_data.data(), m_data.size()); + } } /* @@ -126,18 +123,24 @@ void ASN1_String::decode_from(BER_Decoder& source) { BER_Object obj = source.get_next_object(); - Character_Set charset_is; + assert_is_string_type(obj.type_tag); - if(obj.type_tag == BMP_STRING) 
- charset_is = UCS2_CHARSET; - else if(obj.type_tag == UTF8_STRING) - charset_is = UTF8_CHARSET; - else - charset_is = LATIN1_CHARSET; + m_tag = obj.type_tag; + m_data.assign(obj.value.begin(), obj.value.end()); - *this = ASN1_String( - Charset::transcode(ASN1::to_string(obj), LOCAL_CHARSET, charset_is), - obj.type_tag); + if(m_tag == BMP_STRING) + { + m_utf8_str = ucs2_to_utf8(m_data.data(), m_data.size()); + } + else if(m_tag == UNIVERSAL_STRING) + { + m_utf8_str = ucs4_to_utf8(m_data.data(), m_data.size()); + } + else + { + // All other supported string types are UTF-8 or some subset thereof + m_utf8_str = ASN1::to_string(obj); + } } } diff --git a/src/lib/asn1/asn1_str.h b/src/lib/asn1/asn1_str.h index 9e0237550..f19265494 100644 --- a/src/lib/asn1/asn1_str.h +++ b/src/lib/asn1/asn1_str.h @@ -13,7 +13,8 @@ namespace Botan { /** -* Simple String +* ASN.1 string type +* This class normalizes all inputs to a UTF-8 std::string */ class BOTAN_PUBLIC_API(2,0) ASN1_String final : public ASN1_Object { @@ -21,15 +22,18 @@ class BOTAN_PUBLIC_API(2,0) ASN1_String final : public ASN1_Object void encode_into(class DER_Encoder&) const override; void decode_from(class BER_Decoder&) override; - std::string value() const; - std::string iso_8859() const; + ASN1_Tag tagging() const { return m_tag; } + + const std::string& value() const { return m_utf8_str; } - ASN1_Tag tagging() const; + std::string BOTAN_DEPRECATED("Use value() to get UTF-8 string instead") + iso_8859() const; - explicit ASN1_String(const std::string& = ""); - ASN1_String(const std::string&, ASN1_Tag); + explicit ASN1_String(const std::string& utf8 = ""); + ASN1_String(const std::string& utf8, ASN1_Tag tag); private: - std::string m_iso_8859_str; + std::vector<uint8_t> m_data; + std::string m_utf8_str; ASN1_Tag m_tag; }; diff --git a/src/lib/asn1/asn1_time.cpp b/src/lib/asn1/asn1_time.cpp index 2cd225915..f6a0c414e 100644 --- a/src/lib/asn1/asn1_time.cpp +++ b/src/lib/asn1/asn1_time.cpp @@ -8,7 +8,6 @@ #include 
<botan/asn1_time.h> #include <botan/der_enc.h> #include <botan/ber_dec.h> -#include <botan/charset.h> #include <botan/exceptn.h> #include <botan/parsing.h> #include <botan/calendar.h> @@ -41,20 +40,14 @@ void X509_Time::encode_into(DER_Encoder& der) const if(m_tag != GENERALIZED_TIME && m_tag != UTC_TIME) throw Invalid_Argument("X509_Time: Bad encoding tag"); - der.add_object(m_tag, UNIVERSAL, - Charset::transcode(to_string(), - LOCAL_CHARSET, - LATIN1_CHARSET)); + der.add_object(m_tag, UNIVERSAL, to_string()); } void X509_Time::decode_from(BER_Decoder& source) { BER_Object ber_time = source.get_next_object(); - set_to(Charset::transcode(ASN1::to_string(ber_time), - LATIN1_CHARSET, - LOCAL_CHARSET), - ber_time.type_tag); + set_to(ASN1::to_string(ber_time), ber_time.type_tag); } std::string X509_Time::to_string() const diff --git a/src/lib/asn1/info.txt b/src/lib/asn1/info.txt index 4b3689f45..4772e1ca7 100644 --- a/src/lib/asn1/info.txt +++ b/src/lib/asn1/info.txt @@ -1,5 +1,5 @@ <defines> -ASN1 -> 20161102 +ASN1 -> 20171109 </defines> <requires> diff --git a/src/lib/utils/charset.cpp b/src/lib/utils/charset.cpp index dadee8f78..ca32c652d 100644 --- a/src/lib/utils/charset.cpp +++ b/src/lib/utils/charset.cpp @@ -92,34 +92,6 @@ std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len) return s; } -namespace Charset { - -namespace { - -/* -* Convert from UCS-2 to ISO 8859-1 -*/ -std::string ucs2_to_latin1(const std::string& ucs2) - { - if(ucs2.size() % 2 == 1) - throw Decoding_Error("UCS-2 string has an odd number of bytes"); - - std::string latin1; - - for(size_t i = 0; i != ucs2.size(); i += 2) - { - const uint8_t c1 = ucs2[i]; - const uint8_t c2 = ucs2[i+1]; - - if(c1 != 0) - throw Decoding_Error("UCS-2 has non-Latin1 characters"); - - latin1 += static_cast<char>(c2); - } - - return latin1; - } - /* * Convert from UTF-8 to ISO 8859-1 */ @@ -133,7 +105,9 @@ std::string utf8_to_latin1(const std::string& utf8) const uint8_t c1 = 
static_cast<uint8_t>(utf8[position++]); if(c1 <= 0x7F) + { iso8859 += static_cast<char>(c1); + } else if(c1 >= 0xC0 && c1 <= 0xC7) { if(position == utf8.size()) @@ -154,6 +128,34 @@ std::string utf8_to_latin1(const std::string& utf8) return iso8859; } +namespace Charset { + +namespace { + +/* +* Convert from UCS-2 to ISO 8859-1 +*/ +std::string ucs2_to_latin1(const std::string& ucs2) + { + if(ucs2.size() % 2 == 1) + throw Decoding_Error("UCS-2 string has an odd number of bytes"); + + std::string latin1; + + for(size_t i = 0; i != ucs2.size(); i += 2) + { + const uint8_t c1 = ucs2[i]; + const uint8_t c2 = ucs2[i+1]; + + if(c1 != 0) + throw Decoding_Error("UCS-2 has non-Latin1 characters"); + + latin1 += static_cast<char>(c2); + } + + return latin1; + } + /* * Convert from ISO 8859-1 to UTF-8 */ diff --git a/src/lib/utils/charset.h b/src/lib/utils/charset.h index 3f2ff9912..4913f0a5a 100644 --- a/src/lib/utils/charset.h +++ b/src/lib/utils/charset.h @@ -14,16 +14,6 @@ namespace Botan { /** -* The different charsets (nominally) supported by Botan. -*/ -enum Character_Set { - LOCAL_CHARSET, - UCS2_CHARSET, - UTF8_CHARSET, - LATIN1_CHARSET -}; - -/** * Convert a sequence of UCS-2 (big endian) characters to a UTF-8 string * This is used for ASN.1 BMPString type * @param ucs2 the sequence of UCS-2 characters @@ -39,15 +29,41 @@ std::string BOTAN_UNSTABLE_API ucs2_to_utf8(const uint8_t ucs2[], size_t len); */ std::string BOTAN_UNSTABLE_API ucs4_to_utf8(const uint8_t ucs4[], size_t len); +/** +* Convert a UTF-8 string to Latin-1 +* If a character outside the Latin-1 range is encountered, an exception is thrown. +*/ +std::string BOTAN_UNSTABLE_API utf8_to_latin1(const std::string& utf8); + +/** +* The different charsets (nominally) supported by Botan. +*/ +enum Character_Set { + LOCAL_CHARSET, + UCS2_CHARSET, + UTF8_CHARSET, + LATIN1_CHARSET +}; + namespace Charset { /* -* Character Set Handling +* Character set conversion - avoid this. 
+* For specific conversions, use the functions above like +* ucs2_to_utf8 and utf8_to_latin1 +* +* If you need something more complex than that, use a real library +* such as iconv, Boost.Locale, or ICU */ -std::string BOTAN_PUBLIC_API(2,0) transcode(const std::string& str, - Character_Set to, - Character_Set from); +std::string BOTAN_PUBLIC_API(2,0) + BOTAN_DEPRECATED("Avoid. See comment in header.") + transcode(const std::string& str, + Character_Set to, + Character_Set from); +/* +* Simple character classifier functions +*/ bool BOTAN_PUBLIC_API(2,0) is_digit(char c); bool BOTAN_PUBLIC_API(2,0) is_space(char c); bool BOTAN_PUBLIC_API(2,0) caseless_cmp(char x, char y); diff --git a/src/lib/x509/name_constraint.cpp b/src/lib/x509/name_constraint.cpp index e27dca9ec..e098bcd8d 100644 --- a/src/lib/x509/name_constraint.cpp +++ b/src/lib/x509/name_constraint.cpp @@ -7,7 +7,6 @@ #include <botan/name_constraint.h> #include <botan/ber_dec.h> -#include <botan/charset.h> #include <botan/loadstor.h> #include <botan/x509_dn.h> #include <botan/x509cert.h> @@ -49,7 +48,7 @@ void GeneralName::decode_from(class BER_Decoder& ber) if(tag == 1 || tag == 2 || tag == 6) { - m_name = Charset::transcode(ASN1::to_string(obj), LATIN1_CHARSET, LOCAL_CHARSET); + m_name = ASN1::to_string(obj); if(tag == 1) { diff --git a/src/lib/x509/x509_ext.cpp b/src/lib/x509/x509_ext.cpp index 3141d3c44..6e4c29d42 100644 --- a/src/lib/x509/x509_ext.cpp +++ b/src/lib/x509/x509_ext.cpp @@ -12,7 +12,6 @@ #include <botan/der_enc.h> #include <botan/ber_dec.h> #include <botan/oids.h> -#include <botan/charset.h> #include <botan/internal/bit_ops.h> #include <algorithm> #include <sstream> @@ -735,7 +734,7 @@ std::vector<uint8_t> Authority_Information_Access::encode_inner() const .start_cons(SEQUENCE) .start_cons(SEQUENCE) .encode(OIDS::lookup("PKIX.OCSP")) - .add_object(ASN1_Tag(6), CONTEXT_SPECIFIC, url.iso_8859()) + .add_object(ASN1_Tag(6), CONTEXT_SPECIFIC, url.value()) .end_cons() 
.end_cons().get_contents_unlocked(); } @@ -758,9 +757,7 @@ void Authority_Information_Access::decode_inner(const std::vector<uint8_t>& in) if(name.type_tag == 6 && name.class_tag == CONTEXT_SPECIFIC) { - m_ocsp_responder = Charset::transcode(ASN1::to_string(name), - LATIN1_CHARSET, - LOCAL_CHARSET); + m_ocsp_responder = ASN1::to_string(name); } } diff --git a/src/tests/data/x509test/contains_bmpstring.pem b/src/tests/data/x509test/contains_bmpstring.pem new file mode 100644 index 000000000..c204c4296 --- /dev/null +++ b/src/tests/data/x509test/contains_bmpstring.pem @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDmDCCAoACCQCJ7TVHW6qlLDANBgkqhkiG9w0BAQUFADCBjTELMAkGA1UEBhMC +REUxDzANBgNVBAcTBkJlcmxpbjEXMBUGA1UECh4OAG4AZQPHA/UAbgBpA8kxCzAJ +BgNVBAsTAkRDMR8wHQYDVQQDHhYA6ABuAd0ELwAgBBwB3QQ5BDcETQQ7MSYwJAYJ +KoZIhvcNAQkBFhdyZW5lLm1ldXNlbEBuZXhlbmlvLmNvbTAeFw0xNzExMTAwODQ5 +MzFaFw0xODExMTAwODQ5MzFaMIGNMQswCQYDVQQGEwJERTEPMA0GA1UEBxMGQmVy +bGluMRcwFQYDVQQKHg4AbgBlA8cD9QBuAGkDyTELMAkGA1UECxMCREMxHzAdBgNV +BAMeFgDoAG4B3QQvACAEHAHdBDkENwRNBDsxJjAkBgkqhkiG9w0BCQEWF3JlbmUu +bWV1c2VsQG5leGVuaW8uY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAvykcZZN3B+pWCi4eNnVv4jITDendIJhfjELocgALmgCJer5XH0gdChJMqHre +bnhpBHEdgivMvVGN2BDCkz5+4WshqwZ8lSXIMlHXaaIy7CulhJUnj2lTsa75jr2F +BmxUF+NwQMrBOOIs2IFlfqeEnlXgRD3pBv9erZI5ng3ciTgXMCbg21t1E56706cD +sLKv2OWXZrz+KLIaNvNV5pNW1wLup3sCKhtFRaH721crp2KROvAGkb0R6oNannKm +DQieQ1g4AR94ihCc1SjaoMydzrFhmyArgsusnkbrl6n14kHfSuyUul355ejSOz+V +k0HGQMykh2WDo2MPy06oxw5ZAwIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQCoE+4W +OLQ3FuTigBYf7Hv0D+qY58XIV0D9YgJYrCHJL7S31NTbCuoYK2NdfcwE1MUUxb28 +nCIR3jgawN1WlHCeiLkhrD6TnMNjvZ34xVqSmkQ5zIJTo909wFhjGjtByP5fAQqt +uja2bJOUOg0GRsbrv1c8zCYnz5+bhRNrZqdxWQzB9c10QiMrReYxFY4wPwIVK9y5 +6eYA4dkkhiRV+KAtyPQokn9N5kUM4VjlyYS3IB3DzXlB3Z6mlHP7t/G5Su1Nmkxu +NhqtL0Vg+oo6DDuzsI+1WZO9saPJnUPSNwu3BDIu56gWHlHw6dZksVK0J7eF5n8d +rPULI83gQXKxuD+C +-----END CERTIFICATE----- diff --git a/src/tests/data/x509test/contains_utf8string.pem 
b/src/tests/data/x509test/contains_utf8string.pem new file mode 100644 index 000000000..ddcd4b046 --- /dev/null +++ b/src/tests/data/x509test/contains_utf8string.pem @@ -0,0 +1,24 @@ +-----BEGIN CERTIFICATE----- +MIID9DCCAtwCCQD6cbgDx1XA/jANBgkqhkiG9w0BAQUFADCBuzEkMCIGA1UEAwwb +0J7Qv9C40YHQsNC90LjQtSDRgdCw0LnRgtCwMSEwHwYJKoZIhvcNAQkBFhJ0ZXN0 +QHJhbmRvbWNvcnAucnUxIDAeBgNVBAoMF9Cc0L7RjyDQutC+0LzQv9Cw0L3QuNGP +MSowKAYDVQQLDCHQnNC+0ZEg0L/QvtC00YDQsNC30LTQtdC70LXQvdC40LUxFTAT +BgNVBAcMDNCc0L7RgdC60LLQsDELMAkGA1UEBhMCUlUwHhcNMTcxMDExMjAzNTQ5 +WhcNMTgxMDExMjAzNTQ5WjCBuzEkMCIGA1UEAwwb0J7Qv9C40YHQsNC90LjQtSDR +gdCw0LnRgtCwMSEwHwYJKoZIhvcNAQkBFhJ0ZXN0QHJhbmRvbWNvcnAucnUxIDAe +BgNVBAoMF9Cc0L7RjyDQutC+0LzQv9Cw0L3QuNGPMSowKAYDVQQLDCHQnNC+0ZEg +0L/QvtC00YDQsNC30LTQtdC70LXQvdC40LUxFTATBgNVBAcMDNCc0L7RgdC60LLQ +sDELMAkGA1UEBhMCUlUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDL +K3xsfEoH/+mjq3scoZ6TfKvlTugzjOSirenGsSdw6IuxEy6ywEFb9YDSKyvMSDOG +LhbotiKvn3n9WwKYhEMkNhMJDavb4s+CRYPtM4iBhzO3gTuNMqBAjKayykOWWNEq +b6lgRivfo62iCetvp0zoXHVEyomfSUCl11sQ21etwOdnloocyRqKfDHHp2jO5J0g +0HEQx2klxuivDU3lpEHRhH4cE5zUMStOdXaHm5nYnPUYnSrFinGLE01l7/MXsJwX +AOVwBv3ErIh173KuwtyPci8AK16UNQqqvGy9QDEvH3TxMxrl7416K/iqCZg5d0FG +HmsAbrGfT9pUA3IScSphAgMBAAEwDQYJKoZIhvcNAQEFBQADggEBAHM8W84yxiIV +o87x6usP+BCQ5T7IIb7NgjnseJUS+dl0gJEyFdLBa4mz5FDdtaYEi3firc3NOJ6l +yA4kEQ49k4I3yaDEjMuc1+qpzThdtC/+cycLCYuoaYxR/kx4/zoLcELsk8rud9Dq +8N6g7q7MR6Nno3to3kHzrka/P0W6X8jfWmYm2RDTKhBPlcobTvgIyupn6uadWhY8 +Ahte186a2ylV/feFHIBuFr9jLzWKPMQm6MmPv86ZatdfqSNOU/YtKAQyLouoT45b +urVwAyOlYDyiXsTfzTcsOAa9sHHAzofK2E+tZ0gY3s7JT1kEWVG5XoJWx+hKM5Ht +hGah1kV664Y= +-----END CERTIFICATE----- diff --git a/src/tests/test_asn1.cpp b/src/tests/test_asn1.cpp index 5c54f2bb4..c99fa41d9 100644 --- a/src/tests/test_asn1.cpp +++ b/src/tests/test_asn1.cpp @@ -9,6 +9,7 @@ #if defined(BOTAN_HAS_ASN1) #include <botan/der_enc.h> #include <botan/ber_dec.h> + #include <botan/asn1_str.h> #endif namespace Botan_Tests { @@ -44,6 +45,185 @@ Test::Result 
test_ber_stack_recursion() return result; } +Test::Result test_asn1_utf8_ascii_parsing() + { + Test::Result result("ASN.1 ASCII parsing"); + + try + { + // \x13 - ASN1 tag for 'printable string' + // \x06 - 6 characters of payload + // ... - UTF-8 encoded (ASCII chars only) word 'Moscow' + const std::string moscow = + "\x13\x06\x4D\x6F\x73\x63\x6F\x77"; + const std::string moscow_plain = "Moscow"; + Botan::DataSource_Memory input(moscow.data()); + Botan::BER_Decoder dec(input); + + Botan::ASN1_String str; + str.decode_from(dec); + + result.test_eq("value()", str.value(), moscow_plain); + } + catch(const Botan::Decoding_Error &ex) + { + result.test_failure(ex.what()); + } + + return result; + } + +Test::Result test_asn1_utf8_parsing() + { + Test::Result result("ASN.1 UTF-8 parsing"); + + try + { + // \x0C - ASN1 tag for 'UTF8 string' + // \x0C - 12 characters of payload + // ... - UTF-8 encoded russian word for Moscow in cyrillic script + const std::string moscow = + "\x0C\x0C\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; + const std::string moscow_plain = + "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; + Botan::DataSource_Memory input(moscow.data()); + Botan::BER_Decoder dec(input); + + Botan::ASN1_String str; + str.decode_from(dec); + + result.test_eq("value()", str.value(), moscow_plain); + } + catch(const Botan::Decoding_Error &ex) + { + result.test_failure(ex.what()); + } + + return result; + } + +Test::Result test_asn1_ucs2_parsing() + { + Test::Result result("ASN.1 BMP string (UCS-2) parsing"); + + try + { + // \x1E - ASN1 tag for 'BMP (UCS-2) string' + // \x0C - 12 characters of payload + // ... 
- UCS-2 encoding for Moscow in cyrillic script + const std::string moscow = + "\x1E\x0C\x04\x1C\x04\x3E\x04\x41\x04\x3A\x04\x32\x04\x30"; + const std::string moscow_plain = + "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; + + Botan::DataSource_Memory input(moscow.data()); + Botan::BER_Decoder dec(input); + + Botan::ASN1_String str; + str.decode_from(dec); + + result.test_eq("value()", str.value(), moscow_plain); + } + catch(const Botan::Decoding_Error &ex) + { + result.test_failure(ex.what()); + } + + return result; + } + +Test::Result test_asn1_ucs4_parsing() + { + Test::Result result("ASN.1 universal string (UCS-4) parsing"); + + try + { + // \x1C - ASN1 tag for 'universal string' + // \x18 - 24 characters of payload + // ... - UCS-4 encoding for Moscow in cyrillic script + const Botan::byte moscow[] = + "\x1C\x18\x00\x00\x04\x1C\x00\x00\x04\x3E\x00\x00\x04\x41\x00\x00\x04\x3A\x00\x00\x04\x32\x00\x00\x04\x30"; + const std::string moscow_plain = + "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; + Botan::DataSource_Memory input(moscow, sizeof(moscow)); + Botan::BER_Decoder dec(input); + + Botan::ASN1_String str; + str.decode_from(dec); + + result.test_eq("value()", str.value(), moscow_plain); + } + catch(const Botan::Decoding_Error &ex) + { + result.test_failure(ex.what()); + } + + return result; + } + +Test::Result test_asn1_ascii_encoding() + { + Test::Result result("ASN.1 ASCII encoding"); + + try + { + // UTF-8 encoded (ASCII chars only) word 'Moscow' + const std::string moscow = + "\x4D\x6F\x73\x63\x6F\x77"; + Botan::ASN1_String str(moscow); + + Botan::DER_Encoder enc; + + str.encode_into(enc); + auto encodingResult = enc.get_contents(); + + // \x13 - ASN1 tag for 'printable string' + // \x06 - 6 characters of payload + const auto moscowEncoded = Botan::hex_decode("13064D6F73636F77"); + result.test_eq("encoding result", encodingResult, moscowEncoded); + + result.test_success("No crash"); + } + catch(const std::exception &ex) + { + 
result.test_failure(ex.what()); + } + + return result; + } + +Test::Result test_asn1_utf8_encoding() + { + Test::Result result("ASN.1 UTF-8 encoding"); + + try + { + // UTF-8 encoded russian word for Moscow in cyrillic script + const std::string moscow = + "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; + Botan::ASN1_String str(moscow); + + Botan::DER_Encoder enc; + + str.encode_into(enc); + auto encodingResult = enc.get_contents(); + + // \x0C - ASN1 tag for 'UTF8 string' + // \x0C - 12 characters of payload + const auto moscowEncoded = + Botan::hex_decode("0C0CD09CD0BED181D0BAD0B2D0B0"); + result.test_eq("encoding result", encodingResult, moscowEncoded); + + result.test_success("No crash"); + } + catch(const std::exception &ex) + { + result.test_failure(ex.what()); + } + + return result; + } + } class ASN1_Tests final : public Test @@ -54,6 +234,12 @@ class ASN1_Tests final : public Test std::vector<Test::Result> results; results.push_back(test_ber_stack_recursion()); + results.push_back(test_asn1_utf8_ascii_parsing()); + results.push_back(test_asn1_utf8_parsing()); + results.push_back(test_asn1_ucs2_parsing()); + results.push_back(test_asn1_ucs4_parsing()); + results.push_back(test_asn1_ascii_encoding()); + results.push_back(test_asn1_utf8_encoding()); return results; } diff --git a/src/tests/test_utils.cpp b/src/tests/test_utils.cpp index da2d25d5e..d102a3e46 100644 --- a/src/tests/test_utils.cpp +++ b/src/tests/test_utils.cpp @@ -6,6 +6,8 @@ * Botan is released under the Simplified BSD License (see license.txt) */ +#define BOTAN_NO_DEPRECATED_WARNINGS + #include "tests.h" #include <functional> #include <ctime> @@ -421,17 +423,15 @@ class Charset_Tests final : public Text_Based_Test { converted = Botan::ucs4_to_utf8(in.data(), in.size()); } - else if(type == "UTF16-LATIN1") + else if(type == "UTF8-LATIN1") { - converted = Botan::Charset::transcode(in_str, - Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UCS2_CHARSET); + converted = 
Botan::utf8_to_latin1(in_str); } - else if(type == "UTF8-LATIN1") + else if(type == "UTF16-LATIN1") { converted = Botan::Charset::transcode(in_str, Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UTF8_CHARSET); + Botan::Character_Set::UCS2_CHARSET); } else if(type == "LATIN1-UTF8") { @@ -484,32 +484,25 @@ class Charset_Tests final : public Text_Based_Test result.test_throws("conversion fails for non-Latin1 characters", []() { // "abcdefŸabcdef" - std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5, - 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 - }; + const std::vector<uint8_t> input = { + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5, + 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 + }; - Botan::Charset::transcode(std::string(input.begin(), input.end()), - Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UTF8_CHARSET); + Botan::utf8_to_latin1(std::string(input.begin(), input.end())); }); result.test_throws("invalid utf-8 string", []() { // sequence truncated - std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 }; - - Botan::Charset::transcode(std::string(input.begin(), input.end()), - Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UTF8_CHARSET); + const std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 }; + Botan::utf8_to_latin1(std::string(input.begin(), input.end())); }); result.test_throws("invalid utf-8 string", []() { std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC8, 0xB8, 0x61 }; - - Botan::Charset::transcode(std::string(input.begin(), input.end()), - Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UTF8_CHARSET); + Botan::utf8_to_latin1(std::string(input.begin(), input.end())); }); return result; diff --git a/src/tests/unit_x509.cpp b/src/tests/unit_x509.cpp index b9aa1709e..d635f7fe1 100644 --- a/src/tests/unit_x509.cpp +++ b/src/tests/unit_x509.cpp @@ -358,6 +358,69 @@ Test::Result test_x509_dates() return result; } +Test::Result 
test_x509_utf8() + { + Test::Result result("X509 with UTF-8 encoded fields"); + + try + { + Botan::X509_Certificate utf8_cert(Test::data_file("x509test/contains_utf8string.pem")); + + // UTF-8 encoded fields of test certificate (contains cyrillic letters) + const std::string organization = + "\xD0\x9C\xD0\xBE\xD1\x8F\x20\xD0\xBA\xD0\xBE\xD0" + "\xBC\xD0\xBF\xD0\xB0\xD0\xBD\xD0\xB8\xD1\x8F"; + const std::string organization_unit = + "\xD0\x9C\xD0\xBE\xD1\x91\x20\xD0\xBF\xD0\xBE\xD0\xB4\xD1\x80\xD0\xB0" + "\xD0\xB7\xD0\xB4\xD0\xB5\xD0\xBB\xD0\xB5\xD0\xBD\xD0\xB8\xD0\xB5"; + const std::string common_name = + "\xD0\x9E\xD0\xBF\xD0\xB8\xD1\x81\xD0\xB0\xD0\xBD\xD0\xB8" + "\xD0\xB5\x20\xD1\x81\xD0\xB0\xD0\xB9\xD1\x82\xD0\xB0"; + const std::string location = + "\xD0\x9C\xD0\xBE\xD1\x81\xD0\xBA\xD0\xB2\xD0\xB0"; + + result.test_eq("O", utf8_cert.issuer_info("O").at(0), organization); + result.test_eq("OU", utf8_cert.issuer_info("OU").at(0), organization_unit); + result.test_eq("CN", utf8_cert.issuer_info("CN").at(0), common_name); + result.test_eq("L", utf8_cert.issuer_info("L").at(0), location); + } + catch (const Botan::Decoding_Error &ex) + { + result.test_failure(ex.what()); + } + + return result; + } + +Test::Result test_x509_bmpstring() + { + Test::Result result("X509 with UCS-2 (BMPString) encoded fields"); + + try + { + Botan::X509_Certificate ucs2_cert(Test::data_file("x509test/contains_bmpstring.pem")); + + // UTF-8 encoded fields of test certificate (contains cyrillic and greek letters) + const std::string organization = + "\x6E\x65\xCF\x87\xCF\xB5\x6E\x69\xCF\x89"; + const std::string common_name = + "\xC3\xA8\x6E\xC7\x9D\xD0\xAF\x20\xD0\x9C\xC7\x9D\xD0\xB9\xD0\xB7\xD1\x8D\xD0\xBB"; + + // UTF-8 encoded fields of test certificate (contains only ASCII characters) + const std::string location = "Berlin"; + + result.test_eq("O", ucs2_cert.issuer_info("O").at(0), organization); + result.test_eq("CN", ucs2_cert.issuer_info("CN").at(0), common_name); + 
result.test_eq("L", ucs2_cert.issuer_info("L").at(0), location); + } + catch (const Botan::Decoding_Error &ex) + { + result.test_failure(ex.what()); + } + + return result; + } + Test::Result test_x509_cert(const std::string& sig_algo, const std::string& hash_fn = "SHA-256") { Test::Result result("X509 Unit"); @@ -1135,6 +1198,8 @@ class X509_Cert_Unit_Tests final : public Test results.push_back(test_x509_dates()); results.push_back(test_cert_status_strings()); results.push_back(test_hashes("ECDSA")); + results.push_back(test_x509_utf8()); + results.push_back(test_x509_bmpstring()); return results; } |