diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/cli/asn1.cpp | 15 | ||||
-rw-r--r-- | src/lib/asn1/asn1_alt_name.cpp | 13 | ||||
-rw-r--r-- | src/lib/asn1/asn1_str.cpp | 132 | ||||
-rw-r--r-- | src/lib/asn1/asn1_str.h | 16 | ||||
-rw-r--r-- | src/lib/asn1/asn1_time.cpp | 11 | ||||
-rw-r--r-- | src/lib/asn1/info.txt | 2 | ||||
-rw-r--r-- | src/lib/utils/charset.cpp | 58 | ||||
-rw-r--r-- | src/lib/utils/charset.h | 44 | ||||
-rw-r--r-- | src/lib/x509/name_constraint.cpp | 3 | ||||
-rw-r--r-- | src/lib/x509/x509_ext.cpp | 7 | ||||
-rw-r--r-- | src/tests/test_utils.cpp | 35 |
11 files changed, 140 insertions, 196 deletions
diff --git a/src/cli/asn1.cpp b/src/cli/asn1.cpp index 5c90a3c5b..234cbd6e6 100644 --- a/src/cli/asn1.cpp +++ b/src/cli/asn1.cpp @@ -22,9 +22,6 @@ #include <sstream> #include <ctype.h> -// Set this if your terminal understands UTF-8; otherwise output is in Latin-1 -#define UTF8_TERMINAL 1 - namespace Botan_CLI { namespace { @@ -349,17 +346,7 @@ void decode(std::ostream& output, { Botan::ASN1_String str; data.decode(str); - if(UTF8_TERMINAL) - { - emit(output, type_name(type_tag), level, length, - Botan::Charset::transcode(str.iso_8859(), - Botan::UTF8_CHARSET, - Botan::LATIN1_CHARSET)); - } - else - { - emit(output, type_name(type_tag), level, length, str.iso_8859()); - } + emit(output, type_name(type_tag), level, length, str.value()); } else if(type_tag == Botan::UTC_TIME || type_tag == Botan::GENERALIZED_TIME) { diff --git a/src/lib/asn1/asn1_alt_name.cpp b/src/lib/asn1/asn1_alt_name.cpp index 7bd4cd494..940312886 100644 --- a/src/lib/asn1/asn1_alt_name.cpp +++ b/src/lib/asn1/asn1_alt_name.cpp @@ -11,7 +11,6 @@ #include <botan/ber_dec.h> #include <botan/oids.h> #include <botan/internal/stl_util.h> -#include <botan/charset.h> #include <botan/parsing.h> #include <botan/loadstor.h> @@ -133,7 +132,7 @@ void encode_entries(DER_Encoder& encoder, if(type == "RFC822" || type == "DNS" || type == "URI") { ASN1_String asn1_string(i->second, IA5_STRING); - encoder.add_object(tagging, CONTEXT_SPECIFIC, asn1_string.iso_8859()); + encoder.add_object(tagging, CONTEXT_SPECIFIC, asn1_string.value()); } else if(type == "IP") { @@ -218,13 +217,9 @@ void AlternativeName::decode_from(BER_Decoder& source) } else if(tag == 1 || tag == 2 || tag == 6) { - const std::string value = Charset::transcode(ASN1::to_string(obj), - LATIN1_CHARSET, - LOCAL_CHARSET); - - if(tag == 1) add_attribute("RFC822", value); - if(tag == 2) add_attribute("DNS", value); - if(tag == 6) add_attribute("URI", value); + if(tag == 1) add_attribute("RFC822", ASN1::to_string(obj)); + if(tag == 2) add_attribute("DNS", 
ASN1::to_string(obj)); + if(tag == 6) add_attribute("URI", ASN1::to_string(obj)); } else if(tag == 7) { diff --git a/src/lib/asn1/asn1_str.cpp b/src/lib/asn1/asn1_str.cpp index 070acbebd..d90aa215b 100644 --- a/src/lib/asn1/asn1_str.cpp +++ b/src/lib/asn1/asn1_str.cpp @@ -10,9 +10,6 @@ #include <botan/ber_dec.h> #include <botan/charset.h> -#include <codecvt> -#include <locale> - namespace Botan { namespace { @@ -20,8 +17,7 @@ namespace { /* * Choose an encoding for the string */ -ASN1_Tag choose_encoding(const std::string& str, - const std::string& type) +ASN1_Tag choose_encoding(const std::string& str) { static const uint8_t IS_PRINTABLE[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -51,63 +47,49 @@ ASN1_Tag choose_encoding(const std::string& str, { if(!IS_PRINTABLE[static_cast<uint8_t>(str[i])]) { - if(type == "utf8") return UTF8_STRING; - if(type == "latin1") return T61_STRING; - throw Invalid_Argument("choose_encoding: Bad string type " + type); + return UTF8_STRING; } } return PRINTABLE_STRING; } -} - -template <typename CharT, class AllocT> -static std::string ucsX_to_utf8(const std::vector<byte, AllocT> &ucsX) +void assert_is_string_type(ASN1_Tag tag) { - if (ucsX.size() % sizeof(CharT) != 0) + if(tag != NUMERIC_STRING && + tag != PRINTABLE_STRING && + tag != VISIBLE_STRING && + tag != T61_STRING && + tag != IA5_STRING && + tag != UTF8_STRING && + tag != BMP_STRING && + tag != UNIVERSAL_STRING) { - throw Invalid_Argument("cannot decode UCS string (wrong byte count)"); + throw Invalid_Argument("ASN1_String: Unknown string type " + + std::to_string(tag)); } - - union - { - const byte *as_char; - const CharT *as_wide_char; - }; - - as_char = ucsX.data(); - const size_t wide_char_count = ucsX.size() / sizeof(CharT); - - using converter_t = std::codecvt_utf8<CharT, 0x10ffff, std::consume_header>; - std::wstring_convert<converter_t, CharT> convert; - return convert.to_bytes(as_wide_char, as_wide_char + wide_char_count); } +} + 
/* * Create an ASN1_String */ ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : m_utf8_str(str), m_tag(t) { - if(m_tag == DIRECTORY_STRING) - m_tag = choose_encoding(m_utf8_str, "utf8"); - - if(m_tag != NUMERIC_STRING && - m_tag != PRINTABLE_STRING && - m_tag != VISIBLE_STRING && - m_tag != T61_STRING && - m_tag != IA5_STRING && - m_tag != UTF8_STRING && - m_tag != BMP_STRING && - m_tag != UNIVERSAL_STRING) - throw Invalid_Argument("ASN1_String: Unknown string type " + - std::to_string(m_tag)); + { + m_tag = choose_encoding(m_utf8_str); + } + + assert_is_string_type(m_tag); } /* * Create an ASN1_String */ -ASN1_String::ASN1_String(const std::string& str) : m_utf8_str(str), m_tag(choose_encoding(m_utf8_str, "utf8")) +ASN1_String::ASN1_String(const std::string& str) : + m_utf8_str(str), + m_tag(choose_encoding(m_utf8_str)) {} /* @@ -115,23 +97,7 @@ ASN1_String::ASN1_String(const std::string& str) : m_utf8_str(str), m_tag(choose */ std::string ASN1_String::iso_8859() const { - return Charset::transcode(m_utf8_str, LATIN1_CHARSET, UTF8_CHARSET); - } - -/* -* Return this string in UTF-8 encoding -*/ -std::string ASN1_String::value() const - { - return m_utf8_str; - } - -/* -* Return the type of this string object -*/ -ASN1_Tag ASN1_String::tagging() const - { - return m_tag; + return utf8_to_latin1(m_utf8_str); } /* @@ -139,7 +105,15 @@ ASN1_Tag ASN1_String::tagging() const */ void ASN1_String::encode_into(DER_Encoder& encoder) const { - encoder.add_object(tagging(), UNIVERSAL, m_utf8_str); + if(m_data.empty()) + { + encoder.add_object(tagging(), UNIVERSAL, m_utf8_str); + } + else + { + // If this string was decoded, reserialize using original encoding + encoder.add_object(tagging(), UNIVERSAL, m_data.data(), m_data.size()); + } } /* @@ -149,39 +123,23 @@ void ASN1_String::decode_from(BER_Decoder& source) { BER_Object obj = source.get_next_object(); -#if defined(BOTAN_TARGET_OS_TYPE_IS_WINDOWS) - // using char32_t and char16_t (as suggested by the standard) 
leads to linker - // errors on MSVC 2015 and 2017. This workaround was suggested here: - // https://social.msdn.microsoft.com/Forums/vstudio/en-US/ - // 8f40dcd8-c67f-4eba-9134-a19b9178e481/vs-2015-rc-linker-stdcodecvt-error - using utf32_type = int32_t; - using utf16_type = wchar_t; -#else - using utf32_type = char32_t; - using utf16_type = char16_t; -#endif - - if(obj.type_tag == UTF8_STRING) - { - *this = ASN1_String(ASN1::to_string(obj), obj.type_tag); - } - else if(obj.type_tag == BMP_STRING) + assert_is_string_type(obj.type_tag); + + m_tag = obj.type_tag; + m_data.assign(obj.value.begin(), obj.value.end()); + + if(m_tag == BMP_STRING) { - *this = ASN1_String(ucsX_to_utf8<utf16_type>(obj.value), obj.type_tag); + m_utf8_str = ucs2_to_utf8(m_data.data(), m_data.size()); } - else if(obj.type_tag == UNIVERSAL_STRING) + else if(m_tag == UNIVERSAL_STRING) { - *this = ASN1_String(ucsX_to_utf8<utf32_type>(obj.value), obj.type_tag); + m_utf8_str = ucs4_to_utf8(m_data.data(), m_data.size()); } - else // IA5_STRING - international ASCII characters - // T61_STRING - pretty much ASCII - // PRINTABLE_STRING - ASCII subset (a-z, A-Z, ' () +,-.?:/= and SPACE) - // VISIBLE_STRING - visible ASCII subset - // NUMERIC_STRING - ASCII subset (0-9 and SPACE) + else { - *this = ASN1_String( - Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, LATIN1_CHARSET), - obj.type_tag); + // All other supported string types are UTF-8 or some subset thereof + m_utf8_str = ASN1::to_string(obj); } } diff --git a/src/lib/asn1/asn1_str.h b/src/lib/asn1/asn1_str.h index 3ad82582e..f19265494 100644 --- a/src/lib/asn1/asn1_str.h +++ b/src/lib/asn1/asn1_str.h @@ -13,7 +13,8 @@ namespace Botan { /** -* Simple String +* ASN.1 string type +* This class normalizes all inputs to a UTF-8 std::string */ class BOTAN_PUBLIC_API(2,0) ASN1_String final : public ASN1_Object { @@ -21,14 +22,17 @@ class BOTAN_PUBLIC_API(2,0) ASN1_String final : public ASN1_Object void encode_into(class DER_Encoder&) const 
override; void decode_from(class BER_Decoder&) override; - std::string value() const; - std::string iso_8859() const; + ASN1_Tag tagging() const { return m_tag; } + + const std::string& value() const { return m_utf8_str; } - ASN1_Tag tagging() const; + std::string BOTAN_DEPRECATED("Use value() to get UTF-8 string instead") + iso_8859() const; - explicit ASN1_String(const std::string& = ""); - ASN1_String(const std::string&, ASN1_Tag); + explicit ASN1_String(const std::string& utf8 = ""); + ASN1_String(const std::string& utf8, ASN1_Tag tag); private: + std::vector<uint8_t> m_data; std::string m_utf8_str; ASN1_Tag m_tag; }; diff --git a/src/lib/asn1/asn1_time.cpp b/src/lib/asn1/asn1_time.cpp index 2cd225915..f6a0c414e 100644 --- a/src/lib/asn1/asn1_time.cpp +++ b/src/lib/asn1/asn1_time.cpp @@ -8,7 +8,6 @@ #include <botan/asn1_time.h> #include <botan/der_enc.h> #include <botan/ber_dec.h> -#include <botan/charset.h> #include <botan/exceptn.h> #include <botan/parsing.h> #include <botan/calendar.h> @@ -41,20 +40,14 @@ void X509_Time::encode_into(DER_Encoder& der) const if(m_tag != GENERALIZED_TIME && m_tag != UTC_TIME) throw Invalid_Argument("X509_Time: Bad encoding tag"); - der.add_object(m_tag, UNIVERSAL, - Charset::transcode(to_string(), - LOCAL_CHARSET, - LATIN1_CHARSET)); + der.add_object(m_tag, UNIVERSAL, to_string()); } void X509_Time::decode_from(BER_Decoder& source) { BER_Object ber_time = source.get_next_object(); - set_to(Charset::transcode(ASN1::to_string(ber_time), - LATIN1_CHARSET, - LOCAL_CHARSET), - ber_time.type_tag); + set_to(ASN1::to_string(ber_time), ber_time.type_tag); } std::string X509_Time::to_string() const diff --git a/src/lib/asn1/info.txt b/src/lib/asn1/info.txt index 4b3689f45..4772e1ca7 100644 --- a/src/lib/asn1/info.txt +++ b/src/lib/asn1/info.txt @@ -1,5 +1,5 @@ <defines> -ASN1 -> 20161102 +ASN1 -> 20171109 </defines> <requires> diff --git a/src/lib/utils/charset.cpp b/src/lib/utils/charset.cpp index dadee8f78..ca32c652d 100644 --- 
a/src/lib/utils/charset.cpp +++ b/src/lib/utils/charset.cpp @@ -92,34 +92,6 @@ std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len) return s; } -namespace Charset { - -namespace { - -/* -* Convert from UCS-2 to ISO 8859-1 -*/ -std::string ucs2_to_latin1(const std::string& ucs2) - { - if(ucs2.size() % 2 == 1) - throw Decoding_Error("UCS-2 string has an odd number of bytes"); - - std::string latin1; - - for(size_t i = 0; i != ucs2.size(); i += 2) - { - const uint8_t c1 = ucs2[i]; - const uint8_t c2 = ucs2[i+1]; - - if(c1 != 0) - throw Decoding_Error("UCS-2 has non-Latin1 characters"); - - latin1 += static_cast<char>(c2); - } - - return latin1; - } - /* * Convert from UTF-8 to ISO 8859-1 */ @@ -133,7 +105,9 @@ std::string utf8_to_latin1(const std::string& utf8) const uint8_t c1 = static_cast<uint8_t>(utf8[position++]); if(c1 <= 0x7F) + { iso8859 += static_cast<char>(c1); + } else if(c1 >= 0xC0 && c1 <= 0xC7) { if(position == utf8.size()) @@ -154,6 +128,34 @@ std::string utf8_to_latin1(const std::string& utf8) return iso8859; } +namespace Charset { + +namespace { + +/* +* Convert from UCS-2 to ISO 8859-1 +*/ +std::string ucs2_to_latin1(const std::string& ucs2) + { + if(ucs2.size() % 2 == 1) + throw Decoding_Error("UCS-2 string has an odd number of bytes"); + + std::string latin1; + + for(size_t i = 0; i != ucs2.size(); i += 2) + { + const uint8_t c1 = ucs2[i]; + const uint8_t c2 = ucs2[i+1]; + + if(c1 != 0) + throw Decoding_Error("UCS-2 has non-Latin1 characters"); + + latin1 += static_cast<char>(c2); + } + + return latin1; + } + /* * Convert from ISO 8859-1 to UTF-8 */ diff --git a/src/lib/utils/charset.h b/src/lib/utils/charset.h index 3f2ff9912..4913f0a5a 100644 --- a/src/lib/utils/charset.h +++ b/src/lib/utils/charset.h @@ -14,16 +14,6 @@ namespace Botan { /** -* The different charsets (nominally) supported by Botan. 
-*/ -enum Character_Set { - LOCAL_CHARSET, - UCS2_CHARSET, - UTF8_CHARSET, - LATIN1_CHARSET -}; - -/** * Convert a sequence of UCS-2 (big endian) characters to a UTF-8 string * This is used for ASN.1 BMPString type * @param ucs2 the sequence of UCS-2 characters @@ -39,15 +29,41 @@ std::string BOTAN_UNSTABLE_API ucs2_to_utf8(const uint8_t ucs2[], size_t len); */ std::string BOTAN_UNSTABLE_API ucs4_to_utf8(const uint8_t ucs4[], size_t len); +/** +* Convert a UTF-8 string to Latin-1 +* If a character outside the Latin-1 range is encountered, an exception is thrown. +*/ +std::string BOTAN_UNSTABLE_API utf8_to_latin1(const std::string& utf8); + +/** +* The different charsets (nominally) supported by Botan. +*/ +enum Character_Set { + LOCAL_CHARSET, + UCS2_CHARSET, + UTF8_CHARSET, + LATIN1_CHARSET +}; + namespace Charset { /* -* Character Set Handling +* Character set conversion - avoid this. +* For specific conversions, use the functions above like +* ucs2_to_utf8 and utf8_to_latin1 +* +* If you need something more complex than that, use a real library +* such as iconv, Boost.Locale, or ICU */ -std::string BOTAN_PUBLIC_API(2,0) transcode(const std::string& str, - Character_Set to, - Character_Set from); +std::string BOTAN_PUBLIC_API(2,0) + BOTAN_DEPRECATED("Avoid. 
See comment in header.") + transcode(const std::string& str, + Character_Set to, + Character_Set from); +/* +* Simple character classifier functions +*/ bool BOTAN_PUBLIC_API(2,0) is_digit(char c); bool BOTAN_PUBLIC_API(2,0) is_space(char c); bool BOTAN_PUBLIC_API(2,0) caseless_cmp(char x, char y); diff --git a/src/lib/x509/name_constraint.cpp b/src/lib/x509/name_constraint.cpp index e27dca9ec..e098bcd8d 100644 --- a/src/lib/x509/name_constraint.cpp +++ b/src/lib/x509/name_constraint.cpp @@ -7,7 +7,6 @@ #include <botan/name_constraint.h> #include <botan/ber_dec.h> -#include <botan/charset.h> #include <botan/loadstor.h> #include <botan/x509_dn.h> #include <botan/x509cert.h> @@ -49,7 +48,7 @@ void GeneralName::decode_from(class BER_Decoder& ber) if(tag == 1 || tag == 2 || tag == 6) { - m_name = Charset::transcode(ASN1::to_string(obj), LATIN1_CHARSET, LOCAL_CHARSET); + m_name = ASN1::to_string(obj); if(tag == 1) { diff --git a/src/lib/x509/x509_ext.cpp b/src/lib/x509/x509_ext.cpp index 3141d3c44..6e4c29d42 100644 --- a/src/lib/x509/x509_ext.cpp +++ b/src/lib/x509/x509_ext.cpp @@ -12,7 +12,6 @@ #include <botan/der_enc.h> #include <botan/ber_dec.h> #include <botan/oids.h> -#include <botan/charset.h> #include <botan/internal/bit_ops.h> #include <algorithm> #include <sstream> @@ -735,7 +734,7 @@ std::vector<uint8_t> Authority_Information_Access::encode_inner() const .start_cons(SEQUENCE) .start_cons(SEQUENCE) .encode(OIDS::lookup("PKIX.OCSP")) - .add_object(ASN1_Tag(6), CONTEXT_SPECIFIC, url.iso_8859()) + .add_object(ASN1_Tag(6), CONTEXT_SPECIFIC, url.value()) .end_cons() .end_cons().get_contents_unlocked(); } @@ -758,9 +757,7 @@ void Authority_Information_Access::decode_inner(const std::vector<uint8_t>& in) if(name.type_tag == 6 && name.class_tag == CONTEXT_SPECIFIC) { - m_ocsp_responder = Charset::transcode(ASN1::to_string(name), - LATIN1_CHARSET, - LOCAL_CHARSET); + m_ocsp_responder = ASN1::to_string(name); } } diff --git a/src/tests/test_utils.cpp 
b/src/tests/test_utils.cpp index da2d25d5e..d102a3e46 100644 --- a/src/tests/test_utils.cpp +++ b/src/tests/test_utils.cpp @@ -6,6 +6,8 @@ * Botan is released under the Simplified BSD License (see license.txt) */ +#define BOTAN_NO_DEPRECATED_WARNINGS + #include "tests.h" #include <functional> #include <ctime> @@ -421,17 +423,15 @@ class Charset_Tests final : public Text_Based_Test { converted = Botan::ucs4_to_utf8(in.data(), in.size()); } - else if(type == "UTF16-LATIN1") + else if(type == "UTF8-LATIN1") { - converted = Botan::Charset::transcode(in_str, - Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UCS2_CHARSET); + converted = Botan::utf8_to_latin1(in_str); } - else if(type == "UTF8-LATIN1") + else if(type == "UTF16-LATIN1") { converted = Botan::Charset::transcode(in_str, Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UTF8_CHARSET); + Botan::Character_Set::UCS2_CHARSET); } else if(type == "LATIN1-UTF8") { @@ -484,32 +484,25 @@ class Charset_Tests final : public Text_Based_Test result.test_throws("conversion fails for non-Latin1 characters", []() { // "abcdefŸabcdef" - std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5, - 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 - }; + const std::vector<uint8_t> input = { + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5, + 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 + }; - Botan::Charset::transcode(std::string(input.begin(), input.end()), - Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UTF8_CHARSET); + Botan::utf8_to_latin1(std::string(input.begin(), input.end())); }); result.test_throws("invalid utf-8 string", []() { // sequence truncated - std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 }; - - Botan::Charset::transcode(std::string(input.begin(), input.end()), - Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UTF8_CHARSET); + const std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 }; + 
Botan::utf8_to_latin1(std::string(input.begin(), input.end())); }); result.test_throws("invalid utf-8 string", []() { std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC8, 0xB8, 0x61 }; - - Botan::Charset::transcode(std::string(input.begin(), input.end()), - Botan::Character_Set::LATIN1_CHARSET, - Botan::Character_Set::UTF8_CHARSET); + Botan::utf8_to_latin1(std::string(input.begin(), input.end())); }); return result; |