about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/cli/asn1.cpp 15
-rw-r--r-- src/lib/asn1/asn1_alt_name.cpp 13
-rw-r--r-- src/lib/asn1/asn1_str.cpp 132
-rw-r--r-- src/lib/asn1/asn1_str.h 16
-rw-r--r-- src/lib/asn1/asn1_time.cpp 11
-rw-r--r-- src/lib/asn1/info.txt 2
-rw-r--r-- src/lib/utils/charset.cpp 58
-rw-r--r-- src/lib/utils/charset.h 44
-rw-r--r-- src/lib/x509/name_constraint.cpp 3
-rw-r--r-- src/lib/x509/x509_ext.cpp 7
-rw-r--r-- src/tests/test_utils.cpp 35
11 files changed, 140 insertions, 196 deletions
diff --git a/src/cli/asn1.cpp b/src/cli/asn1.cpp
index 5c90a3c5b..234cbd6e6 100644
--- a/src/cli/asn1.cpp
+++ b/src/cli/asn1.cpp
@@ -22,9 +22,6 @@
#include <sstream>
#include <ctype.h>
-// Set this if your terminal understands UTF-8; otherwise output is in Latin-1
-#define UTF8_TERMINAL 1
-
namespace Botan_CLI {
namespace {
@@ -349,17 +346,7 @@ void decode(std::ostream& output,
{
Botan::ASN1_String str;
data.decode(str);
- if(UTF8_TERMINAL)
- {
- emit(output, type_name(type_tag), level, length,
- Botan::Charset::transcode(str.iso_8859(),
- Botan::UTF8_CHARSET,
- Botan::LATIN1_CHARSET));
- }
- else
- {
- emit(output, type_name(type_tag), level, length, str.iso_8859());
- }
+ emit(output, type_name(type_tag), level, length, str.value());
}
else if(type_tag == Botan::UTC_TIME || type_tag == Botan::GENERALIZED_TIME)
{
diff --git a/src/lib/asn1/asn1_alt_name.cpp b/src/lib/asn1/asn1_alt_name.cpp
index 7bd4cd494..940312886 100644
--- a/src/lib/asn1/asn1_alt_name.cpp
+++ b/src/lib/asn1/asn1_alt_name.cpp
@@ -11,7 +11,6 @@
#include <botan/ber_dec.h>
#include <botan/oids.h>
#include <botan/internal/stl_util.h>
-#include <botan/charset.h>
#include <botan/parsing.h>
#include <botan/loadstor.h>
@@ -133,7 +132,7 @@ void encode_entries(DER_Encoder& encoder,
if(type == "RFC822" || type == "DNS" || type == "URI")
{
ASN1_String asn1_string(i->second, IA5_STRING);
- encoder.add_object(tagging, CONTEXT_SPECIFIC, asn1_string.iso_8859());
+ encoder.add_object(tagging, CONTEXT_SPECIFIC, asn1_string.value());
}
else if(type == "IP")
{
@@ -218,13 +217,9 @@ void AlternativeName::decode_from(BER_Decoder& source)
}
else if(tag == 1 || tag == 2 || tag == 6)
{
- const std::string value = Charset::transcode(ASN1::to_string(obj),
- LATIN1_CHARSET,
- LOCAL_CHARSET);
-
- if(tag == 1) add_attribute("RFC822", value);
- if(tag == 2) add_attribute("DNS", value);
- if(tag == 6) add_attribute("URI", value);
+ if(tag == 1) add_attribute("RFC822", ASN1::to_string(obj));
+ if(tag == 2) add_attribute("DNS", ASN1::to_string(obj));
+ if(tag == 6) add_attribute("URI", ASN1::to_string(obj));
}
else if(tag == 7)
{
diff --git a/src/lib/asn1/asn1_str.cpp b/src/lib/asn1/asn1_str.cpp
index 070acbebd..d90aa215b 100644
--- a/src/lib/asn1/asn1_str.cpp
+++ b/src/lib/asn1/asn1_str.cpp
@@ -10,9 +10,6 @@
#include <botan/ber_dec.h>
#include <botan/charset.h>
-#include <codecvt>
-#include <locale>
-
namespace Botan {
namespace {
@@ -20,8 +17,7 @@ namespace {
/*
* Choose an encoding for the string
*/
-ASN1_Tag choose_encoding(const std::string& str,
- const std::string& type)
+ASN1_Tag choose_encoding(const std::string& str)
{
static const uint8_t IS_PRINTABLE[256] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -51,63 +47,49 @@ ASN1_Tag choose_encoding(const std::string& str,
{
if(!IS_PRINTABLE[static_cast<uint8_t>(str[i])])
{
- if(type == "utf8") return UTF8_STRING;
- if(type == "latin1") return T61_STRING;
- throw Invalid_Argument("choose_encoding: Bad string type " + type);
+ return UTF8_STRING;
}
}
return PRINTABLE_STRING;
}
-}
-
-template <typename CharT, class AllocT>
-static std::string ucsX_to_utf8(const std::vector<byte, AllocT> &ucsX)
+void assert_is_string_type(ASN1_Tag tag)
{
- if (ucsX.size() % sizeof(CharT) != 0)
+ if(tag != NUMERIC_STRING &&
+ tag != PRINTABLE_STRING &&
+ tag != VISIBLE_STRING &&
+ tag != T61_STRING &&
+ tag != IA5_STRING &&
+ tag != UTF8_STRING &&
+ tag != BMP_STRING &&
+ tag != UNIVERSAL_STRING)
{
- throw Invalid_Argument("cannot decode UCS string (wrong byte count)");
+ throw Invalid_Argument("ASN1_String: Unknown string type " +
+ std::to_string(tag));
}
-
- union
- {
- const byte *as_char;
- const CharT *as_wide_char;
- };
-
- as_char = ucsX.data();
- const size_t wide_char_count = ucsX.size() / sizeof(CharT);
-
- using converter_t = std::codecvt_utf8<CharT, 0x10ffff, std::consume_header>;
- std::wstring_convert<converter_t, CharT> convert;
- return convert.to_bytes(as_wide_char, as_wide_char + wide_char_count);
}
+}
+
/*
* Create an ASN1_String
*/
ASN1_String::ASN1_String(const std::string& str, ASN1_Tag t) : m_utf8_str(str), m_tag(t)
{
-
if(m_tag == DIRECTORY_STRING)
- m_tag = choose_encoding(m_utf8_str, "utf8");
-
- if(m_tag != NUMERIC_STRING &&
- m_tag != PRINTABLE_STRING &&
- m_tag != VISIBLE_STRING &&
- m_tag != T61_STRING &&
- m_tag != IA5_STRING &&
- m_tag != UTF8_STRING &&
- m_tag != BMP_STRING &&
- m_tag != UNIVERSAL_STRING)
- throw Invalid_Argument("ASN1_String: Unknown string type " +
- std::to_string(m_tag));
+ {
+ m_tag = choose_encoding(m_utf8_str);
+ }
+
+ assert_is_string_type(m_tag);
}
/*
* Create an ASN1_String
*/
-ASN1_String::ASN1_String(const std::string& str) : m_utf8_str(str), m_tag(choose_encoding(m_utf8_str, "utf8"))
+ASN1_String::ASN1_String(const std::string& str) :
+ m_utf8_str(str),
+ m_tag(choose_encoding(m_utf8_str))
{}
/*
@@ -115,23 +97,7 @@ ASN1_String::ASN1_String(const std::string& str) : m_utf8_str(str), m_tag(choose
*/
std::string ASN1_String::iso_8859() const
{
- return Charset::transcode(m_utf8_str, LATIN1_CHARSET, UTF8_CHARSET);
- }
-
-/*
-* Return this string in UTF-8 encoding
-*/
-std::string ASN1_String::value() const
- {
- return m_utf8_str;
- }
-
-/*
-* Return the type of this string object
-*/
-ASN1_Tag ASN1_String::tagging() const
- {
- return m_tag;
+ return utf8_to_latin1(m_utf8_str);
}
/*
@@ -139,7 +105,15 @@ ASN1_Tag ASN1_String::tagging() const
*/
void ASN1_String::encode_into(DER_Encoder& encoder) const
{
- encoder.add_object(tagging(), UNIVERSAL, m_utf8_str);
+ if(m_data.empty())
+ {
+ encoder.add_object(tagging(), UNIVERSAL, m_utf8_str);
+ }
+ else
+ {
+ // If this string was decoded, reserialize using original encoding
+ encoder.add_object(tagging(), UNIVERSAL, m_data.data(), m_data.size());
+ }
}
/*
@@ -149,39 +123,23 @@ void ASN1_String::decode_from(BER_Decoder& source)
{
BER_Object obj = source.get_next_object();
-#if defined(BOTAN_TARGET_OS_TYPE_IS_WINDOWS)
- // using char32_t and char16_t (as suggested by the standard) leads to linker
- // errors on MSVC 2015 and 2017. This workaround was suggested here:
- // https://social.msdn.microsoft.com/Forums/vstudio/en-US/
- // 8f40dcd8-c67f-4eba-9134-a19b9178e481/vs-2015-rc-linker-stdcodecvt-error
- using utf32_type = int32_t;
- using utf16_type = wchar_t;
-#else
- using utf32_type = char32_t;
- using utf16_type = char16_t;
-#endif
-
- if(obj.type_tag == UTF8_STRING)
- {
- *this = ASN1_String(ASN1::to_string(obj), obj.type_tag);
- }
- else if(obj.type_tag == BMP_STRING)
+ assert_is_string_type(obj.type_tag);
+
+ m_tag = obj.type_tag;
+ m_data.assign(obj.value.begin(), obj.value.end());
+
+ if(m_tag == BMP_STRING)
{
- *this = ASN1_String(ucsX_to_utf8<utf16_type>(obj.value), obj.type_tag);
+ m_utf8_str = ucs2_to_utf8(m_data.data(), m_data.size());
}
- else if(obj.type_tag == UNIVERSAL_STRING)
+ else if(m_tag == UNIVERSAL_STRING)
{
- *this = ASN1_String(ucsX_to_utf8<utf32_type>(obj.value), obj.type_tag);
+ m_utf8_str = ucs4_to_utf8(m_data.data(), m_data.size());
}
- else // IA5_STRING - international ASCII characters
- // T61_STRING - pretty much ASCII
- // PRINTABLE_STRING - ASCII subset (a-z, A-Z, ' () +,-.?:/= and SPACE)
- // VISIBLE_STRING - visible ASCII subset
- // NUMERIC_STRING - ASCII subset (0-9 and SPACE)
+ else
{
- *this = ASN1_String(
- Charset::transcode(ASN1::to_string(obj), UTF8_CHARSET, LATIN1_CHARSET),
- obj.type_tag);
+ // All other supported string types are UTF-8 or some subset thereof
+ m_utf8_str = ASN1::to_string(obj);
}
}
diff --git a/src/lib/asn1/asn1_str.h b/src/lib/asn1/asn1_str.h
index 3ad82582e..f19265494 100644
--- a/src/lib/asn1/asn1_str.h
+++ b/src/lib/asn1/asn1_str.h
@@ -13,7 +13,8 @@
namespace Botan {
/**
-* Simple String
+* ASN.1 string type
+* This class normalizes all inputs to a UTF-8 std::string
*/
class BOTAN_PUBLIC_API(2,0) ASN1_String final : public ASN1_Object
{
@@ -21,14 +22,17 @@ class BOTAN_PUBLIC_API(2,0) ASN1_String final : public ASN1_Object
void encode_into(class DER_Encoder&) const override;
void decode_from(class BER_Decoder&) override;
- std::string value() const;
- std::string iso_8859() const;
+ ASN1_Tag tagging() const { return m_tag; }
+
+ const std::string& value() const { return m_utf8_str; }
- ASN1_Tag tagging() const;
+ std::string BOTAN_DEPRECATED("Use value() to get UTF-8 string instead")
+ iso_8859() const;
- explicit ASN1_String(const std::string& = "");
- ASN1_String(const std::string&, ASN1_Tag);
+ explicit ASN1_String(const std::string& utf8 = "");
+ ASN1_String(const std::string& utf8, ASN1_Tag tag);
private:
+ std::vector<uint8_t> m_data;
std::string m_utf8_str;
ASN1_Tag m_tag;
};
diff --git a/src/lib/asn1/asn1_time.cpp b/src/lib/asn1/asn1_time.cpp
index 2cd225915..f6a0c414e 100644
--- a/src/lib/asn1/asn1_time.cpp
+++ b/src/lib/asn1/asn1_time.cpp
@@ -8,7 +8,6 @@
#include <botan/asn1_time.h>
#include <botan/der_enc.h>
#include <botan/ber_dec.h>
-#include <botan/charset.h>
#include <botan/exceptn.h>
#include <botan/parsing.h>
#include <botan/calendar.h>
@@ -41,20 +40,14 @@ void X509_Time::encode_into(DER_Encoder& der) const
if(m_tag != GENERALIZED_TIME && m_tag != UTC_TIME)
throw Invalid_Argument("X509_Time: Bad encoding tag");
- der.add_object(m_tag, UNIVERSAL,
- Charset::transcode(to_string(),
- LOCAL_CHARSET,
- LATIN1_CHARSET));
+ der.add_object(m_tag, UNIVERSAL, to_string());
}
void X509_Time::decode_from(BER_Decoder& source)
{
BER_Object ber_time = source.get_next_object();
- set_to(Charset::transcode(ASN1::to_string(ber_time),
- LATIN1_CHARSET,
- LOCAL_CHARSET),
- ber_time.type_tag);
+ set_to(ASN1::to_string(ber_time), ber_time.type_tag);
}
std::string X509_Time::to_string() const
diff --git a/src/lib/asn1/info.txt b/src/lib/asn1/info.txt
index 4b3689f45..4772e1ca7 100644
--- a/src/lib/asn1/info.txt
+++ b/src/lib/asn1/info.txt
@@ -1,5 +1,5 @@
<defines>
-ASN1 -> 20161102
+ASN1 -> 20171109
</defines>
<requires>
diff --git a/src/lib/utils/charset.cpp b/src/lib/utils/charset.cpp
index dadee8f78..ca32c652d 100644
--- a/src/lib/utils/charset.cpp
+++ b/src/lib/utils/charset.cpp
@@ -92,34 +92,6 @@ std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len)
return s;
}
-namespace Charset {
-
-namespace {
-
-/*
-* Convert from UCS-2 to ISO 8859-1
-*/
-std::string ucs2_to_latin1(const std::string& ucs2)
- {
- if(ucs2.size() % 2 == 1)
- throw Decoding_Error("UCS-2 string has an odd number of bytes");
-
- std::string latin1;
-
- for(size_t i = 0; i != ucs2.size(); i += 2)
- {
- const uint8_t c1 = ucs2[i];
- const uint8_t c2 = ucs2[i+1];
-
- if(c1 != 0)
- throw Decoding_Error("UCS-2 has non-Latin1 characters");
-
- latin1 += static_cast<char>(c2);
- }
-
- return latin1;
- }
-
/*
* Convert from UTF-8 to ISO 8859-1
*/
@@ -133,7 +105,9 @@ std::string utf8_to_latin1(const std::string& utf8)
const uint8_t c1 = static_cast<uint8_t>(utf8[position++]);
if(c1 <= 0x7F)
+ {
iso8859 += static_cast<char>(c1);
+ }
else if(c1 >= 0xC0 && c1 <= 0xC7)
{
if(position == utf8.size())
@@ -154,6 +128,34 @@ std::string utf8_to_latin1(const std::string& utf8)
return iso8859;
}
+namespace Charset {
+
+namespace {
+
+/*
+* Convert from UCS-2 to ISO 8859-1
+*/
+std::string ucs2_to_latin1(const std::string& ucs2)
+ {
+ if(ucs2.size() % 2 == 1)
+ throw Decoding_Error("UCS-2 string has an odd number of bytes");
+
+ std::string latin1;
+
+ for(size_t i = 0; i != ucs2.size(); i += 2)
+ {
+ const uint8_t c1 = ucs2[i];
+ const uint8_t c2 = ucs2[i+1];
+
+ if(c1 != 0)
+ throw Decoding_Error("UCS-2 has non-Latin1 characters");
+
+ latin1 += static_cast<char>(c2);
+ }
+
+ return latin1;
+ }
+
/*
* Convert from ISO 8859-1 to UTF-8
*/
diff --git a/src/lib/utils/charset.h b/src/lib/utils/charset.h
index 3f2ff9912..4913f0a5a 100644
--- a/src/lib/utils/charset.h
+++ b/src/lib/utils/charset.h
@@ -14,16 +14,6 @@
namespace Botan {
/**
-* The different charsets (nominally) supported by Botan.
-*/
-enum Character_Set {
- LOCAL_CHARSET,
- UCS2_CHARSET,
- UTF8_CHARSET,
- LATIN1_CHARSET
-};
-
-/**
* Convert a sequence of UCS-2 (big endian) characters to a UTF-8 string
* This is used for ASN.1 BMPString type
* @param ucs2 the sequence of UCS-2 characters
@@ -39,15 +29,41 @@ std::string BOTAN_UNSTABLE_API ucs2_to_utf8(const uint8_t ucs2[], size_t len);
*/
std::string BOTAN_UNSTABLE_API ucs4_to_utf8(const uint8_t ucs4[], size_t len);
+/**
+* Convert a UTF-8 string to Latin-1
+* If a character outside the Latin-1 range is encountered, an exception is thrown.
+*/
+std::string BOTAN_UNSTABLE_API utf8_to_latin1(const std::string& utf8);
+
+/**
+* The different charsets (nominally) supported by Botan.
+*/
+enum Character_Set {
+ LOCAL_CHARSET,
+ UCS2_CHARSET,
+ UTF8_CHARSET,
+ LATIN1_CHARSET
+};
+
namespace Charset {
/*
-* Character Set Handling
+* Character set conversion - avoid this.
+* For specific conversions, use the functions above like
+* ucs2_to_utf8 and utf8_to_latin1
+*
+* If you need something more complex than that, use a real library
+* such as iconv, Boost.Locale, or ICU
*/
-std::string BOTAN_PUBLIC_API(2,0) transcode(const std::string& str,
- Character_Set to,
- Character_Set from);
+std::string BOTAN_PUBLIC_API(2,0)
+ BOTAN_DEPRECATED("Avoid. See comment in header.")
+ transcode(const std::string& str,
+ Character_Set to,
+ Character_Set from);
+/*
+* Simple character classifier functions
+*/
bool BOTAN_PUBLIC_API(2,0) is_digit(char c);
bool BOTAN_PUBLIC_API(2,0) is_space(char c);
bool BOTAN_PUBLIC_API(2,0) caseless_cmp(char x, char y);
diff --git a/src/lib/x509/name_constraint.cpp b/src/lib/x509/name_constraint.cpp
index e27dca9ec..e098bcd8d 100644
--- a/src/lib/x509/name_constraint.cpp
+++ b/src/lib/x509/name_constraint.cpp
@@ -7,7 +7,6 @@
#include <botan/name_constraint.h>
#include <botan/ber_dec.h>
-#include <botan/charset.h>
#include <botan/loadstor.h>
#include <botan/x509_dn.h>
#include <botan/x509cert.h>
@@ -49,7 +48,7 @@ void GeneralName::decode_from(class BER_Decoder& ber)
if(tag == 1 || tag == 2 || tag == 6)
{
- m_name = Charset::transcode(ASN1::to_string(obj), LATIN1_CHARSET, LOCAL_CHARSET);
+ m_name = ASN1::to_string(obj);
if(tag == 1)
{
diff --git a/src/lib/x509/x509_ext.cpp b/src/lib/x509/x509_ext.cpp
index 3141d3c44..6e4c29d42 100644
--- a/src/lib/x509/x509_ext.cpp
+++ b/src/lib/x509/x509_ext.cpp
@@ -12,7 +12,6 @@
#include <botan/der_enc.h>
#include <botan/ber_dec.h>
#include <botan/oids.h>
-#include <botan/charset.h>
#include <botan/internal/bit_ops.h>
#include <algorithm>
#include <sstream>
@@ -735,7 +734,7 @@ std::vector<uint8_t> Authority_Information_Access::encode_inner() const
.start_cons(SEQUENCE)
.start_cons(SEQUENCE)
.encode(OIDS::lookup("PKIX.OCSP"))
- .add_object(ASN1_Tag(6), CONTEXT_SPECIFIC, url.iso_8859())
+ .add_object(ASN1_Tag(6), CONTEXT_SPECIFIC, url.value())
.end_cons()
.end_cons().get_contents_unlocked();
}
@@ -758,9 +757,7 @@ void Authority_Information_Access::decode_inner(const std::vector<uint8_t>& in)
if(name.type_tag == 6 && name.class_tag == CONTEXT_SPECIFIC)
{
- m_ocsp_responder = Charset::transcode(ASN1::to_string(name),
- LATIN1_CHARSET,
- LOCAL_CHARSET);
+ m_ocsp_responder = ASN1::to_string(name);
}
}
diff --git a/src/tests/test_utils.cpp b/src/tests/test_utils.cpp
index da2d25d5e..d102a3e46 100644
--- a/src/tests/test_utils.cpp
+++ b/src/tests/test_utils.cpp
@@ -6,6 +6,8 @@
* Botan is released under the Simplified BSD License (see license.txt)
*/
+#define BOTAN_NO_DEPRECATED_WARNINGS
+
#include "tests.h"
#include <functional>
#include <ctime>
@@ -421,17 +423,15 @@ class Charset_Tests final : public Text_Based_Test
{
converted = Botan::ucs4_to_utf8(in.data(), in.size());
}
- else if(type == "UTF16-LATIN1")
+ else if(type == "UTF8-LATIN1")
{
- converted = Botan::Charset::transcode(in_str,
- Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UCS2_CHARSET);
+ converted = Botan::utf8_to_latin1(in_str);
}
- else if(type == "UTF8-LATIN1")
+ else if(type == "UTF16-LATIN1")
{
converted = Botan::Charset::transcode(in_str,
Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UTF8_CHARSET);
+ Botan::Character_Set::UCS2_CHARSET);
}
else if(type == "LATIN1-UTF8")
{
@@ -484,32 +484,25 @@ class Charset_Tests final : public Text_Based_Test
result.test_throws("conversion fails for non-Latin1 characters", []()
{
// "abcdefŸabcdef" (0xC5 0xB8 is the UTF-8 encoding of U+0178, outside Latin-1)
- std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5,
- 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
- };
+ const std::vector<uint8_t> input = {
+ 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5,
+ 0xB8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
+ };
- Botan::Charset::transcode(std::string(input.begin(), input.end()),
- Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UTF8_CHARSET);
+ Botan::utf8_to_latin1(std::string(input.begin(), input.end()));
});
result.test_throws("invalid utf-8 string", []()
{
// sequence truncated
- std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 };
-
- Botan::Charset::transcode(std::string(input.begin(), input.end()),
- Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UTF8_CHARSET);
+ const std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC5 };
+ Botan::utf8_to_latin1(std::string(input.begin(), input.end()));
});
result.test_throws("invalid utf-8 string", []()
{
std::vector<uint8_t> input = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xC8, 0xB8, 0x61 };
-
- Botan::Charset::transcode(std::string(input.begin(), input.end()),
- Botan::Character_Set::LATIN1_CHARSET,
- Botan::Character_Set::UTF8_CHARSET);
+ Botan::utf8_to_latin1(std::string(input.begin(), input.end()));
});
return result;